/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 600 - (show annotations)
Mon May 9 08:54:11 2011 UTC (4 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 188836 byte(s)
Error occurred while calculating annotation data.
Fix backup bug for \R with greedy quantifier.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2011 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48
49 #define NLBLOCK md /* Block containing newline information */
50 #define PSSTART start_subject /* Field containing processed string start */
51 #define PSEND end_subject /* Field containing processed string end */
52
53 #include "pcre_internal.h"
54
55 /* Undefine some potentially clashing cpp symbols */
56
57 #undef min
58 #undef max
59
60 /* Flag bits for the match() function, passed in its "flags" argument */
61
62 #define match_condassert 0x01 /* Called to check a condition assertion */
63 #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64
65 /* Non-error returns from the match() function. Error returns are externally
66 defined PCRE_ERROR_xxx codes, which are all negative. */
67
68 #define MATCH_MATCH 1
69 #define MATCH_NOMATCH 0
70
71 /* Special internal returns from the match() function. Make them sufficiently
72 negative to avoid the external error codes. */
73
74 #define MATCH_ACCEPT (-999)
75 #define MATCH_COMMIT (-998) /* Passed back by the OP_COMMIT case when what follows it fails */
76 #define MATCH_PRUNE (-997) /* Passed back by the OP_PRUNE and OP_PRUNE_ARG cases */
77 #define MATCH_SKIP (-996) /* Passed back by OP_SKIP, and by OP_MARK when a SKIP name matches; md->start_match_ptr holds the subject position */
78 #define MATCH_SKIP_ARG (-995) /* Passed back by OP_SKIP_ARG; md->start_match_ptr holds the skip name */
79 #define MATCH_THEN (-994) /* Passed back by OP_THEN and OP_THEN_ARG; md->start_match_ptr holds the branch start in the code */
80
81 /* This is a convenience macro for code that occurs many times. */
82 /* It copies the current markptr into md->mark and then returns ra via RRETURN, so md and markptr must be in scope where it is used. */
83 #define MRRETURN(ra) \
84 { \
85 md->mark = markptr; \
86 RRETURN(ra); \
87 }
88
89 /* Maximum number of ints of offset to save on the stack for recursive calls.
90 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91 because the offset vector is always a multiple of 3 long. */
92
93 #define REC_STACK_SAVE_MAX 30 /* Dimension of stacksave[] in match() and of Xstacksave[] in heapframe */
94
95 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
96 /* NOTE(review): the six entries presumably correspond to *, *?, +, +?, ? and ?? in opcode order - confirm at the use sites, which are outside this view. */
97 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
98 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
99
100
101
102 #ifdef PCRE_DEBUG
103 /*************************************************
104 * Debugging function to print chars *
105 *************************************************/
106
107 /* Print a sequence of chars in printable format using printf(), stopping at
108 the end of the subject if requested (that is, when is_subject is TRUE).
109
110 Arguments:
111 p points to characters
112 length number to print
113 is_subject TRUE if printing from within md->start_subject
114 md pointer to matching data block, if is_subject is TRUE
115
116 Returns: nothing
117 */
118
119 static void
120 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121 {
122 unsigned int c;
123 if (is_subject && length > md->end_subject - p) length = md->end_subject - p; /* Clamp to the bytes left before md->end_subject; md is not touched when is_subject is FALSE */
124 while (length-- > 0) /* A negative or zero length prints nothing */
125 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c); /* Non-printing bytes are shown as a two-digit hex escape */
126 }
127 #endif
128
129
130
131 /*************************************************
132 * Match a back-reference *
133 *************************************************/
134
135 /* Normally, if a back reference hasn't been set, the length that is passed is
136 negative, so the match always fails. However, in JavaScript compatibility mode,
137 the length passed is zero. Note that in caseless UTF-8 mode, the number of
138 subject bytes matched may be different to the number of reference bytes. For the byte-wise comparisons the room left in the subject is tested as a pointer difference (md->end_subject - eptr), so no pointer beyond the end of the subject is ever computed; this relies on eptr not being beyond md->end_subject on entry.
139
140 Arguments:
141 offset index into the offset vector
142 eptr pointer into the subject
143 length length of reference to be matched (number of bytes)
144 md points to match data block
145 ims the ims flags
146
147 Returns: < 0 if not matched, otherwise the number of subject bytes matched
148 */
149
150 static int
151 match_ref(int offset, register USPTR eptr, int length, match_data *md,
152 unsigned long int ims)
153 {
154 USPTR eptr_start = eptr; /* Remembered so the number of subject bytes consumed can be returned */
155 register USPTR p = md->start_subject + md->offset_vector[offset]; /* Start of the referenced text */
156
157 #ifdef PCRE_DEBUG
158 if (eptr >= md->end_subject)
159 printf("matching subject <null>");
160 else
161 {
162 printf("matching subject ");
163 pchars(eptr, length, TRUE, md);
164 }
165 printf(" against backref ");
166 pchars(p, length, FALSE, md);
167 printf("\n");
168 #endif
169
170 /* Always fail if reference not set (and not JavaScript compatible). */
171
172 if (length < 0) return -1;
173
174 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175 properly if Unicode properties are supported. Otherwise, we can check only
176 ASCII characters. */
177
178 if ((ims & PCRE_CASELESS) != 0)
179 {
180 #ifdef SUPPORT_UTF8
181 #ifdef SUPPORT_UCP
182 if (md->utf8)
183 {
184 /* Match characters up to the end of the reference. NOTE: the number of
185 bytes matched may differ, because there are some characters whose upper and
186 lower case versions code as different numbers of bytes. For example, U+023A
187 (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188 a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189 the latter. It is important, therefore, to check the length along the
190 reference, not along the subject (earlier code did this wrong). */
191
192 USPTR endptr = p + length;
193 while (p < endptr)
194 {
195 int c, d;
196 if (eptr >= md->end_subject) return -1;
197 GETCHARINC(c, eptr);
198 GETCHARINC(d, p);
199 if (c != d && c != UCD_OTHERCASE(d)) return -1;
200 }
201 }
202 else
203 #endif
204 #endif
205
206 /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
207 is no UCP support. The room left in the subject is compared as a difference so that eptr + length is never formed when it would lie beyond the subject. */
208 {
209 if (length > md->end_subject - eptr) return -1;
210 while (length-- > 0)
211 { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
212 }
213 }
214
215 /* In the caseful case, we can just compare the bytes, whether or not we
216 are in UTF-8 mode. Again the bound is tested as a pointer difference. */
217
218 else
219 {
220 if (length > md->end_subject - eptr) return -1;
221 while (length-- > 0) if (*p++ != *eptr++) return -1;
222 }
223
224 return eptr - eptr_start;
225 }
226
227
228
229 /***************************************************************************
230 ****************************************************************************
231 RECURSION IN THE match() FUNCTION
232
233 The match() function is highly recursive, though not every recursive call
234 increases the recursive depth. Nevertheless, some regular expressions can cause
235 it to recurse to a great depth. I was writing for Unix, so I just let it call
236 itself recursively. This uses the stack for saving everything that has to be
237 saved for a recursive call. On Unix, the stack can be large, and this works
238 fine.
239
240 It turns out that on some non-Unix-like systems there are problems with
241 programs that use a lot of stack. (This despite the fact that every last chip
242 has oodles of memory these days, and techniques for extending the stack have
243 been known for decades.) So....
244
245 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
246 calls by keeping local variables that need to be preserved in blocks of memory
247 obtained from malloc() instead of on the stack. Macros are used to
248 achieve this so that the actual code doesn't look very different to what it
249 always used to.
250
251 The original heap-recursive code used longjmp(). However, it seems that this
252 can be very slow on some operating systems. Following a suggestion from Stan
253 Switzer, the use of longjmp() has been abolished, at the cost of having to
254 provide a unique number for each call to RMATCH. There is no way of generating
255 a sequence of numbers at compile time in C. I have given them names, to make
256 them stand out more clearly.
257
258 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
259 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
260 tests. Furthermore, not using longjmp() means that local dynamic variables
261 don't have indeterminate values; this has meant that the frame size can be
262 reduced because the result can be "passed back" by straight setting of the
263 variable instead of being passed in the frame.
264 ****************************************************************************
265 ***************************************************************************/
266
267 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
268 below must be updated in sync. Each name is given as the "rw" argument of one RMATCH call; the NO_RECURSE version of RMATCH saves it in frame->Xwhere and pastes it into the label L_<name> placed after the call. */
269
270 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
271 RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
272 RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
273 RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
274 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
275 RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
276 RM61, RM62 };
277
278 /* These versions of the macros use the stack, as normal. There are debugging
279 versions and production versions. Note that the "rw" argument of RMATCH isn't
280 actually used in this definition. RMATCH leaves the result of the recursive match() call in rrc; mstart, markptr and rdepth are taken from the scope in which the macro is used rather than from its arguments. */
281
282 #ifndef NO_RECURSE
283 #define REGISTER register
284 /* Debugging versions: report each call and each return on stdout with the source line number. */
285 #ifdef PCRE_DEBUG
286 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
287 { \
288 printf("match() called in line %d\n", __LINE__); \
289 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
290 printf("to line %d\n", __LINE__); \
291 }
292 #define RRETURN(ra) \
293 { \
294 printf("match() returned %d from line %d ", ra, __LINE__); \
295 return ra; \
296 }
297 #else /* Production versions: a plain recursive call and a plain return */
298 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
299 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
300 #define RRETURN(ra) return ra
301 #endif
302
303 #else
304
305
306 /* These versions of the macros manage a private stack on the heap. Note that
307 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
308 argument of match(), which never changes. */
309
310 #define REGISTER
311 /* RMATCH: get a new frame from pcre_stack_malloc (returning PCRE_ERROR_NOMEMORY via RRETURN if that fails), save rw in the current frame's Xwhere, copy the call arguments plus mstart and markptr into the new frame, give it a depth one greater, link it to the current frame, make it current and jump to HEAP_RECURSE. The label L_<rw> marks the point just after the call; presumably the code at HEAP_RETURN (outside this view) jumps to it using Xwhere. */
312 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
313 {\
314 heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
315 if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316 frame->Xwhere = rw; \
317 newframe->Xeptr = ra;\
318 newframe->Xecode = rb;\
319 newframe->Xmstart = mstart;\
320 newframe->Xmarkptr = markptr;\
321 newframe->Xoffset_top = rc;\
322 newframe->Xims = re;\
323 newframe->Xeptrb = rf;\
324 newframe->Xflags = rg;\
325 newframe->Xrdepth = frame->Xrdepth + 1;\
326 newframe->Xprevframe = frame;\
327 frame = newframe;\
328 DPRINTF(("restarting from line %d\n", __LINE__));\
329 goto HEAP_RECURSE;\
330 L_##rw:\
331 DPRINTF(("jumped back to line %d\n", __LINE__));\
332 }
333 /* RRETURN: free the current frame and make its predecessor current. If there is a predecessor, put ra in rrc and jump to HEAP_RETURN; otherwise this was the top-level frame, so really return ra from match(). */
334 #define RRETURN(ra)\
335 {\
336 heapframe *oldframe = frame;\
337 frame = oldframe->Xprevframe;\
338 (pcre_stack_free)(oldframe);\
339 if (frame != NULL)\
340 {\
341 rrc = ra;\
342 goto HEAP_RETURN;\
343 }\
344 return ra;\
345 }
346
347
348 /* Structure for remembering the local variables in a private frame */
349 /* When NO_RECURSE is defined, match() reaches the argument and local-variable members through macros of the same name without the X prefix (for example, eptr is frame->Xeptr). */
350 typedef struct heapframe {
351 struct heapframe *Xprevframe; /* Frame that "called" this one; NULL marks the top level */
352
353 /* Function arguments that may change */
354
355 USPTR Xeptr;
356 const uschar *Xecode;
357 USPTR Xmstart;
358 USPTR Xmarkptr;
359 int Xoffset_top;
360 long int Xims; /* NOTE(review): signed here, but match() declares ims as unsigned long int and Xoriginal_ims below is unsigned - confirm this is intended */
361 eptrblock *Xeptrb;
362 int Xflags;
363 unsigned int Xrdepth;
364
365 /* Function local variables */
366
367 USPTR Xcallpat;
368 #ifdef SUPPORT_UTF8
369 USPTR Xcharptr;
370 #endif
371 USPTR Xdata;
372 USPTR Xnext;
373 USPTR Xpp;
374 USPTR Xprev;
375 USPTR Xsaved_eptr;
376
377 recursion_info Xnew_recursive;
378
379 BOOL Xcur_is_word;
380 BOOL Xcondition;
381 BOOL Xprev_is_word;
382
383 unsigned long int Xoriginal_ims;
384
385 #ifdef SUPPORT_UCP
386 int Xprop_type;
387 int Xprop_value;
388 int Xprop_fail_result;
389 int Xprop_category;
390 int Xprop_chartype;
391 int Xprop_script;
392 int Xoclength;
393 uschar Xocchars[8];
394 #endif
395
396 int Xcodelink;
397 int Xctype;
398 unsigned int Xfc;
399 int Xfi;
400 int Xlength;
401 int Xmax;
402 int Xmin;
403 int Xnumber;
404 int Xoffset;
405 int Xop;
406 int Xsave_capture_last;
407 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
408 int Xstacksave[REC_STACK_SAVE_MAX];
409
410 eptrblock Xnewptrb;
411
412 /* Where to jump back to */
413
414 int Xwhere; /* The "rw" value (one of RM1..RM62) stored by RMATCH before it switches to a new frame */
415
416 } heapframe;
417
418 #endif
419
420
421 /***************************************************************************
422 ***************************************************************************/
423
424
425
426 /*************************************************
427 * Match from current position *
428 *************************************************/
429
430 /* This function is called recursively in many circumstances. Whenever it
431 returns a negative (error) response, the outer incarnation must also return the
432 same response. */
433
434 /* These macros pack up tests that are used for partial matching, and which
435 appear several times in the code. We set the "hit end" flag if the pointer is
436 at the end of the subject and also past md->start_used_ptr (i.e.
437 something has been matched). For hard partial matching (md->partial > 1), we then return
438 PCRE_ERROR_PARTIAL immediately. The second one is used when we already know we are past the end of
439 the subject, so it omits the end-of-subject test. */
440 /* NOTE(review): both macros expand to a bare if statement rather than a do { } while (0) block, so they are not safe as the unbraced body of an if that has an else. */
441 #define CHECK_PARTIAL()\
442 if (md->partial != 0 && eptr >= md->end_subject && \
443 eptr > md->start_used_ptr) \
444 { \
445 md->hitend = TRUE; \
446 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
447 }
448
449 #define SCHECK_PARTIAL()\
450 if (md->partial != 0 && eptr > md->start_used_ptr) \
451 { \
452 md->hitend = TRUE; \
453 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
454 }
455
456
457 /* Performance note: It might be tempting to extract commonly used fields from
458 the md structure (e.g. utf8, end_subject) into individual variables to improve
459 performance. Tests using gcc on a SPARC disproved this; in the first case, it
460 made performance worse.
461
462 Arguments:
463 eptr pointer to current character in subject
464 ecode pointer to current position in compiled code
465 mstart pointer to the current match start position (can be modified
466 by encountering \K)
467 markptr pointer to the most recent MARK name, or NULL
468 offset_top current top pointer
469 md pointer to "static" info for the match
470 ims current /i, /m, and /s options
471 eptrb pointer to chain of blocks containing eptr at start of
472 brackets - for testing for empty matches
473 flags can contain
474 match_condassert - this is an assertion condition
475 match_cbegroup - this is the start of an unlimited repeat
476 group that can match an empty string
477 rdepth the recursion depth
478
479 Returns: MATCH_MATCH if matched ) these values are >= 0
480 MATCH_NOMATCH if failed to match )
481 a negative MATCH_xxx value for PRUNE, SKIP, etc
482 a negative PCRE_ERROR_xxx value if aborted by an error condition
483 (e.g. stopped by repeated call or recursion limit)
484 */
485
486 static int
487 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
488 const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
489 eptrblock *eptrb, int flags, unsigned int rdepth)
490 {
491 /* These variables do not need to be preserved over recursion in this function,
492 so they can be ordinary variables in all cases. Mark some of them with
493 "register" because they are used a lot in loops. */
494
495 register int rrc; /* Returns from recursive calls */
496 register int i; /* Used for loops not involving calls to RMATCH() */
497 register unsigned int c; /* Character values not kept over RMATCH() calls */
498 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
499
500 BOOL minimize, possessive; /* Quantifier options */
501 int condcode;
502
503 /* When recursion is not being used, all "local" variables that have to be
504 preserved over calls to RMATCH() are part of a "frame" which is obtained from
505 heap storage. Set up the top-level frame here; others are obtained from the
506 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
507
508 #ifdef NO_RECURSE
509 heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
510 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
511 frame->Xprevframe = NULL; /* Marks the top level */
512
513 /* Copy in the original argument variables */
514
515 frame->Xeptr = eptr;
516 frame->Xecode = ecode;
517 frame->Xmstart = mstart;
518 frame->Xmarkptr = markptr;
519 frame->Xoffset_top = offset_top;
520 frame->Xims = ims;
521 frame->Xeptrb = eptrb;
522 frame->Xflags = flags;
523 frame->Xrdepth = rdepth;
524
525 /* This is where control jumps back to to effect "recursion" */
526
527 HEAP_RECURSE:
528
529 /* Macros make the argument variables come from the current frame */
530
531 #define eptr frame->Xeptr
532 #define ecode frame->Xecode
533 #define mstart frame->Xmstart
534 #define markptr frame->Xmarkptr
535 #define offset_top frame->Xoffset_top
536 #define ims frame->Xims
537 #define eptrb frame->Xeptrb
538 #define flags frame->Xflags
539 #define rdepth frame->Xrdepth
540
541 /* Ditto for the local variables */
542
543 #ifdef SUPPORT_UTF8
544 #define charptr frame->Xcharptr
545 #endif
546 #define callpat frame->Xcallpat
547 #define codelink frame->Xcodelink
548 #define data frame->Xdata
549 #define next frame->Xnext
550 #define pp frame->Xpp
551 #define prev frame->Xprev
552 #define saved_eptr frame->Xsaved_eptr
553
554 #define new_recursive frame->Xnew_recursive
555
556 #define cur_is_word frame->Xcur_is_word
557 #define condition frame->Xcondition
558 #define prev_is_word frame->Xprev_is_word
559
560 #define original_ims frame->Xoriginal_ims
561
562 #ifdef SUPPORT_UCP
563 #define prop_type frame->Xprop_type
564 #define prop_value frame->Xprop_value
565 #define prop_fail_result frame->Xprop_fail_result
566 #define prop_category frame->Xprop_category
567 #define prop_chartype frame->Xprop_chartype
568 #define prop_script frame->Xprop_script
569 #define oclength frame->Xoclength
570 #define occhars frame->Xocchars
571 #endif
572
573 #define ctype frame->Xctype
574 #define fc frame->Xfc
575 #define fi frame->Xfi
576 #define length frame->Xlength
577 #define max frame->Xmax
578 #define min frame->Xmin
579 #define number frame->Xnumber
580 #define offset frame->Xoffset
581 #define op frame->Xop
582 #define save_capture_last frame->Xsave_capture_last
583 #define save_offset1 frame->Xsave_offset1
584 #define save_offset2 frame->Xsave_offset2
585 #define save_offset3 frame->Xsave_offset3
586 #define stacksave frame->Xstacksave
587
588 #define newptrb frame->Xnewptrb
589
590 /* When recursion is being used, local variables are allocated on the stack and
591 get preserved during recursion in the normal way. In this environment, fi and
592 i, and fc and c, can be the same variables. */
593
594 #else /* NO_RECURSE not defined */
595 #define fi i
596 #define fc c
597
598
599 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
600 const uschar *charptr; /* in small blocks of the code. My normal */
601 #endif /* style of coding would have declared */
602 const uschar *callpat; /* them within each of those blocks. */
603 const uschar *data; /* However, in order to accommodate the */
604 const uschar *next; /* version of this code that uses an */
605 USPTR pp; /* external "stack" implemented on the */
606 const uschar *prev; /* heap, it is easier to declare them all */
607 USPTR saved_eptr; /* here, so the declarations can be cut */
608 /* out in a block. The only declarations */
609 recursion_info new_recursive; /* within blocks below are for variables */
610 /* that do not have to be preserved over */
611 BOOL cur_is_word; /* a recursive call to RMATCH(). */
612 BOOL condition;
613 BOOL prev_is_word;
614
615 unsigned long int original_ims;
616
617 #ifdef SUPPORT_UCP
618 int prop_type;
619 int prop_value;
620 int prop_fail_result;
621 int prop_category;
622 int prop_chartype;
623 int prop_script;
624 int oclength;
625 uschar occhars[8];
626 #endif
627
628 int codelink;
629 int ctype;
630 int length;
631 int max;
632 int min;
633 int number;
634 int offset;
635 int op;
636 int save_capture_last;
637 int save_offset1, save_offset2, save_offset3;
638 int stacksave[REC_STACK_SAVE_MAX];
639
640 eptrblock newptrb;
641 #endif /* NO_RECURSE */
642
643 /* These statements are here to stop the compiler complaining about uninitialized
644 variables. */
645
646 #ifdef SUPPORT_UCP
647 prop_value = 0;
648 prop_fail_result = 0;
649 #endif
650
651
652 /* This label is used for tail recursion, which is used in a few cases even
653 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
654 used. Thanks to Ian Taylor for noticing this possibility and sending the
655 original patch. */
656
657 TAIL_RECURSE:
658
659 /* OK, now we can get on with the real code of the function. Recursive calls
660 are specified by the macro RMATCH and RRETURN is used to return. When
661 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
662 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
663 defined). However, RMATCH isn't like a function call because it's quite a
664 complicated macro. It has to be used in one particular way. This shouldn't,
665 however, impact performance when true recursion is being used. */
666
667 #ifdef SUPPORT_UTF8
668 utf8 = md->utf8; /* Local copy of the flag */
669 #else
670 utf8 = FALSE;
671 #endif
672
673 /* First check that we haven't called match() too many times, or that we
674 haven't exceeded the recursive call limit. */
675
676 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
677 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
678
679 original_ims = ims; /* Save for resetting on ')' */
680
681 /* At the start of a group with an unlimited repeat that may match an empty
682 string, the match_cbegroup flag is set. When this is the case, add the current
683 subject pointer to the chain of such remembered pointers, to be checked when we
684 hit the closing ket, in order to break infinite loops that match no characters.
685 When match() is called in other circumstances, don't add to the chain. The
686 match_cbegroup flag must NOT be used with tail recursion, because the memory
687 block that is used is on the stack, so a new one may be required for each
688 match(). */
689
690 if ((flags & match_cbegroup) != 0)
691 {
692 newptrb.epb_saved_eptr = eptr;
693 newptrb.epb_prev = eptrb;
694 eptrb = &newptrb;
695 }
696
697 /* Now start processing the opcodes. */
698
699 for (;;)
700 {
701 minimize = possessive = FALSE;
702 op = *ecode;
703
704 switch(op)
705 {
706 case OP_MARK:
707 markptr = ecode + 2;
708 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
709 ims, eptrb, flags, RM55);
710
711 /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
712 argument, and we must check whether that argument matches this MARK's
713 argument. It is passed back in md->start_match_ptr (an overloading of that
714 variable). If it does match, we reset that variable to the current subject
715 position and return MATCH_SKIP. Otherwise, pass back the return code
716 unaltered. */
717
718 if (rrc == MATCH_SKIP_ARG &&
719 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
720 {
721 md->start_match_ptr = eptr;
722 RRETURN(MATCH_SKIP);
723 }
724
725 if (md->mark == NULL) md->mark = markptr;
726 RRETURN(rrc);
727
728 case OP_FAIL:
729 MRRETURN(MATCH_NOMATCH);
730
731 /* COMMIT overrides PRUNE, SKIP, and THEN */
732
733 case OP_COMMIT:
734 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
735 ims, eptrb, flags, RM52);
736 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
737 rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
738 rrc != MATCH_THEN)
739 RRETURN(rrc);
740 MRRETURN(MATCH_COMMIT);
741
742 /* PRUNE overrides THEN */
743
744 case OP_PRUNE:
745 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
746 ims, eptrb, flags, RM51);
747 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
748 MRRETURN(MATCH_PRUNE);
749
750 case OP_PRUNE_ARG:
751 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752 ims, eptrb, flags, RM56);
753 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
754 md->mark = ecode + 2;
755 RRETURN(MATCH_PRUNE);
756
757 /* SKIP overrides PRUNE and THEN */
758
759 case OP_SKIP:
760 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
761 ims, eptrb, flags, RM53);
762 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
763 RRETURN(rrc);
764 md->start_match_ptr = eptr; /* Pass back current position */
765 MRRETURN(MATCH_SKIP);
766
767 case OP_SKIP_ARG:
768 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
769 ims, eptrb, flags, RM57);
770 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
771 RRETURN(rrc);
772
773 /* Pass back the current skip name by overloading md->start_match_ptr and
774 returning the special MATCH_SKIP_ARG return code. This will either be
775 caught by a matching MARK, or get to the top, where it is treated the same
776 as PRUNE. */
777
778 md->start_match_ptr = ecode + 2;
779 RRETURN(MATCH_SKIP_ARG);
780
781 /* For THEN (and THEN_ARG) we pass back the address of the bracket or
782 the alt that is at the start of the current branch. This makes it possible
783 to skip back past alternatives that precede the THEN within the current
784 branch. */
785
786 case OP_THEN:
787 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
788 ims, eptrb, flags, RM54);
789 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
790 md->start_match_ptr = ecode - GET(ecode, 1);
791 MRRETURN(MATCH_THEN);
792
793 case OP_THEN_ARG:
794 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
795 offset_top, md, ims, eptrb, flags, RM58);
796 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
797 md->start_match_ptr = ecode - GET(ecode, 1);
798 md->mark = ecode + LINK_SIZE + 2;
799 RRETURN(MATCH_THEN);
800
801 /* Handle a capturing bracket. If there is space in the offset vector, save
802 the current subject position in the working slot at the top of the vector.
803 We mustn't change the current values of the data slot, because they may be
804 set from a previous iteration of this group, and be referred to by a
805 reference inside the group.
806
807 If the bracket fails to match, we need to restore this value and also the
808 values of the final offsets, in case they were set by a previous iteration
809 of the same bracket.
810
811 If there isn't enough space in the offset vector, treat this as if it were
812 a non-capturing bracket. Don't worry about setting the flag for the error
813 case here; that is handled in the code for KET. */
814
815 case OP_CBRA:
816 case OP_SCBRA:
817 number = GET2(ecode, 1+LINK_SIZE);
818 offset = number << 1;
819
820 #ifdef PCRE_DEBUG
821 printf("start bracket %d\n", number);
822 printf("subject=");
823 pchars(eptr, 16, TRUE, md);
824 printf("\n");
825 #endif
826
827 if (offset < md->offset_max)
828 {
829 save_offset1 = md->offset_vector[offset];
830 save_offset2 = md->offset_vector[offset+1];
831 save_offset3 = md->offset_vector[md->offset_end - number];
832 save_capture_last = md->capture_last;
833
834 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
835 md->offset_vector[md->offset_end - number] =
836 (int)(eptr - md->start_subject);
837
838 flags = (op == OP_SCBRA)? match_cbegroup : 0;
839 do
840 {
841 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
842 ims, eptrb, flags, RM1);
843 if (rrc != MATCH_NOMATCH &&
844 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
845 RRETURN(rrc);
846 md->capture_last = save_capture_last;
847 ecode += GET(ecode, 1);
848 }
849 while (*ecode == OP_ALT);
850
851 DPRINTF(("bracket %d failed\n", number));
852
853 md->offset_vector[offset] = save_offset1;
854 md->offset_vector[offset+1] = save_offset2;
855 md->offset_vector[md->offset_end - number] = save_offset3;
856
857 if (rrc != MATCH_THEN) md->mark = markptr;
858 RRETURN(MATCH_NOMATCH);
859 }
860
861 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
862 as a non-capturing bracket. */
863
864 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
865 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
866
867 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
868
869 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
870 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
871
872 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
873 final alternative within the brackets, we would return the result of a
874 recursive call to match() whatever happened. We can reduce stack usage by
875 turning this into a tail recursion, except in the case when match_cbegroup
876 is set.*/
877
878 case OP_BRA:
879 case OP_SBRA:
880 DPRINTF(("start non-capturing bracket\n"));
881 flags = (op >= OP_SBRA)? match_cbegroup : 0;
882 for (;;)
883 {
884 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
885 {
886 if (flags == 0) /* Not a possibly empty group */
887 {
888 ecode += _pcre_OP_lengths[*ecode];
889 DPRINTF(("bracket 0 tail recursion\n"));
890 goto TAIL_RECURSE;
891 }
892
893 /* Possibly empty group; can't use tail recursion. */
894
895 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
896 eptrb, flags, RM48);
897 if (rrc == MATCH_NOMATCH) md->mark = markptr;
898 RRETURN(rrc);
899 }
900
901 /* For non-final alternatives, continue the loop for a NOMATCH result;
902 otherwise return. */
903
904 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
905 eptrb, flags, RM2);
906 if (rrc != MATCH_NOMATCH &&
907 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
908 RRETURN(rrc);
909 ecode += GET(ecode, 1);
910 }
911 /* Control never reaches here. */
912
913 /* Conditional group: compilation checked that there are no more than
914 two branches. If the condition is false, skipping the first branch takes us
915 past the end if there is only one branch, but that's OK because that is
916 exactly what going to the ket would do. As there is only one branch to be
917 obeyed, we can use tail recursion to avoid using another stack frame. */
918
919 case OP_COND:
920 case OP_SCOND:
921 codelink= GET(ecode, 1);
922
923 /* Because of the way auto-callout works during compile, a callout item is
924 inserted between OP_COND and an assertion condition. */
925
926 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
927 {
928 if (pcre_callout != NULL)
929 {
930 pcre_callout_block cb;
931 cb.version = 1; /* Version 1 of the callout block */
932 cb.callout_number = ecode[LINK_SIZE+2];
933 cb.offset_vector = md->offset_vector;
934 cb.subject = (PCRE_SPTR)md->start_subject;
935 cb.subject_length = (int)(md->end_subject - md->start_subject);
936 cb.start_match = (int)(mstart - md->start_subject);
937 cb.current_position = (int)(eptr - md->start_subject);
938 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
939 cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
940 cb.capture_top = offset_top/2;
941 cb.capture_last = md->capture_last;
942 cb.callout_data = md->callout_data;
943 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
944 if (rrc < 0) RRETURN(rrc);
945 }
946 ecode += _pcre_OP_lengths[OP_CALLOUT];
947 }
948
949 condcode = ecode[LINK_SIZE+1];
950
951 /* Now see what the actual condition is */
952
953 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
954 {
955 if (md->recursive == NULL) /* Not recursing => FALSE */
956 {
957 condition = FALSE;
958 ecode += GET(ecode, 1);
959 }
960 else
961 {
962 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
963 condition = (recno == RREF_ANY || recno == md->recursive->group_num);
964
965 /* If the test is for recursion into a specific subpattern, and it is
966 false, but the test was set up by name, scan the table to see if the
967 name refers to any other numbers, and test them. The condition is true
968 if any one of them is the group currently being recursed into. */
969
970 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
971 {
972 uschar *slotA = md->name_table;
973 for (i = 0; i < md->name_count; i++)
974 {
975 if (GET2(slotA, 0) == recno) break;
976 slotA += md->name_entry_size;
977 }
978
979 /* Found a name for the number - there can be only one; duplicate
980 names for different numbers are allowed, but not vice versa. First
981 scan down for duplicates. */
982
983 if (i < md->name_count)
984 {
985 uschar *slotB = slotA;
986 while (slotB > md->name_table)
987 {
988 slotB -= md->name_entry_size;
989 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
990 {
991 condition = GET2(slotB, 0) == md->recursive->group_num;
992 if (condition) break;
993 }
994 else break;
995 }
996
997 /* Scan up for duplicates */
998
999 if (!condition)
1000 {
1001 slotB = slotA;
1002 for (i++; i < md->name_count; i++)
1003 {
1004 slotB += md->name_entry_size;
1005 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1006 {
1007 condition = GET2(slotB, 0) == md->recursive->group_num;
1008 if (condition) break;
1009 }
1010 else break;
1011 }
1012 }
1013 }
1014 }
1015
1016 /* Choose branch according to the condition */
1017
1018 ecode += condition? 3 : GET(ecode, 1);
1019 }
1020 }
1021
1022 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1023 {
1024 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1025 condition = offset < offset_top && md->offset_vector[offset] >= 0;
1026
1027 /* If the numbered capture is unset, but the reference was by name,
1028 scan the table to see if the name refers to any other numbers, and test
1029 them. The condition is true if any one is set. This is tediously similar
1030 to the code above, but not close enough to try to amalgamate. */
1031
1032 if (!condition && condcode == OP_NCREF)
1033 {
1034 int refno = offset >> 1;
1035 uschar *slotA = md->name_table;
1036
1037 for (i = 0; i < md->name_count; i++)
1038 {
1039 if (GET2(slotA, 0) == refno) break;
1040 slotA += md->name_entry_size;
1041 }
1042
1043 /* Found a name for the number - there can be only one; duplicate names
1044 for different numbers are allowed, but not vice versa. First scan down
1045 for duplicates. */
1046
1047 if (i < md->name_count)
1048 {
1049 uschar *slotB = slotA;
1050 while (slotB > md->name_table)
1051 {
1052 slotB -= md->name_entry_size;
1053 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1054 {
1055 offset = GET2(slotB, 0) << 1;
1056 condition = offset < offset_top &&
1057 md->offset_vector[offset] >= 0;
1058 if (condition) break;
1059 }
1060 else break;
1061 }
1062
1063 /* Scan up for duplicates */
1064
1065 if (!condition)
1066 {
1067 slotB = slotA;
1068 for (i++; i < md->name_count; i++)
1069 {
1070 slotB += md->name_entry_size;
1071 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1072 {
1073 offset = GET2(slotB, 0) << 1;
1074 condition = offset < offset_top &&
1075 md->offset_vector[offset] >= 0;
1076 if (condition) break;
1077 }
1078 else break;
1079 }
1080 }
1081 }
1082 }
1083
1084 /* Choose branch according to the condition */
1085
1086 ecode += condition? 3 : GET(ecode, 1);
1087 }
1088
1089 else if (condcode == OP_DEF) /* DEFINE - always false */
1090 {
1091 condition = FALSE;
1092 ecode += GET(ecode, 1);
1093 }
1094
1095 /* The condition is an assertion. Call match() to evaluate it - setting
1096 the final argument match_condassert causes it to stop at the end of an
1097 assertion. */
1098
1099 else
1100 {
1101 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1102 match_condassert, RM3);
1103 if (rrc == MATCH_MATCH)
1104 {
1105 condition = TRUE;
1106 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1107 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1108 }
1109 else if (rrc != MATCH_NOMATCH &&
1110 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1111 {
1112 RRETURN(rrc); /* Need braces because of following else */
1113 }
1114 else
1115 {
1116 condition = FALSE;
1117 ecode += codelink;
1118 }
1119 }
1120
1121 /* We are now at the branch that is to be obeyed. As there is only one,
1122 we can use tail recursion to avoid using another stack frame, except when
1123 match_cbegroup is required for an unlimited repeat of a possibly empty
1124 group. If the second alternative doesn't exist, we can just plough on. */
1125
1126 if (condition || *ecode == OP_ALT)
1127 {
1128 ecode += 1 + LINK_SIZE;
1129 if (op == OP_SCOND) /* Possibly empty group */
1130 {
1131 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1132 RRETURN(rrc);
1133 }
1134 else /* Group must match something */
1135 {
1136 flags = 0;
1137 goto TAIL_RECURSE;
1138 }
1139 }
1140 else /* Condition false & no alternative */
1141 {
1142 ecode += 1 + LINK_SIZE;
1143 }
1144 break;
1145
1146
1147 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1148 to close any currently open capturing brackets. */
1149
1150 case OP_CLOSE:
1151 number = GET2(ecode, 1);
1152 offset = number << 1;
1153
1154 #ifdef PCRE_DEBUG
1155 printf("end bracket %d at *ACCEPT", number);
1156 printf("\n");
1157 #endif
1158
1159 md->capture_last = number;
1160 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1161 {
1162 md->offset_vector[offset] =
1163 md->offset_vector[md->offset_end - number];
1164 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1165 if (offset_top <= offset) offset_top = offset + 2;
1166 }
1167 ecode += 3;
1168 break;
1169
1170
1171 /* End of the pattern, either real or forced. If we are in a top-level
1172 recursion, we should restore the offsets appropriately and continue from
1173 after the call. */
1174
1175 case OP_ACCEPT:
1176 case OP_END:
1177 if (md->recursive != NULL && md->recursive->group_num == 0)
1178 {
1179 recursion_info *rec = md->recursive;
1180 DPRINTF(("End of pattern in a (?0) recursion\n"));
1181 md->recursive = rec->prevrec;
1182 memmove(md->offset_vector, rec->offset_save,
1183 rec->saved_max * sizeof(int));
1184 offset_top = rec->save_offset_top;
1185 ims = original_ims;
1186 ecode = rec->after_call;
1187 break;
1188 }
1189
1190 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1191 set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1192 the subject. In both cases, backtracking will then try other alternatives,
1193 if any. */
1194
1195 if (eptr == mstart &&
1196 (md->notempty ||
1197 (md->notempty_atstart &&
1198 mstart == md->start_subject + md->start_offset)))
1199 MRRETURN(MATCH_NOMATCH);
1200
1201 /* Otherwise, we have a match. */
1202
1203 md->end_match_ptr = eptr; /* Record where we ended */
1204 md->end_offset_top = offset_top; /* and how many extracts were taken */
1205 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1206
1207 /* For some reason, the macros don't work properly if an expression is
1208 given as the argument to MRRETURN when the heap is in use. */
1209
1210 rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1211 MRRETURN(rrc);
1212
1213 /* Change option settings */
1214
1215 case OP_OPT:
1216 ims = ecode[1];
1217 ecode += 2;
1218 DPRINTF(("ims set to %02lx\n", ims));
1219 break;
1220
1221 /* Assertion brackets. Check the alternative branches in turn - the
1222 matching won't pass the KET for an assertion. If any one branch matches,
1223 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1224 start of each branch to move the current point backwards, so the code at
1225 this level is identical to the lookahead case. */
1226
1227 case OP_ASSERT:
1228 case OP_ASSERTBACK:
1229 do
1230 {
1231 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1232 RM4);
1233 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1234 {
1235 mstart = md->start_match_ptr; /* In case \K reset it */
1236 break;
1237 }
1238 if (rrc != MATCH_NOMATCH &&
1239 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1240 RRETURN(rrc);
1241 ecode += GET(ecode, 1);
1242 }
1243 while (*ecode == OP_ALT);
1244 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1245
1246 /* If checking an assertion for a condition, return MATCH_MATCH. */
1247
1248 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1249
1250 /* Continue from after the assertion, updating the offsets high water
1251 mark, since extracts may have been taken during the assertion. */
1252
1253 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1254 ecode += 1 + LINK_SIZE;
1255 offset_top = md->end_offset_top;
1256 continue;
1257
1258 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1259 PRUNE, or COMMIT means we must assume failure without checking subsequent
1260 branches. */
1261
1262 case OP_ASSERT_NOT:
1263 case OP_ASSERTBACK_NOT:
1264 do
1265 {
1266 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1267 RM5);
1268 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1269 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1270 {
1271 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1272 break;
1273 }
1274 if (rrc != MATCH_NOMATCH &&
1275 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1276 RRETURN(rrc);
1277 ecode += GET(ecode,1);
1278 }
1279 while (*ecode == OP_ALT);
1280
1281 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1282
1283 ecode += 1 + LINK_SIZE;
1284 continue;
1285
1286 /* Move the subject pointer back. This occurs only at the start of
1287 each branch of a lookbehind assertion. If we are too close to the start to
1288 move back, this match function fails. When working with UTF-8 we move
1289 back a number of characters, not bytes. */
1290
1291 case OP_REVERSE:
1292 #ifdef SUPPORT_UTF8
1293 if (utf8)
1294 {
1295 i = GET(ecode, 1);
1296 while (i-- > 0)
1297 {
1298 eptr--;
1299 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1300 BACKCHAR(eptr);
1301 }
1302 }
1303 else
1304 #endif
1305
1306 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1307
1308 {
1309 eptr -= GET(ecode, 1);
1310 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1311 }
1312
1313 /* Save the earliest consulted character, then skip to next op code */
1314
1315 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1316 ecode += 1 + LINK_SIZE;
1317 break;
1318
1319 /* The callout item calls an external function, if one is provided, passing
1320 details of the match so far. This is mainly for debugging, though the
1321 function is able to force a failure. */
1322
1323 case OP_CALLOUT:
1324 if (pcre_callout != NULL)
1325 {
1326 pcre_callout_block cb;
1327 cb.version = 1; /* Version 1 of the callout block */
1328 cb.callout_number = ecode[1];
1329 cb.offset_vector = md->offset_vector;
1330 cb.subject = (PCRE_SPTR)md->start_subject;
1331 cb.subject_length = (int)(md->end_subject - md->start_subject);
1332 cb.start_match = (int)(mstart - md->start_subject);
1333 cb.current_position = (int)(eptr - md->start_subject);
1334 cb.pattern_position = GET(ecode, 2);
1335 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1336 cb.capture_top = offset_top/2;
1337 cb.capture_last = md->capture_last;
1338 cb.callout_data = md->callout_data;
1339 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1340 if (rrc < 0) RRETURN(rrc);
1341 }
1342 ecode += 2 + 2*LINK_SIZE;
1343 break;
1344
1345 /* Recursion either matches the current regex, or some subexpression. The
1346 offset data is the offset to the starting bracket from the start of the
1347 whole pattern. (This is so that it works from duplicated subpatterns.)
1348
1349 If there are any capturing brackets started but not finished, we have to
1350 save their starting points and reinstate them after the recursion. However,
1351 we don't know how many such there are (offset_top records the completed
1352 total) so we just have to save all the potential data. There may be up to
1353 65535 such values, which is too large to put on the stack, but using malloc
1354 for small numbers seems expensive. As a compromise, the stack is used when
1355 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1356 is used. If the malloc fails, nothing is saved and no recursion is
1357 attempted: PCRE_ERROR_NOMEMORY is returned from this call of match()
1358 straight away.
1359
1360 There are also other values that have to be saved. We use a chained
1361 sequence of blocks that actually live on the stack. Thanks to Robin Houston
1362 for the original version of this logic. */
1363
1364 case OP_RECURSE:
1365 {
1366 callpat = md->start_code + GET(ecode, 1);
1367 new_recursive.group_num = (callpat == md->start_code)? 0 :
1368 GET2(callpat, 1 + LINK_SIZE);
1369
1370 /* Add to "recursing stack" */
1371
1372 new_recursive.prevrec = md->recursive;
1373 md->recursive = &new_recursive;
1374
1375 /* Find where to continue from afterwards */
1376
1377 ecode += 1 + LINK_SIZE;
1378 new_recursive.after_call = ecode;
1379
1380 /* Now save the offset data. */
1381
1382 new_recursive.saved_max = md->offset_end;
1383 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1384 new_recursive.offset_save = stacksave;
1385 else
1386 {
1387 new_recursive.offset_save =
1388 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1389 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1390 }
1391
1392 memcpy(new_recursive.offset_save, md->offset_vector,
1393 new_recursive.saved_max * sizeof(int));
1394 new_recursive.save_offset_top = offset_top;
1395
1396 /* OK, now we can do the recursion. For each top-level alternative we
1397 restore the offset and recursion data. */
1398
1399 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1400 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1401 do
1402 {
1403 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1404 md, ims, eptrb, flags, RM6);
1405 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1406 {
1407 DPRINTF(("Recursion matched\n"));
1408 md->recursive = new_recursive.prevrec;
1409 if (new_recursive.offset_save != stacksave)
1410 (pcre_free)(new_recursive.offset_save);
1411 MRRETURN(MATCH_MATCH);
1412 }
1413 else if (rrc != MATCH_NOMATCH &&
1414 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1415 {
1416 DPRINTF(("Recursion gave error %d\n", rrc));
1417 if (new_recursive.offset_save != stacksave)
1418 (pcre_free)(new_recursive.offset_save);
1419 RRETURN(rrc);
1420 }
1421
1422 md->recursive = &new_recursive;
1423 memcpy(md->offset_vector, new_recursive.offset_save,
1424 new_recursive.saved_max * sizeof(int));
1425 callpat += GET(callpat, 1);
1426 }
1427 while (*callpat == OP_ALT);
1428
1429 DPRINTF(("Recursion didn't match\n"));
1430 md->recursive = new_recursive.prevrec;
1431 if (new_recursive.offset_save != stacksave)
1432 (pcre_free)(new_recursive.offset_save);
1433 MRRETURN(MATCH_NOMATCH);
1434 }
1435 /* Control never reaches here */
1436
1437 /* "Once" brackets are like assertion brackets except that after a match,
1438 the point in the subject string is not moved back. Thus there can never be
1439 a move back into the brackets. Friedl calls these "atomic" subpatterns.
1440 Check the alternative branches in turn - the matching won't pass the KET
1441 for this kind of subpattern. If any one branch matches, we carry on as at
1442 the end of a normal bracket, leaving the subject pointer, but resetting
1443 the start-of-match value in case it was changed by \K. */
1444
1445 case OP_ONCE:
1446 prev = ecode;
1447 saved_eptr = eptr;
1448
1449 do
1450 {
1451 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1452 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1453 {
1454 mstart = md->start_match_ptr;
1455 break;
1456 }
1457 if (rrc != MATCH_NOMATCH &&
1458 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1459 RRETURN(rrc);
1460 ecode += GET(ecode,1);
1461 }
1462 while (*ecode == OP_ALT);
1463
1464 /* If hit the end of the group (which could be repeated), fail */
1465
1466 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1467
1468 /* Continue as from after the assertion, updating the offsets high water
1469 mark, since extracts may have been taken. */
1470
1471 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1472
1473 offset_top = md->end_offset_top;
1474 eptr = md->end_match_ptr;
1475
1476 /* For a non-repeating ket, just continue at this level. This also
1477 happens for a repeating ket if no characters were matched in the group.
1478 This is the forcible breaking of infinite loops as implemented in Perl
1479 5.005. If there is an options reset, it will get obeyed in the normal
1480 course of events. */
1481
1482 if (*ecode == OP_KET || eptr == saved_eptr)
1483 {
1484 ecode += 1+LINK_SIZE;
1485 break;
1486 }
1487
1488 /* The repeating kets try the rest of the pattern or restart from the
1489 preceding bracket, in the appropriate order. The second "call" of match()
1490 uses tail recursion, to avoid using another stack frame. We need to reset
1491 any options that changed within the bracket before re-running it, so
1492 check the next opcode. */
1493
1494 if (ecode[1+LINK_SIZE] == OP_OPT)
1495 {
1496 ims = (ims & ~PCRE_IMS) | ecode[4];
1497 DPRINTF(("ims set to %02lx at group repeat\n", ims));
1498 }
1499
1500 if (*ecode == OP_KETRMIN)
1501 {
1502 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1503 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1504 ecode = prev;
1505 flags = 0;
1506 goto TAIL_RECURSE;
1507 }
1508 else /* OP_KETRMAX */
1509 {
1510 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1511 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1512 ecode += 1 + LINK_SIZE;
1513 flags = 0;
1514 goto TAIL_RECURSE;
1515 }
1516 /* Control never gets here */
1517
1518 /* An alternation is the end of a branch; scan along to find the end of the
1519 bracketed group and go to there. */
1520
1521 case OP_ALT:
1522 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1523 break;
1524
1525 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1526 indicating that it may occur zero times. It may repeat infinitely, or not
1527 at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1528 with fixed upper repeat limits are compiled as a number of copies, with the
1529 optional ones preceded by BRAZERO or BRAMINZERO. */
1530
1531 case OP_BRAZERO:
1532 {
1533 next = ecode+1;
1534 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1535 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1536 do next += GET(next,1); while (*next == OP_ALT);
1537 ecode = next + 1 + LINK_SIZE;
1538 }
1539 break;
1540
1541 case OP_BRAMINZERO:
1542 {
1543 next = ecode+1;
1544 do next += GET(next, 1); while (*next == OP_ALT);
1545 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1546 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1547 ecode++;
1548 }
1549 break;
1550
1551 case OP_SKIPZERO:
1552 {
1553 next = ecode+1;
1554 do next += GET(next,1); while (*next == OP_ALT);
1555 ecode = next + 1 + LINK_SIZE;
1556 }
1557 break;
1558
1559 /* End of a group, repeated or non-repeating. */
1560
1561 case OP_KET:
1562 case OP_KETRMIN:
1563 case OP_KETRMAX:
1564 prev = ecode - GET(ecode, 1);
1565
1566 /* If this was a group that remembered the subject start, in order to break
1567 infinite repeats of empty string matches, retrieve the subject start from
1568 the chain. Otherwise, set it NULL. */
1569
1570 if (*prev >= OP_SBRA)
1571 {
1572 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1573 eptrb = eptrb->epb_prev; /* Backup to previous group */
1574 }
1575 else saved_eptr = NULL;
1576
1577 /* If we are at the end of an assertion group or an atomic group, stop
1578 matching and return MATCH_MATCH, but record the current high water mark for
1579 use by positive assertions. We also need to record the match start in case
1580 it was changed by \K. */
1581
1582 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1583 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1584 *prev == OP_ONCE)
1585 {
1586 md->end_match_ptr = eptr; /* For ONCE */
1587 md->end_offset_top = offset_top;
1588 md->start_match_ptr = mstart;
1589 MRRETURN(MATCH_MATCH);
1590 }
1591
1592 /* For capturing groups we have to check the group number back at the start
1593 and if necessary complete handling an extraction by setting the offsets and
1594 bumping the high water mark. Note that whole-pattern recursion is coded as
1595 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1596 when the OP_END is reached. Other recursion is handled here. */
1597
1598 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1599 {
1600 number = GET2(prev, 1+LINK_SIZE);
1601 offset = number << 1;
1602
1603 #ifdef PCRE_DEBUG
1604 printf("end bracket %d", number);
1605 printf("\n");
1606 #endif
1607
1608 md->capture_last = number;
1609 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1610 {
1611 md->offset_vector[offset] =
1612 md->offset_vector[md->offset_end - number];
1613 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1614 if (offset_top <= offset) offset_top = offset + 2;
1615 }
1616
1617 /* Handle a recursively called group. Restore the offsets
1618 appropriately and continue from after the call. */
1619
1620 if (md->recursive != NULL && md->recursive->group_num == number)
1621 {
1622 recursion_info *rec = md->recursive;
1623 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1624 md->recursive = rec->prevrec;
1625 memcpy(md->offset_vector, rec->offset_save,
1626 rec->saved_max * sizeof(int));
1627 offset_top = rec->save_offset_top;
1628 ecode = rec->after_call;
1629 ims = original_ims;
1630 break;
1631 }
1632 }
1633
1634 /* For both capturing and non-capturing groups, reset the value of the ims
1635 flags, in case they got changed during the group. */
1636
1637 ims = original_ims;
1638 DPRINTF(("ims reset to %02lx\n", ims));
1639
1640 /* For a non-repeating ket, just continue at this level. This also
1641 happens for a repeating ket if no characters were matched in the group.
1642 This is the forcible breaking of infinite loops as implemented in Perl
1643 5.005. If there is an options reset, it will get obeyed in the normal
1644 course of events. */
1645
1646 if (*ecode == OP_KET || eptr == saved_eptr)
1647 {
1648 ecode += 1 + LINK_SIZE;
1649 break;
1650 }
1651
1652 /* The repeating kets try the rest of the pattern or restart from the
1653 preceding bracket, in the appropriate order. In the second case, we can use
1654 tail recursion to avoid using another stack frame, unless we have an
1655 unlimited repeat of a group that can match an empty string. */
1656
1657 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1658
1659 if (*ecode == OP_KETRMIN)
1660 {
1661 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1662 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1663 if (flags != 0) /* Could match an empty string */
1664 {
1665 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1666 RRETURN(rrc);
1667 }
1668 ecode = prev;
1669 goto TAIL_RECURSE;
1670 }
1671 else /* OP_KETRMAX */
1672 {
1673 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1674 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1675 ecode += 1 + LINK_SIZE;
1676 flags = 0;
1677 goto TAIL_RECURSE;
1678 }
1679 /* Control never gets here */
1680
1681 /* Start of subject unless notbol, or after internal newline if multiline */
1682
1683 case OP_CIRC:
1684 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1685 if ((ims & PCRE_MULTILINE) != 0)
1686 {
1687 if (eptr != md->start_subject &&
1688 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1689 MRRETURN(MATCH_NOMATCH);
1690 ecode++;
1691 break;
1692 }
1693 /* ... else fall through */
1694
1695 /* Start of subject assertion */
1696
1697 case OP_SOD:
1698 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1699 ecode++;
1700 break;
1701
1702 /* Start of match assertion */
1703
1704 case OP_SOM:
1705 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1706 ecode++;
1707 break;
1708
1709 /* Reset the start of match point */
1710
1711 case OP_SET_SOM:
1712 mstart = eptr;
1713 ecode++;
1714 break;
1715
1716 /* Assert before internal newline if multiline, or before a terminating
1717 newline unless endonly is set, else end of subject unless noteol is set. */
1718
1719 case OP_DOLL:
1720 if ((ims & PCRE_MULTILINE) != 0)
1721 {
1722 if (eptr < md->end_subject)
1723 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1724 else
1725 {
1726 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1727 SCHECK_PARTIAL();
1728 }
1729 ecode++;
1730 break;
1731 }
1732 else /* Not multiline */
1733 {
1734 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1735 if (!md->endonly) goto ASSERT_NL_OR_EOS;
1736 }
1737
1738 /* ... else fall through for endonly */
1739
1740 /* End of subject assertion (\z) */
1741
1742 case OP_EOD:
1743 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1744 SCHECK_PARTIAL();
1745 ecode++;
1746 break;
1747
1748 /* End of subject or ending \n assertion (\Z) */
1749
1750 case OP_EODN:
1751 ASSERT_NL_OR_EOS:
1752 if (eptr < md->end_subject &&
1753 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1754 MRRETURN(MATCH_NOMATCH);
1755
1756 /* Either at end of string or \n before end. */
1757
1758 SCHECK_PARTIAL();
1759 ecode++;
1760 break;
1761
1762 /* Word boundary assertions */
1763
1764 case OP_NOT_WORD_BOUNDARY:
1765 case OP_WORD_BOUNDARY:
1766 {
1767
1768 /* Find out if the previous and current characters are "word" characters.
1769 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1770 be "non-word" characters. Remember the earliest consulted character for
1771 partial matching. */
1772
1773 #ifdef SUPPORT_UTF8
1774 if (utf8)
1775 {
1776 /* Get status of previous character */
1777
1778 if (eptr == md->start_subject) prev_is_word = FALSE; else
1779 {
1780 USPTR lastptr = eptr - 1;
1781 while((*lastptr & 0xc0) == 0x80) lastptr--;
1782 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1783 GETCHAR(c, lastptr);
1784 #ifdef SUPPORT_UCP
1785 if (md->use_ucp)
1786 {
1787 if (c == '_') prev_is_word = TRUE; else
1788 {
1789 int cat = UCD_CATEGORY(c);
1790 prev_is_word = (cat == ucp_L || cat == ucp_N);
1791 }
1792 }
1793 else
1794 #endif
1795 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1796 }
1797
1798 /* Get status of next character */
1799
1800 if (eptr >= md->end_subject)
1801 {
1802 SCHECK_PARTIAL();
1803 cur_is_word = FALSE;
1804 }
1805 else
1806 {
1807 GETCHAR(c, eptr);
1808 #ifdef SUPPORT_UCP
1809 if (md->use_ucp)
1810 {
1811 if (c == '_') cur_is_word = TRUE; else
1812 {
1813 int cat = UCD_CATEGORY(c);
1814 cur_is_word = (cat == ucp_L || cat == ucp_N);
1815 }
1816 }
1817 else
1818 #endif
1819 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1820 }
1821 }
1822 else
1823 #endif
1824
1825 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1826 consistency with the behaviour of \w we do use it in this case. */
1827
1828 {
1829 /* Get status of previous character */
1830
1831 if (eptr == md->start_subject) prev_is_word = FALSE; else
1832 {
1833 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1834 #ifdef SUPPORT_UCP
1835 if (md->use_ucp)
1836 {
1837 c = eptr[-1];
1838 if (c == '_') prev_is_word = TRUE; else
1839 {
1840 int cat = UCD_CATEGORY(c);
1841 prev_is_word = (cat == ucp_L || cat == ucp_N);
1842 }
1843 }
1844 else
1845 #endif
1846 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1847 }
1848
1849 /* Get status of next character */
1850
1851 if (eptr >= md->end_subject)
1852 {
1853 SCHECK_PARTIAL();
1854 cur_is_word = FALSE;
1855 }
1856 else
1857 #ifdef SUPPORT_UCP
1858 if (md->use_ucp)
1859 {
1860 c = *eptr;
1861 if (c == '_') cur_is_word = TRUE; else
1862 {
1863 int cat = UCD_CATEGORY(c);
1864 cur_is_word = (cat == ucp_L || cat == ucp_N);
1865 }
1866 }
1867 else
1868 #endif
1869 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1870 }
1871
1872 /* Now see if the situation is what we want */
1873
1874 if ((*ecode++ == OP_WORD_BOUNDARY)?
1875 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1876 MRRETURN(MATCH_NOMATCH);
1877 }
1878 break;
1879
1880 /* Match a single character type; inline for speed */
1881
1882 case OP_ANY:
1883 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1884 /* Fall through */
1885
1886 case OP_ALLANY:
1887 if (eptr++ >= md->end_subject)
1888 {
1889 SCHECK_PARTIAL();
1890 MRRETURN(MATCH_NOMATCH);
1891 }
1892 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1893 ecode++;
1894 break;
1895
1896 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1897 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1898
1899 case OP_ANYBYTE:
1900 if (eptr++ >= md->end_subject)
1901 {
1902 SCHECK_PARTIAL();
1903 MRRETURN(MATCH_NOMATCH);
1904 }
1905 ecode++;
1906 break;
1907
1908 case OP_NOT_DIGIT:
1909 if (eptr >= md->end_subject)
1910 {
1911 SCHECK_PARTIAL();
1912 MRRETURN(MATCH_NOMATCH);
1913 }
1914 GETCHARINCTEST(c, eptr);
1915 if (
1916 #ifdef SUPPORT_UTF8
1917 c < 256 &&
1918 #endif
1919 (md->ctypes[c] & ctype_digit) != 0
1920 )
1921 MRRETURN(MATCH_NOMATCH);
1922 ecode++;
1923 break;
1924
1925 case OP_DIGIT:
1926 if (eptr >= md->end_subject)
1927 {
1928 SCHECK_PARTIAL();
1929 MRRETURN(MATCH_NOMATCH);
1930 }
1931 GETCHARINCTEST(c, eptr);
1932 if (
1933 #ifdef SUPPORT_UTF8
1934 c >= 256 ||
1935 #endif
1936 (md->ctypes[c] & ctype_digit) == 0
1937 )
1938 MRRETURN(MATCH_NOMATCH);
1939 ecode++;
1940 break;
1941
1942 case OP_NOT_WHITESPACE:
1943 if (eptr >= md->end_subject)
1944 {
1945 SCHECK_PARTIAL();
1946 MRRETURN(MATCH_NOMATCH);
1947 }
1948 GETCHARINCTEST(c, eptr);
1949 if (
1950 #ifdef SUPPORT_UTF8
1951 c < 256 &&
1952 #endif
1953 (md->ctypes[c] & ctype_space) != 0
1954 )
1955 MRRETURN(MATCH_NOMATCH);
1956 ecode++;
1957 break;
1958
1959 case OP_WHITESPACE:
1960 if (eptr >= md->end_subject)
1961 {
1962 SCHECK_PARTIAL();
1963 MRRETURN(MATCH_NOMATCH);
1964 }
1965 GETCHARINCTEST(c, eptr);
1966 if (
1967 #ifdef SUPPORT_UTF8
1968 c >= 256 ||
1969 #endif
1970 (md->ctypes[c] & ctype_space) == 0
1971 )
1972 MRRETURN(MATCH_NOMATCH);
1973 ecode++;
1974 break;
1975
1976 case OP_NOT_WORDCHAR:
1977 if (eptr >= md->end_subject)
1978 {
1979 SCHECK_PARTIAL();
1980 MRRETURN(MATCH_NOMATCH);
1981 }
1982 GETCHARINCTEST(c, eptr);
1983 if (
1984 #ifdef SUPPORT_UTF8
1985 c < 256 &&
1986 #endif
1987 (md->ctypes[c] & ctype_word) != 0
1988 )
1989 MRRETURN(MATCH_NOMATCH);
1990 ecode++;
1991 break;
1992
1993 case OP_WORDCHAR:
1994 if (eptr >= md->end_subject)
1995 {
1996 SCHECK_PARTIAL();
1997 MRRETURN(MATCH_NOMATCH);
1998 }
1999 GETCHARINCTEST(c, eptr);
2000 if (
2001 #ifdef SUPPORT_UTF8
2002 c >= 256 ||
2003 #endif
2004 (md->ctypes[c] & ctype_word) == 0
2005 )
2006 MRRETURN(MATCH_NOMATCH);
2007 ecode++;
2008 break;
2009
2010 case OP_ANYNL:
2011 if (eptr >= md->end_subject)
2012 {
2013 SCHECK_PARTIAL();
2014 MRRETURN(MATCH_NOMATCH);
2015 }
2016 GETCHARINCTEST(c, eptr);
2017 switch(c)
2018 {
2019 default: MRRETURN(MATCH_NOMATCH);
2020
2021 case 0x000d:
2022 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2023 break;
2024
2025 case 0x000a:
2026 break;
2027
2028 case 0x000b:
2029 case 0x000c:
2030 case 0x0085:
2031 case 0x2028:
2032 case 0x2029:
2033 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2034 break;
2035 }
2036 ecode++;
2037 break;
2038
2039 case OP_NOT_HSPACE:
2040 if (eptr >= md->end_subject)
2041 {
2042 SCHECK_PARTIAL();
2043 MRRETURN(MATCH_NOMATCH);
2044 }
2045 GETCHARINCTEST(c, eptr);
2046 switch(c)
2047 {
2048 default: break;
2049 case 0x09: /* HT */
2050 case 0x20: /* SPACE */
2051 case 0xa0: /* NBSP */
2052 case 0x1680: /* OGHAM SPACE MARK */
2053 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2054 case 0x2000: /* EN QUAD */
2055 case 0x2001: /* EM QUAD */
2056 case 0x2002: /* EN SPACE */
2057 case 0x2003: /* EM SPACE */
2058 case 0x2004: /* THREE-PER-EM SPACE */
2059 case 0x2005: /* FOUR-PER-EM SPACE */
2060 case 0x2006: /* SIX-PER-EM SPACE */
2061 case 0x2007: /* FIGURE SPACE */
2062 case 0x2008: /* PUNCTUATION SPACE */
2063 case 0x2009: /* THIN SPACE */
2064 case 0x200A: /* HAIR SPACE */
2065 case 0x202f: /* NARROW NO-BREAK SPACE */
2066 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2067 case 0x3000: /* IDEOGRAPHIC SPACE */
2068 MRRETURN(MATCH_NOMATCH);
2069 }
2070 ecode++;
2071 break;
2072
2073 case OP_HSPACE:
2074 if (eptr >= md->end_subject)
2075 {
2076 SCHECK_PARTIAL();
2077 MRRETURN(MATCH_NOMATCH);
2078 }
2079 GETCHARINCTEST(c, eptr);
2080 switch(c)
2081 {
2082 default: MRRETURN(MATCH_NOMATCH);
2083 case 0x09: /* HT */
2084 case 0x20: /* SPACE */
2085 case 0xa0: /* NBSP */
2086 case 0x1680: /* OGHAM SPACE MARK */
2087 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2088 case 0x2000: /* EN QUAD */
2089 case 0x2001: /* EM QUAD */
2090 case 0x2002: /* EN SPACE */
2091 case 0x2003: /* EM SPACE */
2092 case 0x2004: /* THREE-PER-EM SPACE */
2093 case 0x2005: /* FOUR-PER-EM SPACE */
2094 case 0x2006: /* SIX-PER-EM SPACE */
2095 case 0x2007: /* FIGURE SPACE */
2096 case 0x2008: /* PUNCTUATION SPACE */
2097 case 0x2009: /* THIN SPACE */
2098 case 0x200A: /* HAIR SPACE */
2099 case 0x202f: /* NARROW NO-BREAK SPACE */
2100 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2101 case 0x3000: /* IDEOGRAPHIC SPACE */
2102 break;
2103 }
2104 ecode++;
2105 break;
2106
2107 case OP_NOT_VSPACE:
2108 if (eptr >= md->end_subject)
2109 {
2110 SCHECK_PARTIAL();
2111 MRRETURN(MATCH_NOMATCH);
2112 }
2113 GETCHARINCTEST(c, eptr);
2114 switch(c)
2115 {
2116 default: break;
2117 case 0x0a: /* LF */
2118 case 0x0b: /* VT */
2119 case 0x0c: /* FF */
2120 case 0x0d: /* CR */
2121 case 0x85: /* NEL */
2122 case 0x2028: /* LINE SEPARATOR */
2123 case 0x2029: /* PARAGRAPH SEPARATOR */
2124 MRRETURN(MATCH_NOMATCH);
2125 }
2126 ecode++;
2127 break;
2128
2129 case OP_VSPACE:
2130 if (eptr >= md->end_subject)
2131 {
2132 SCHECK_PARTIAL();
2133 MRRETURN(MATCH_NOMATCH);
2134 }
2135 GETCHARINCTEST(c, eptr);
2136 switch(c)
2137 {
2138 default: MRRETURN(MATCH_NOMATCH);
2139 case 0x0a: /* LF */
2140 case 0x0b: /* VT */
2141 case 0x0c: /* FF */
2142 case 0x0d: /* CR */
2143 case 0x85: /* NEL */
2144 case 0x2028: /* LINE SEPARATOR */
2145 case 0x2029: /* PARAGRAPH SEPARATOR */
2146 break;
2147 }
2148 ecode++;
2149 break;
2150
2151 #ifdef SUPPORT_UCP
2152 /* Check the next character by Unicode property. We will get here only
2153 if the support is in the binary; otherwise a compile-time error occurs. */
2154
2155 case OP_PROP:
2156 case OP_NOTPROP:
2157 if (eptr >= md->end_subject)
2158 {
2159 SCHECK_PARTIAL();
2160 MRRETURN(MATCH_NOMATCH);
2161 }
2162 GETCHARINCTEST(c, eptr);
2163 {
2164 const ucd_record *prop = GET_UCD(c);
2165
2166 switch(ecode[1])
2167 {
2168 case PT_ANY:
2169 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2170 break;
2171
2172 case PT_LAMP:
2173 if ((prop->chartype == ucp_Lu ||
2174 prop->chartype == ucp_Ll ||
2175 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2176 MRRETURN(MATCH_NOMATCH);
2177 break;
2178
2179 case PT_GC:
2180 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2181 MRRETURN(MATCH_NOMATCH);
2182 break;
2183
2184 case PT_PC:
2185 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2186 MRRETURN(MATCH_NOMATCH);
2187 break;
2188
2189 case PT_SC:
2190 if ((ecode[2] != prop->script) == (op == OP_PROP))
2191 MRRETURN(MATCH_NOMATCH);
2192 break;
2193
2194 /* These are specials */
2195
2196 case PT_ALNUM:
2197 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2198 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2199 MRRETURN(MATCH_NOMATCH);
2200 break;
2201
2202 case PT_SPACE: /* Perl space */
2203 if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2204 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2205 == (op == OP_NOTPROP))
2206 MRRETURN(MATCH_NOMATCH);
2207 break;
2208
2209 case PT_PXSPACE: /* POSIX space */
2210 if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2211 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2212 c == CHAR_FF || c == CHAR_CR)
2213 == (op == OP_NOTPROP))
2214 MRRETURN(MATCH_NOMATCH);
2215 break;
2216
2217 case PT_WORD:
2218 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2219 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2220 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2221 MRRETURN(MATCH_NOMATCH);
2222 break;
2223
2224 /* This should never occur */
2225
2226 default:
2227 RRETURN(PCRE_ERROR_INTERNAL);
2228 }
2229
2230 ecode += 3;
2231 }
2232 break;
2233
2234 /* Match an extended Unicode sequence. We will get here only if the support
2235 is in the binary; otherwise a compile-time error occurs. */
2236
2237 case OP_EXTUNI:
2238 if (eptr >= md->end_subject)
2239 {
2240 SCHECK_PARTIAL();
2241 MRRETURN(MATCH_NOMATCH);
2242 }
2243 GETCHARINCTEST(c, eptr);
2244 {
2245 int category = UCD_CATEGORY(c);
2246 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2247 while (eptr < md->end_subject)
2248 {
2249 int len = 1;
2250 if (!utf8) c = *eptr; else
2251 {
2252 GETCHARLEN(c, eptr, len);
2253 }
2254 category = UCD_CATEGORY(c);
2255 if (category != ucp_M) break;
2256 eptr += len;
2257 }
2258 }
2259 ecode++;
2260 break;
2261 #endif
2262
2263
2264 /* Match a back reference, possibly repeatedly. Look past the end of the
2265 item to see if there is repeat information following. The code is similar
2266 to that for character classes, but repeated for efficiency. Then obey
2267 similar code to character type repeats - written out again for speed.
2268 However, if the referenced string is the empty string, always treat
2269 it as matched, any number of times (otherwise there could be infinite
2270 loops). */
2271
2272 case OP_REF:
2273 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2274 ecode += 3;
2275
2276 /* If the reference is unset, there are two possibilities:
2277
2278 (a) In the default, Perl-compatible state, set the length negative;
2279 this ensures that every attempt at a match fails. We can't just fail
2280 here, because of the possibility of quantifiers with zero minima.
2281
2282 (b) If the JavaScript compatibility flag is set, set the length to zero
2283 so that the back reference matches an empty string.
2284
2285 Otherwise, set the length to the length of what was matched by the
2286 referenced subpattern. */
2287
2288 if (offset >= offset_top || md->offset_vector[offset] < 0)
2289 length = (md->jscript_compat)? 0 : -1;
2290 else
2291 length = md->offset_vector[offset+1] - md->offset_vector[offset];
2292
2293 /* Set up for repetition, or handle the non-repeated case */
2294
2295 switch (*ecode)
2296 {
2297 case OP_CRSTAR:
2298 case OP_CRMINSTAR:
2299 case OP_CRPLUS:
2300 case OP_CRMINPLUS:
2301 case OP_CRQUERY:
2302 case OP_CRMINQUERY:
2303 c = *ecode++ - OP_CRSTAR;
2304 minimize = (c & 1) != 0;
2305 min = rep_min[c]; /* Pick up values from tables; */
2306 max = rep_max[c]; /* zero for max => infinity */
2307 if (max == 0) max = INT_MAX;
2308 break;
2309
2310 case OP_CRRANGE:
2311 case OP_CRMINRANGE:
2312 minimize = (*ecode == OP_CRMINRANGE);
2313 min = GET2(ecode, 1);
2314 max = GET2(ecode, 3);
2315 if (max == 0) max = INT_MAX;
2316 ecode += 5;
2317 break;
2318
2319 default: /* No repeat follows */
2320 if ((length = match_ref(offset, eptr, length, md, ims)) < 0)
2321 {
2322 CHECK_PARTIAL();
2323 MRRETURN(MATCH_NOMATCH);
2324 }
2325 eptr += length;
2326 continue; /* With the main loop */
2327 }
2328
2329 /* Handle repeated back references. If the length of the reference is
2330 zero, just continue with the main loop. */
2331
2332 if (length == 0) continue;
2333
2334 /* First, ensure the minimum number of matches are present. We get back
2335 the length of the reference string explicitly rather than passing the
2336 address of eptr, so that eptr can be a register variable. */
2337
2338 for (i = 1; i <= min; i++)
2339 {
2340 int slength;
2341 if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2342 {
2343 CHECK_PARTIAL();
2344 MRRETURN(MATCH_NOMATCH);
2345 }
2346 eptr += slength;
2347 }
2348
2349 /* If min = max, continue at the same level without recursion.
2350 They are not both allowed to be zero. */
2351
2352 if (min == max) continue;
2353
2354 /* If minimizing, keep trying and advancing the pointer */
2355
2356 if (minimize)
2357 {
2358 for (fi = min;; fi++)
2359 {
2360 int slength;
2361 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2362 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2363 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2364 if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2365 {
2366 CHECK_PARTIAL();
2367 MRRETURN(MATCH_NOMATCH);
2368 }
2369 eptr += slength;
2370 }
2371 /* Control never gets here */
2372 }
2373
2374 /* If maximizing, find the longest string and work backwards */
2375
2376 else
2377 {
2378 pp = eptr;
2379 for (i = min; i < max; i++)
2380 {
2381 int slength;
2382 if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2383 {
2384 CHECK_PARTIAL();
2385 break;
2386 }
2387 eptr += slength;
2388 }
2389 while (eptr >= pp)
2390 {
2391 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2392 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2393 eptr -= length;
2394 }
2395 MRRETURN(MATCH_NOMATCH);
2396 }
2397 /* Control never gets here */
2398
2399 /* Match a bit-mapped character class, possibly repeatedly. This op code is
2400 used when all the characters in the class have values in the range 0-255,
2401 and either the matching is caseful, or the characters are in the range
2402 0-127 when UTF-8 processing is enabled. The only difference between
2403 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2404 encountered.
2405
2406 First, look past the end of the item to see if there is repeat information
2407 following. Then obey similar code to character type repeats - written out
2408 again for speed. */
2409
2410 case OP_NCLASS:
2411 case OP_CLASS:
2412 {
2413 data = ecode + 1; /* Save for matching */
2414 ecode += 33; /* Advance past the item */
2415
2416 switch (*ecode)
2417 {
2418 case OP_CRSTAR:
2419 case OP_CRMINSTAR:
2420 case OP_CRPLUS:
2421 case OP_CRMINPLUS:
2422 case OP_CRQUERY:
2423 case OP_CRMINQUERY:
2424 c = *ecode++ - OP_CRSTAR;
2425 minimize = (c & 1) != 0;
2426 min = rep_min[c]; /* Pick up values from tables; */
2427 max = rep_max[c]; /* zero for max => infinity */
2428 if (max == 0) max = INT_MAX;
2429 break;
2430
2431 case OP_CRRANGE:
2432 case OP_CRMINRANGE:
2433 minimize = (*ecode == OP_CRMINRANGE);
2434 min = GET2(ecode, 1);
2435 max = GET2(ecode, 3);
2436 if (max == 0) max = INT_MAX;
2437 ecode += 5;
2438 break;
2439
2440 default: /* No repeat follows */
2441 min = max = 1;
2442 break;
2443 }
2444
2445 /* First, ensure the minimum number of matches are present. */
2446
2447 #ifdef SUPPORT_UTF8
2448 /* UTF-8 mode */
2449 if (utf8)
2450 {
2451 for (i = 1; i <= min; i++)
2452 {
2453 if (eptr >= md->end_subject)
2454 {
2455 SCHECK_PARTIAL();
2456 MRRETURN(MATCH_NOMATCH);
2457 }
2458 GETCHARINC(c, eptr);
2459 if (c > 255)
2460 {
2461 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2462 }
2463 else
2464 {
2465 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2466 }
2467 }
2468 }
2469 else
2470 #endif
2471 /* Not UTF-8 mode */
2472 {
2473 for (i = 1; i <= min; i++)
2474 {
2475 if (eptr >= md->end_subject)
2476 {
2477 SCHECK_PARTIAL();
2478 MRRETURN(MATCH_NOMATCH);
2479 }
2480 c = *eptr++;
2481 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2482 }
2483 }
2484
2485 /* If max == min we can continue with the main loop without the
2486 need to recurse. */
2487
2488 if (min == max) continue;
2489
2490 /* If minimizing, keep testing the rest of the expression and advancing
2491 the pointer while it matches the class. */
2492
2493 if (minimize)
2494 {
2495 #ifdef SUPPORT_UTF8
2496 /* UTF-8 mode */
2497 if (utf8)
2498 {
2499 for (fi = min;; fi++)
2500 {
2501 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2502 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2503 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2504 if (eptr >= md->end_subject)
2505 {
2506 SCHECK_PARTIAL();
2507 MRRETURN(MATCH_NOMATCH);
2508 }
2509 GETCHARINC(c, eptr);
2510 if (c > 255)
2511 {
2512 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2513 }
2514 else
2515 {
2516 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2517 }
2518 }
2519 }
2520 else
2521 #endif
2522 /* Not UTF-8 mode */
2523 {
2524 for (fi = min;; fi++)
2525 {
2526 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2527 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2528 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2529 if (eptr >= md->end_subject)
2530 {
2531 SCHECK_PARTIAL();
2532 MRRETURN(MATCH_NOMATCH);
2533 }
2534 c = *eptr++;
2535 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2536 }
2537 }
2538 /* Control never gets here */
2539 }
2540
2541 /* If maximizing, find the longest possible run, then work backwards. */
2542
2543 else
2544 {
2545 pp = eptr;
2546
2547 #ifdef SUPPORT_UTF8
2548 /* UTF-8 mode */
2549 if (utf8)
2550 {
2551 for (i = min; i < max; i++)
2552 {
2553 int len = 1;
2554 if (eptr >= md->end_subject)
2555 {
2556 SCHECK_PARTIAL();
2557 break;
2558 }
2559 GETCHARLEN(c, eptr, len);
2560 if (c > 255)
2561 {
2562 if (op == OP_CLASS) break;
2563 }
2564 else
2565 {
2566 if ((data[c/8] & (1 << (c&7))) == 0) break;
2567 }
2568 eptr += len;
2569 }
2570 for (;;)
2571 {
2572 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2573 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2574 if (eptr-- == pp) break; /* Stop if tried at original pos */
2575 BACKCHAR(eptr);
2576 }
2577 }
2578 else
2579 #endif
2580 /* Not UTF-8 mode */
2581 {
2582 for (i = min; i < max; i++)
2583 {
2584 if (eptr >= md->end_subject)
2585 {
2586 SCHECK_PARTIAL();
2587 break;
2588 }
2589 c = *eptr;
2590 if ((data[c/8] & (1 << (c&7))) == 0) break;
2591 eptr++;
2592 }
2593 while (eptr >= pp)
2594 {
2595 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2596 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2597 eptr--;
2598 }
2599 }
2600
2601 MRRETURN(MATCH_NOMATCH);
2602 }
2603 }
2604 /* Control never gets here */
2605
2606
2607 /* Match an extended character class. This opcode is encountered only
2608 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2609 mode, because Unicode properties are supported in non-UTF-8 mode. */
2610
2611 #ifdef SUPPORT_UTF8
2612 case OP_XCLASS:
2613 {
2614 data = ecode + 1 + LINK_SIZE; /* Save for matching */
2615 ecode += GET(ecode, 1); /* Advance past the item */
2616
2617 switch (*ecode)
2618 {
2619 case OP_CRSTAR:
2620 case OP_CRMINSTAR:
2621 case OP_CRPLUS:
2622 case OP_CRMINPLUS:
2623 case OP_CRQUERY:
2624 case OP_CRMINQUERY:
2625 c = *ecode++ - OP_CRSTAR;
2626 minimize = (c & 1) != 0;
2627 min = rep_min[c]; /* Pick up values from tables; */
2628 max = rep_max[c]; /* zero for max => infinity */
2629 if (max == 0) max = INT_MAX;
2630 break;
2631
2632 case OP_CRRANGE:
2633 case OP_CRMINRANGE:
2634 minimize = (*ecode == OP_CRMINRANGE);
2635 min = GET2(ecode, 1);
2636 max = GET2(ecode, 3);
2637 if (max == 0) max = INT_MAX;
2638 ecode += 5;
2639 break;
2640
2641 default: /* No repeat follows */
2642 min = max = 1;
2643 break;
2644 }
2645
2646 /* First, ensure the minimum number of matches are present. */
2647
2648 for (i = 1; i <= min; i++)
2649 {
2650 if (eptr >= md->end_subject)
2651 {
2652 SCHECK_PARTIAL();
2653 MRRETURN(MATCH_NOMATCH);
2654 }
2655 GETCHARINCTEST(c, eptr);
2656 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2657 }
2658
2659 /* If max == min we can continue with the main loop without the
2660 need to recurse. */
2661
2662 if (min == max) continue;
2663
2664 /* If minimizing, keep testing the rest of the expression and advancing
2665 the pointer while it matches the class. */
2666
2667 if (minimize)
2668 {
2669 for (fi = min;; fi++)
2670 {
2671 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2672 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2673 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2674 if (eptr >= md->end_subject)
2675 {
2676 SCHECK_PARTIAL();
2677 MRRETURN(MATCH_NOMATCH);
2678 }
2679 GETCHARINCTEST(c, eptr);
2680 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2681 }
2682 /* Control never gets here */
2683 }
2684
2685 /* If maximizing, find the longest possible run, then work backwards. */
2686
2687 else
2688 {
2689 pp = eptr;
2690 for (i = min; i < max; i++)
2691 {
2692 int len = 1;
2693 if (eptr >= md->end_subject)
2694 {
2695 SCHECK_PARTIAL();
2696 break;
2697 }
2698 GETCHARLENTEST(c, eptr, len);
2699 if (!_pcre_xclass(c, data)) break;
2700 eptr += len;
2701 }
2702 for(;;)
2703 {
2704 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2705 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2706 if (eptr-- == pp) break; /* Stop if tried at original pos */
2707 if (utf8) BACKCHAR(eptr);
2708 }
2709 MRRETURN(MATCH_NOMATCH);
2710 }
2711
2712 /* Control never gets here */
2713 }
2714 #endif /* End of XCLASS */
2715
2716 /* Match a single character, casefully */
2717
2718 case OP_CHAR:
2719 #ifdef SUPPORT_UTF8
2720 if (utf8)
2721 {
2722 length = 1;
2723 ecode++;
2724 GETCHARLEN(fc, ecode, length);
2725 if (length > md->end_subject - eptr)
2726 {
2727 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2728 MRRETURN(MATCH_NOMATCH);
2729 }
2730 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2731 }
2732 else
2733 #endif
2734
2735 /* Non-UTF-8 mode */
2736 {
2737 if (md->end_subject - eptr < 1)
2738 {
2739 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2740 MRRETURN(MATCH_NOMATCH);
2741 }
2742 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2743 ecode += 2;
2744 }
2745 break;
2746
2747 /* Match a single character, caselessly */
2748
2749 case OP_CHARNC:
2750 #ifdef SUPPORT_UTF8
2751 if (utf8)
2752 {
2753 length = 1;
2754 ecode++;
2755 GETCHARLEN(fc, ecode, length);
2756
2757 if (length > md->end_subject - eptr)
2758 {
2759 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2760 MRRETURN(MATCH_NOMATCH);
2761 }
2762
2763 /* If the pattern character's value is < 128, we have only one byte, and
2764 can use the fast lookup table. */
2765
2766 if (fc < 128)
2767 {
2768 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2769 }
2770
2771 /* Otherwise we must pick up the subject character */
2772
2773 else
2774 {
2775 unsigned int dc;
2776 GETCHARINC(dc, eptr);
2777 ecode += length;
2778
2779 /* If we have Unicode property support, we can use it to test the other
2780 case of the character, if there is one. */
2781
2782 if (fc != dc)
2783 {
2784 #ifdef SUPPORT_UCP
2785 if (dc != UCD_OTHERCASE(fc))
2786 #endif
2787 MRRETURN(MATCH_NOMATCH);
2788 }
2789 }
2790 }
2791 else
2792 #endif /* SUPPORT_UTF8 */
2793
2794 /* Non-UTF-8 mode */
2795 {
2796 if (md->end_subject - eptr < 1)
2797 {
2798 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2799 MRRETURN(MATCH_NOMATCH);
2800 }
2801 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2802 ecode += 2;
2803 }
2804 break;
2805
2806 /* Match a single character repeatedly. */
2807
2808 case OP_EXACT:
2809 min = max = GET2(ecode, 1);
2810 ecode += 3;
2811 goto REPEATCHAR;
2812
2813 case OP_POSUPTO:
2814 possessive = TRUE;
2815 /* Fall through */
2816
2817 case OP_UPTO:
2818 case OP_MINUPTO:
2819 min = 0;
2820 max = GET2(ecode, 1);
2821 minimize = *ecode == OP_MINUPTO;
2822 ecode += 3;
2823 goto REPEATCHAR;
2824
2825 case OP_POSSTAR:
2826 possessive = TRUE;
2827 min = 0;
2828 max = INT_MAX;
2829 ecode++;
2830 goto REPEATCHAR;
2831
2832 case OP_POSPLUS:
2833 possessive = TRUE;
2834 min = 1;
2835 max = INT_MAX;
2836 ecode++;
2837 goto REPEATCHAR;
2838
2839 case OP_POSQUERY:
2840 possessive = TRUE;
2841 min = 0;
2842 max = 1;
2843 ecode++;
2844 goto REPEATCHAR;
2845
2846 case OP_STAR:
2847 case OP_MINSTAR:
2848 case OP_PLUS:
2849 case OP_MINPLUS:
2850 case OP_QUERY:
2851 case OP_MINQUERY:
2852 c = *ecode++ - OP_STAR;
2853 minimize = (c & 1) != 0;
2854
2855 min = rep_min[c]; /* Pick up values from tables; */
2856 max = rep_max[c]; /* zero for max => infinity */
2857 if (max == 0) max = INT_MAX;
2858
2859 /* Common code for all repeated single-character matches. */
2860
2861 REPEATCHAR:
2862 #ifdef SUPPORT_UTF8
2863 if (utf8)
2864 {
2865 length = 1;
2866 charptr = ecode;
2867 GETCHARLEN(fc, ecode, length);
2868 ecode += length;
2869
2870 /* Handle multibyte character matching specially here. There is
2871 support for caseless matching if UCP support is present. */
2872
2873 if (length > 1)
2874 {
2875 #ifdef SUPPORT_UCP
2876 unsigned int othercase;
2877 if ((ims & PCRE_CASELESS) != 0 &&
2878 (othercase = UCD_OTHERCASE(fc)) != fc)
2879 oclength = _pcre_ord2utf8(othercase, occhars);
2880 else oclength = 0;
2881 #endif /* SUPPORT_UCP */
2882
2883 for (i = 1; i <= min; i++)
2884 {
2885 if (eptr <= md->end_subject - length &&
2886 memcmp(eptr, charptr, length) == 0) eptr += length;
2887 #ifdef SUPPORT_UCP
2888 else if (oclength > 0 &&
2889 eptr <= md->end_subject - oclength &&
2890 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2891 #endif /* SUPPORT_UCP */
2892 else
2893 {
2894 CHECK_PARTIAL();
2895 MRRETURN(MATCH_NOMATCH);
2896 }
2897 }
2898
2899 if (min == max) continue;
2900
2901 if (minimize)
2902 {
2903 for (fi = min;; fi++)
2904 {
2905 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2906 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2907 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2908 if (eptr <= md->end_subject - length &&
2909 memcmp(eptr, charptr, length) == 0) eptr += length;
2910 #ifdef SUPPORT_UCP
2911 else if (oclength > 0 &&
2912 eptr <= md->end_subject - oclength &&
2913 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2914 #endif /* SUPPORT_UCP */
2915 else
2916 {
2917 CHECK_PARTIAL();
2918 MRRETURN(MATCH_NOMATCH);
2919 }
2920 }
2921 /* Control never gets here */
2922 }
2923
2924 else /* Maximize */
2925 {
2926 pp = eptr;
2927 for (i = min; i < max; i++)
2928 {
2929 if (eptr <= md->end_subject - length &&
2930 memcmp(eptr, charptr, length) == 0) eptr += length;
2931 #ifdef SUPPORT_UCP
2932 else if (oclength > 0 &&
2933 eptr <= md->end_subject - oclength &&
2934 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2935 #endif /* SUPPORT_UCP */
2936 else
2937 {
2938 CHECK_PARTIAL();
2939 break;
2940 }
2941 }
2942
2943 if (possessive) continue;
2944
2945 for(;;)
2946 {
2947 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2948 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2949 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2950 #ifdef SUPPORT_UCP
2951 eptr--;
2952 BACKCHAR(eptr);
2953 #else /* without SUPPORT_UCP */
2954 eptr -= length;
2955 #endif /* SUPPORT_UCP */
2956 }
2957 }
2958 /* Control never gets here */
2959 }
2960
2961 /* If the length of a UTF-8 character is 1, we fall through here, and
2962 obey the code as for non-UTF-8 characters below, though in this case the
2963 value of fc will always be < 128. */
2964 }
2965 else
2966 #endif /* SUPPORT_UTF8 */
2967
2968 /* When not in UTF-8 mode, load a single-byte character. */
2969
2970 fc = *ecode++;
2971
2972 /* The value of fc at this point is always less than 256, though we may or
2973 may not be in UTF-8 mode. The code is duplicated for the caseless and
2974 caseful cases, for speed, since matching characters is likely to be quite
2975 common. First, ensure the minimum number of matches are present. If min =
2976 max, continue at the same level without recursing. Otherwise, if
2977 minimizing, keep trying the rest of the expression and advancing one
2978 matching character if failing, up to the maximum. Alternatively, if
2979 maximizing, find the maximum number of characters and work backwards. */
2980
2981 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2982 max, eptr));
2983
2984 if ((ims & PCRE_CASELESS) != 0)
2985 {
2986 fc = md->lcc[fc];
2987 for (i = 1; i <= min; i++)
2988 {
2989 if (eptr >= md->end_subject)
2990 {
2991 SCHECK_PARTIAL();
2992 MRRETURN(MATCH_NOMATCH);
2993 }
2994 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2995 }
2996 if (min == max) continue;
2997 if (minimize)
2998 {
2999 for (fi = min;; fi++)
3000 {
3001 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
3002 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3003 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3004 if (eptr >= md->end_subject)
3005 {
3006 SCHECK_PARTIAL();
3007 MRRETURN(MATCH_NOMATCH);
3008 }
3009 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3010 }
3011 /* Control never gets here */
3012 }
3013 else /* Maximize */
3014 {
3015 pp = eptr;
3016 for (i = min; i < max; i++)
3017 {
3018 if (eptr >= md->end_subject)
3019 {
3020 SCHECK_PARTIAL();
3021 break;
3022 }
3023 if (fc != md->lcc[*eptr]) break;
3024 eptr++;
3025 }
3026
3027 if (possessive) continue;
3028
3029 while (eptr >= pp)
3030 {
3031 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3032 eptr--;
3033 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3034 }
3035 MRRETURN(MATCH_NOMATCH);
3036 }
3037 /* Control never gets here */
3038 }
3039
3040 /* Caseful comparisons (includes all multi-byte characters) */
3041
3042 else
3043 {
3044 for (i = 1; i <= min; i++)
3045 {
3046 if (eptr >= md->end_subject)
3047 {
3048 SCHECK_PARTIAL();
3049 MRRETURN(MATCH_NOMATCH);
3050 }
3051 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3052 }
3053
3054 if (min == max) continue;
3055
3056 if (minimize)
3057 {
3058 for (fi = min;; fi++)
3059 {
3060 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3061 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3062 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3063 if (eptr >= md->end_subject)
3064 {
3065 SCHECK_PARTIAL();
3066 MRRETURN(MATCH_NOMATCH);
3067 }
3068 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3069 }
3070 /* Control never gets here */
3071 }
3072 else /* Maximize */
3073 {
3074 pp = eptr;
3075 for (i = min; i < max; i++)
3076 {
3077 if (eptr >= md->end_subject)
3078 {
3079 SCHECK_PARTIAL();
3080 break;
3081 }
3082 if (fc != *eptr) break;
3083 eptr++;
3084 }
3085 if (possessive) continue;
3086
3087 while (eptr >= pp)
3088 {
3089 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3090 eptr--;
3091 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3092 }
3093 MRRETURN(MATCH_NOMATCH);
3094 }
3095 }
3096 /* Control never gets here */
3097
3098 /* Match a negated single one-byte pattern character. The subject character
3099 we are checking it against can be multibyte. */
3100
3101 case OP_NOT:
3102 if (eptr >= md->end_subject)
3103 {
3104 SCHECK_PARTIAL();
3105 MRRETURN(MATCH_NOMATCH);
3106 }
3107 ecode++;
3108 GETCHARINCTEST(c, eptr);
3109 if ((ims & PCRE_CASELESS) != 0)
3110 {
3111 #ifdef SUPPORT_UTF8
3112 if (c < 256)
3113 #endif
3114 c = md->lcc[c];
3115 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3116 }
3117 else
3118 {
3119 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3120 }
3121 break;
3122
3123 /* Match a negated single one-byte character repeatedly. This is almost a
3124 repeat of the code for a repeated single character, but I haven't found a
3125 nice way of commoning these up that doesn't require a test of the
3126 positive/negative option for each character match. Maybe that wouldn't add
3127 very much to the time taken, but character matching *is* what this is all
3128 about... */
3129
3130 case OP_NOTEXACT:
3131 min = max = GET2(ecode, 1);
3132 ecode += 3;
3133 goto REPEATNOTCHAR;
3134
3135 case OP_NOTUPTO:
3136 case OP_NOTMINUPTO:
3137 min = 0;
3138 max = GET2(ecode, 1);
3139 minimize = *ecode == OP_NOTMINUPTO;
3140 ecode += 3;
3141 goto REPEATNOTCHAR;
3142
3143 case OP_NOTPOSSTAR:
3144 possessive = TRUE;
3145 min = 0;
3146 max = INT_MAX;
3147 ecode++;
3148 goto REPEATNOTCHAR;
3149
3150 case OP_NOTPOSPLUS:
3151 possessive = TRUE;
3152 min = 1;
3153 max = INT_MAX;
3154 ecode++;
3155 goto REPEATNOTCHAR;
3156
3157 case OP_NOTPOSQUERY:
3158 possessive = TRUE;
3159 min = 0;
3160 max = 1;
3161 ecode++;
3162 goto REPEATNOTCHAR;
3163
3164 case OP_NOTPOSUPTO:
3165 possessive = TRUE;
3166 min = 0;
3167 max = GET2(ecode, 1);
3168 ecode += 3;
3169 goto REPEATNOTCHAR;
3170
3171 case OP_NOTSTAR:
3172 case OP_NOTMINSTAR:
3173 case OP_NOTPLUS:
3174 case OP_NOTMINPLUS:
3175 case OP_NOTQUERY:
3176 case OP_NOTMINQUERY:
3177 c = *ecode++ - OP_NOTSTAR;
3178 minimize = (c & 1) != 0;
3179 min = rep_min[c]; /* Pick up values from tables; */
3180 max = rep_max[c]; /* zero for max => infinity */
3181 if (max == 0) max = INT_MAX;
3182
3183 /* Common code for all repeated single-byte matches. */
3184
3185 REPEATNOTCHAR:
3186 fc = *ecode++;
3187
3188 /* The code is duplicated for the caseless and caseful cases, for speed,
3189 since matching characters is likely to be quite common. First, ensure the
3190 minimum number of matches are present. If min = max, continue at the same
3191 level without recursing. Otherwise, if minimizing, keep trying the rest of
3192 the expression and advancing one matching character if failing, up to the
3193 maximum. Alternatively, if maximizing, find the maximum number of
3194 characters and work backwards. */
3195
3196 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3197 max, eptr));
3198
3199 if ((ims & PCRE_CASELESS) != 0)
3200 {
3201 fc = md->lcc[fc];
3202
3203 #ifdef SUPPORT_UTF8
3204 /* UTF-8 mode */
3205 if (utf8)
3206 {
3207 register unsigned int d;
3208 for (i = 1; i <= min; i++)
3209 {
3210 if (eptr >= md->end_subject)
3211 {
3212 SCHECK_PARTIAL();
3213 MRRETURN(MATCH_NOMATCH);
3214 }
3215 GETCHARINC(d, eptr);
3216 if (d < 256) d = md->lcc[d];
3217 if (fc == d) MRRETURN(MATCH_NOMATCH);
3218 }
3219 }
3220 else
3221 #endif
3222
3223 /* Not UTF-8 mode */
3224 {
3225 for (i = 1; i <= min; i++)
3226 {
3227 if (eptr >= md->end_subject)
3228 {
3229 SCHECK_PARTIAL();
3230 MRRETURN(MATCH_NOMATCH);
3231 }
3232 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3233 }
3234 }
3235
3236 if (min == max) continue;
3237
3238 if (minimize)
3239 {
3240 #ifdef SUPPORT_UTF8
3241 /* UTF-8 mode */
3242 if (utf8)
3243 {
3244 register unsigned int d;
3245 for (fi = min;; fi++)
3246 {
3247 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3248 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3249 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3250 if (eptr >= md->end_subject)
3251 {
3252 SCHECK_PARTIAL();
3253 MRRETURN(MATCH_NOMATCH);
3254 }
3255 GETCHARINC(d, eptr);
3256 if (d < 256) d = md->lcc[d];
3257 if (fc == d) MRRETURN(MATCH_NOMATCH);
3258 }
3259 }
3260 else
3261 #endif
3262 /* Not UTF-8 mode */
3263 {
3264 for (fi = min;; fi++)
3265 {
3266 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3267 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3268 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3269 if (eptr >= md->end_subject)
3270 {
3271 SCHECK_PARTIAL();
3272 MRRETURN(MATCH_NOMATCH);
3273 }
3274 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3275 }
3276 }
3277 /* Control never gets here */
3278 }
3279
3280 /* Maximize case */
3281
3282 else
3283 {
3284 pp = eptr;
3285
3286 #ifdef SUPPORT_UTF8
3287 /* UTF-8 mode */
3288 if (utf8)
3289 {
3290 register unsigned int d;
3291 for (i = min; i < max; i++)
3292 {
3293 int len = 1;
3294 if (eptr >= md->end_subject)
3295 {
3296 SCHECK_PARTIAL();
3297 break;
3298 }
3299 GETCHARLEN(d, eptr, len);
3300 if (d < 256) d = md->lcc[d];
3301 if (fc == d) break;
3302 eptr += len;
3303 }
3304 if (possessive) continue;
3305 for(;;)
3306 {
3307 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3308 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3309 if (eptr-- == pp) break; /* Stop if tried at original pos */
3310 BACKCHAR(eptr);
3311 }
3312 }
3313 else
3314 #endif
3315 /* Not UTF-8 mode */
3316 {
3317 for (i = min; i < max; i++)
3318 {
3319 if (eptr >= md->end_subject)
3320 {
3321 SCHECK_PARTIAL();
3322 break;
3323 }
3324 if (fc == md->lcc[*eptr]) break;
3325 eptr++;
3326 }
3327 if (possessive) continue;
3328 while (eptr >= pp)
3329 {
3330 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3331 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3332 eptr--;
3333 }
3334 }
3335
3336 MRRETURN(MATCH_NOMATCH);
3337 }
3338 /* Control never gets here */
3339 }
3340
3341 /* Caseful comparisons */
3342
3343 else
3344 {
3345 #ifdef SUPPORT_UTF8
3346 /* UTF-8 mode */
3347 if (utf8)
3348 {
3349 register unsigned int d;
3350 for (i = 1; i <= min; i++)
3351 {
3352 if (eptr >= md->end_subject)
3353 {
3354 SCHECK_PARTIAL();
3355 MRRETURN(MATCH_NOMATCH);
3356 }
3357 GETCHARINC(d, eptr);
3358 if (fc == d) MRRETURN(MATCH_NOMATCH);
3359 }
3360 }
3361 else
3362 #endif
3363 /* Not UTF-8 mode */
3364 {
3365 for (i = 1; i <= min; i++)
3366 {
3367 if (eptr >= md->end_subject)
3368 {
3369 SCHECK_PARTIAL();
3370 MRRETURN(MATCH_NOMATCH);
3371 }
3372 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3373 }
3374 }
3375
3376 if (min == max) continue;
3377
3378 if (minimize)
3379 {
3380 #ifdef SUPPORT_UTF8
3381 /* UTF-8 mode */
3382 if (utf8)
3383 {
3384 register unsigned int d;
3385 for (fi = min;; fi++)
3386 {
3387 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3388 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3389 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3390 if (eptr >= md->end_subject)
3391 {
3392 SCHECK_PARTIAL();
3393 MRRETURN(MATCH_NOMATCH);
3394 }
3395 GETCHARINC(d, eptr);
3396 if (fc == d) MRRETURN(MATCH_NOMATCH);
3397 }
3398 }
3399 else
3400 #endif
3401 /* Not UTF-8 mode */
3402 {
3403 for (fi = min;; fi++)
3404 {
3405 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3406 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3407 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3408 if (eptr >= md->end_subject)
3409 {
3410 SCHECK_PARTIAL();
3411 MRRETURN(MATCH_NOMATCH);
3412 }
3413 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3414 }
3415 }
3416 /* Control never gets here */
3417 }
3418
3419 /* Maximize case */
3420
3421 else
3422 {
3423 pp = eptr;
3424
3425 #ifdef SUPPORT_UTF8
3426 /* UTF-8 mode */
3427 if (utf8)
3428 {
3429 register unsigned int d;
3430 for (i = min; i < max; i++)
3431 {
3432 int len = 1;
3433 if (eptr >= md->end_subject)
3434 {
3435 SCHECK_PARTIAL();
3436 break;
3437 }
3438 GETCHARLEN(d, eptr, len);
3439 if (fc == d) break;
3440 eptr += len;
3441 }
3442 if (possessive) continue;
3443 for(;;)
3444 {
3445 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3446 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3447 if (eptr-- == pp) break; /* Stop if tried at original pos */
3448 BACKCHAR(eptr);
3449 }
3450 }
3451 else
3452 #endif
3453 /* Not UTF-8 mode */
3454 {
3455 for (i = min; i < max; i++)
3456 {
3457 if (eptr >= md->end_subject)
3458 {
3459 SCHECK_PARTIAL();
3460 break;
3461 }
3462 if (fc == *eptr) break;
3463 eptr++;
3464 }
3465 if (possessive) continue;
3466 while (eptr >= pp)
3467 {
3468 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3469 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3470 eptr--;
3471 }
3472 }
3473
3474 MRRETURN(MATCH_NOMATCH);
3475 }
3476 }
3477 /* Control never gets here */
3478
3479 /* Match a single character type repeatedly; several different opcodes
3480 share code. This is very similar to the code for single characters, but we
3481 repeat it in the interests of efficiency. */
3482
3483 case OP_TYPEEXACT:
3484 min = max = GET2(ecode, 1);
3485 minimize = TRUE;
3486 ecode += 3;
3487 goto REPEATTYPE;
3488
3489 case OP_TYPEUPTO:
3490 case OP_TYPEMINUPTO:
3491 min = 0;
3492 max = GET2(ecode, 1);
3493 minimize = *ecode == OP_TYPEMINUPTO;
3494 ecode += 3;
3495 goto REPEATTYPE;
3496
3497 case OP_TYPEPOSSTAR:
3498 possessive = TRUE;
3499 min = 0;
3500 max = INT_MAX;
3501 ecode++;
3502 goto REPEATTYPE;
3503
3504 case OP_TYPEPOSPLUS:
3505 possessive = TRUE;
3506 min = 1;
3507 max = INT_MAX;
3508 ecode++;
3509 goto REPEATTYPE;
3510
3511 case OP_TYPEPOSQUERY:
3512 possessive = TRUE;
3513 min = 0;
3514 max = 1;
3515 ecode++;
3516 goto REPEATTYPE;
3517
3518 case OP_TYPEPOSUPTO:
3519 possessive = TRUE;
3520 min = 0;
3521 max = GET2(ecode, 1);
3522 ecode += 3;
3523 goto REPEATTYPE;
3524
3525 case OP_TYPESTAR:
3526 case OP_TYPEMINSTAR:
3527 case OP_TYPEPLUS:
3528 case OP_TYPEMINPLUS:
3529 case OP_TYPEQUERY:
3530 case OP_TYPEMINQUERY:
3531 c = *ecode++ - OP_TYPESTAR;
3532 minimize = (c & 1) != 0;
3533 min = rep_min[c]; /* Pick up values from tables; */
3534 max = rep_max[c]; /* zero for max => infinity */
3535 if (max == 0) max = INT_MAX;
3536
3537 /* Common code for all repeated single character type matches. Note that
3538 in UTF-8 mode, '.' matches a character of any length, but for the other
3539 character types, the valid characters are all one-byte long. */
3540
3541 REPEATTYPE:
3542 ctype = *ecode++; /* Code for the character type */
3543
3544 #ifdef SUPPORT_UCP
3545 if (ctype == OP_PROP || ctype == OP_NOTPROP)
3546 {
3547 prop_fail_result = ctype == OP_NOTPROP;
3548 prop_type = *ecode++;
3549 prop_value = *ecode++;
3550 }
3551 else prop_type = -1;
3552 #endif
3553
3554 /* First, ensure the minimum number of matches are present. Use inline
3555 code for maximizing the speed, and do the type test once at the start
3556 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3557 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3558 and single-bytes. */
3559
3560 if (min > 0)
3561 {
3562 #ifdef SUPPORT_UCP
3563 if (prop_type >= 0)
3564 {
3565 switch(prop_type)
3566 {
3567 case PT_ANY:
3568 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3569 for (i = 1; i <= min; i++)
3570 {
3571 if (eptr >= md->end_subject)
3572 {
3573 SCHECK_PARTIAL();
3574 MRRETURN(MATCH_NOMATCH);
3575 }
3576 GETCHARINCTEST(c, eptr);
3577 }
3578 break;
3579
3580 case PT_LAMP:
3581 for (i = 1; i <= min; i++)
3582 {
3583 if (eptr >= md->end_subject)
3584 {
3585 SCHECK_PARTIAL();
3586 MRRETURN(MATCH_NOMATCH);
3587 }
3588 GETCHARINCTEST(c, eptr);
3589 prop_chartype = UCD_CHARTYPE(c);
3590 if ((prop_chartype == ucp_Lu ||
3591 prop_chartype == ucp_Ll ||
3592 prop_chartype == ucp_Lt) == prop_fail_result)
3593 MRRETURN(MATCH_NOMATCH);
3594 }
3595 break;
3596
3597 case PT_GC:
3598 for (i = 1; i <= min; i++)
3599 {
3600 if (eptr >= md->end_subject)
3601 {
3602 SCHECK_PARTIAL();
3603 MRRETURN(MATCH_NOMATCH);
3604 }
3605 GETCHARINCTEST(c, eptr);
3606 prop_category = UCD_CATEGORY(c);
3607 if ((prop_category == prop_value) == prop_fail_result)
3608 MRRETURN(MATCH_NOMATCH);
3609 }
3610 break;
3611
3612 case PT_PC:
3613 for (i = 1; i <= min; i++)
3614 {
3615 if (eptr >= md->end_subject)
3616 {
3617 SCHECK_PARTIAL();
3618 MRRETURN(MATCH_NOMATCH);
3619 }
3620 GETCHARINCTEST(c, eptr);
3621 prop_chartype = UCD_CHARTYPE(c);
3622 if ((prop_chartype == prop_value) == prop_fail_result)
3623 MRRETURN(MATCH_NOMATCH);
3624 }
3625 break;
3626
3627 case PT_SC:
3628 for (i = 1; i <= min; i++)
3629 {
3630 if (eptr >= md->end_subject)
3631 {
3632 SCHECK_PARTIAL();
3633 MRRETURN(MATCH_NOMATCH);
3634 }
3635 GETCHARINCTEST(c, eptr);
3636 prop_script = UCD_SCRIPT(c);
3637 if ((prop_script == prop_value) == prop_fail_result)
3638 MRRETURN(MATCH_NOMATCH);
3639 }
3640 break;
3641
3642 case PT_ALNUM:
3643 for (i = 1; i <= min; i++)
3644 {
3645 if (eptr >= md->end_subject)
3646 {
3647 SCHECK_PARTIAL();
3648 MRRETURN(MATCH_NOMATCH);
3649 }
3650 GETCHARINCTEST(c, eptr);
3651 prop_category = UCD_CATEGORY(c);
3652 if ((prop_category == ucp_L || prop_category == ucp_N)
3653 == prop_fail_result)
3654 MRRETURN(MATCH_NOMATCH);
3655 }
3656 break;
3657
3658 case PT_SPACE: /* Perl space */
3659 for (i = 1; i <= min; i++)
3660 {
3661 if (eptr >= md->end_subject)
3662 {
3663 SCHECK_PARTIAL();
3664 MRRETURN(MATCH_NOMATCH);
3665 }
3666 GETCHARINCTEST(c, eptr);
3667 prop_category = UCD_CATEGORY(c);
3668 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3669 c == CHAR_FF || c == CHAR_CR)
3670 == prop_fail_result)
3671 MRRETURN(MATCH_NOMATCH);
3672 }
3673 break;
3674
3675 case PT_PXSPACE: /* POSIX space */
3676 for (i = 1; i <= min; i++)
3677 {
3678 if (eptr >= md->end_subject)
3679 {
3680 SCHECK_PARTIAL();
3681 MRRETURN(MATCH_NOMATCH);
3682 }
3683 GETCHARINCTEST(c, eptr);
3684 prop_category = UCD_CATEGORY(c);
3685 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3686 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3687 == prop_fail_result)
3688 MRRETURN(MATCH_NOMATCH);
3689 }
3690 break;
3691
3692 case PT_WORD:
3693 for (i = 1; i <= min; i++)
3694 {
3695 if (eptr >= md->end_subject)
3696 {
3697 SCHECK_PARTIAL();
3698 MRRETURN(MATCH_NOMATCH);
3699 }
3700 GETCHARINCTEST(c, eptr);
3701 prop_category = UCD_CATEGORY(c);
3702 if ((prop_category == ucp_L || prop_category == ucp_N ||
3703 c == CHAR_UNDERSCORE)
3704 == prop_fail_result)
3705 MRRETURN(MATCH_NOMATCH);
3706 }
3707 break;
3708
3709 /* This should not occur */
3710
3711 default:
3712 RRETURN(PCRE_ERROR_INTERNAL);
3713 }
3714 }
3715
3716 /* Match extended Unicode sequences. We will get here only if the
3717 support is in the binary; otherwise a compile-time error occurs. */
3718
3719 else if (ctype == OP_EXTUNI)
3720 {
3721 for (i = 1; i <= min; i++)
3722 {
3723 if (eptr >= md->end_subject)
3724 {
3725 SCHECK_PARTIAL();
3726 MRRETURN(MATCH_NOMATCH);
3727 }
3728 GETCHARINCTEST(c, eptr);
3729 prop_category = UCD_CATEGORY(c);
3730 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3731 while (eptr < md->end_subject)
3732 {
3733 int len = 1;
3734 if (!utf8) c = *eptr;
3735 else { GETCHARLEN(c, eptr, len); }
3736 prop_category = UCD_CATEGORY(c);
3737 if (prop_category != ucp_M) break;
3738 eptr += len;
3739 }
3740 }
3741 }
3742
3743 else
3744 #endif /* SUPPORT_UCP */
3745
3746 /* Handle all other cases when the coding is UTF-8 */
3747
3748 #ifdef SUPPORT_UTF8
3749 if (utf8) switch(ctype)
3750 {
3751 case OP_ANY:
3752 for (i = 1; i <= min; i++)
3753 {
3754 if (eptr >= md->end_subject)
3755 {
3756 SCHECK_PARTIAL();
3757 MRRETURN(MATCH_NOMATCH);
3758 }
3759 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3760 eptr++;
3761 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3762 }
3763 break;
3764
3765 case OP_ALLANY:
3766 for (i = 1; i <= min; i++)
3767 {
3768 if (eptr >= md->end_subject)
3769 {
3770 SCHECK_PARTIAL();
3771 MRRETURN(MATCH_NOMATCH);
3772 }
3773 eptr++;
3774 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3775 }
3776 break;
3777
3778 case OP_ANYBYTE:
3779 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3780 eptr += min;
3781 break;
3782
3783 case OP_ANYNL:
3784 for (i = 1; i <= min; i++)
3785 {
3786 if (eptr >= md->end_subject)
3787 {
3788 SCHECK_PARTIAL();
3789 MRRETURN(MATCH_NOMATCH);
3790 }
3791 GETCHARINC(c, eptr);
3792 switch(c)
3793 {
3794 default: MRRETURN(MATCH_NOMATCH);
3795
3796 case 0x000d:
3797 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3798 break;
3799
3800 case 0x000a:
3801 break;
3802
3803 case 0x000b:
3804 case 0x000c:
3805 case 0x0085:
3806 case 0x2028:
3807 case 0x2029:
3808 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3809 break;
3810 }
3811 }
3812 break;
3813
3814 case OP_NOT_HSPACE:
3815 for (i = 1; i <= min; i++)
3816 {
3817 if (eptr >= md->end_subject)
3818 {
3819 SCHECK_PARTIAL();
3820 MRRETURN(MATCH_NOMATCH);
3821 }
3822 GETCHARINC(c, eptr);
3823 switch(c)
3824 {
3825 default: break;
3826 case 0x09: /* HT */
3827 case 0x20: /* SPACE */
3828 case 0xa0: /* NBSP */
3829 case 0x1680: /* OGHAM SPACE MARK */
3830 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3831 case 0x2000: /* EN QUAD */
3832 case 0x2001: /* EM QUAD */
3833 case 0x2002: /* EN SPACE */
3834 case 0x2003: /* EM SPACE */
3835 case 0x2004: /* THREE-PER-EM SPACE */
3836 case 0x2005: /* FOUR-PER-EM SPACE */
3837 case 0x2006: /* SIX-PER-EM SPACE */
3838 case 0x2007: /* FIGURE SPACE */
3839 case 0x2008: /* PUNCTUATION SPACE */
3840 case 0x2009: /* THIN SPACE */
3841 case 0x200A: /* HAIR SPACE */
3842 case 0x202f: /* NARROW NO-BREAK SPACE */
3843 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3844 case 0x3000: /* IDEOGRAPHIC SPACE */
3845 MRRETURN(MATCH_NOMATCH);
3846 }
3847 }
3848 break;
3849
3850 case OP_HSPACE:
3851 for (i = 1; i <= min; i++)
3852 {
3853 if (eptr >= md->end_subject)
3854 {
3855 SCHECK_PARTIAL();
3856 MRRETURN(MATCH_NOMATCH);
3857 }
3858 GETCHARINC(c, eptr);
3859 switch(c)
3860 {
3861 default: MRRETURN(MATCH_NOMATCH);
3862 case 0x09: /* HT */
3863 case 0x20: /* SPACE */
3864 case 0xa0: /* NBSP */
3865 case 0x1680: /* OGHAM SPACE MARK */
3866 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3867 case 0x2000: /* EN QUAD */
3868 case 0x2001: /* EM QUAD */
3869 case 0x2002: /* EN SPACE */
3870 case 0x2003: /* EM SPACE */
3871 case 0x2004: /* THREE-PER-EM SPACE */
3872 case 0x2005: /* FOUR-PER-EM SPACE */
3873 case 0x2006: /* SIX-PER-EM SPACE */
3874 case 0x2007: /* FIGURE SPACE */
3875 case 0x2008: /* PUNCTUATION SPACE */
3876 case 0x2009: /* THIN SPACE */
3877 case 0x200A: /* HAIR SPACE */
3878 case 0x202f: /* NARROW NO-BREAK SPACE */
3879 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3880 case 0x3000: /* IDEOGRAPHIC SPACE */
3881 break;
3882 }
3883 }
3884 break;
3885
3886 case OP_NOT_VSPACE:
3887 for (i = 1; i <= min; i++)
3888 {
3889 if (eptr >= md->end_subject)
3890 {
3891 SCHECK_PARTIAL();
3892 MRRETURN(MATCH_NOMATCH);
3893 }
3894 GETCHARINC(c, eptr);
3895 switch(c)
3896 {
3897 default: break;
3898 case 0x0a: /* LF */
3899 case 0x0b: /* VT */
3900 case 0x0c: /* FF */
3901 case 0x0d: /* CR */
3902 case 0x85: /* NEL */
3903 case 0x2028: /* LINE SEPARATOR */
3904 case 0x2029: /* PARAGRAPH SEPARATOR */
3905 MRRETURN(MATCH_NOMATCH);
3906 }
3907 }
3908 break;
3909
3910 case OP_VSPACE:
3911 for (i = 1; i <= min; i++)
3912 {
3913 if (eptr >= md->end_subject)
3914 {
3915 SCHECK_PARTIAL();
3916 MRRETURN(MATCH_NOMATCH);
3917 }
3918 GETCHARINC(c, eptr);
3919 switch(c)
3920 {
3921 default: MRRETURN(MATCH_NOMATCH);
3922 case 0x0a: /* LF */
3923 case 0x0b: /* VT */
3924 case 0x0c: /* FF */
3925 case 0x0d: /* CR */
3926 case 0x85: /* NEL */
3927 case 0x2028: /* LINE SEPARATOR */
3928 case 0x2029: /* PARAGRAPH SEPARATOR */
3929 break;
3930 }
3931 }
3932 break;
3933
3934 case OP_NOT_DIGIT:
3935 for (i = 1; i <= min; i++)
3936 {
3937 if (eptr >= md->end_subject)
3938 {
3939 SCHECK_PARTIAL();
3940 MRRETURN(MATCH_NOMATCH);
3941 }
3942 GETCHARINC(c, eptr);
3943 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3944 MRRETURN(MATCH_NOMATCH);
3945 }
3946 break;
3947
3948 case OP_DIGIT:
3949 for (i = 1; i <= min; i++)
3950 {
3951 if (eptr >= md->end_subject)
3952 {
3953 SCHECK_PARTIAL();
3954 MRRETURN(MATCH_NOMATCH);
3955 }
3956 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3957 MRRETURN(MATCH_NOMATCH);
3958 /* No need to skip more bytes - we know it's a 1-byte character */
3959 }
3960 break;
3961
3962 case OP_NOT_WHITESPACE:
3963 for (i = 1; i <= min; i++)
3964 {
3965 if (eptr >= md->end_subject)
3966 {
3967 SCHECK_PARTIAL();
3968 MRRETURN(MATCH_NOMATCH);
3969 }
3970 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3971 MRRETURN(MATCH_NOMATCH);
3972 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3973 }
3974 break;
3975
3976 case OP_WHITESPACE:
3977 for (i = 1; i <= min; i++)
3978 {
3979 if (eptr >= md->end_subject)
3980 {
3981 SCHECK_PARTIAL();
3982 MRRETURN(MATCH_NOMATCH);
3983 }
3984 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3985 MRRETURN(MATCH_NOMATCH);
3986 /* No need to skip more bytes - we know it's a 1-byte character */
3987 }
3988 break;
3989
3990 case OP_NOT_WORDCHAR:
3991 for (i = 1; i <= min; i++)
3992 {
3993 if (eptr >= md->end_subject)
3994 {
3995 SCHECK_PARTIAL();
3996 MRRETURN(MATCH_NOMATCH);
3997 }
3998 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3999 MRRETURN(MATCH_NOMATCH);
4000 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
4001 }
4002 break;
4003
4004 case OP_WORDCHAR:
4005 for (i = 1; i <= min; i++)
4006 {
4007 if (eptr >= md->end_subject)
4008 {
4009 SCHECK_PARTIAL();
4010 MRRETURN(MATCH_NOMATCH);
4011 }
4012 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
4013 MRRETURN(MATCH_NOMATCH);
4014 /* No need to skip more bytes - we know it's a 1-byte character */
4015 }
4016 break;
4017
4018 default:
4019 RRETURN(PCRE_ERROR_INTERNAL);
4020 } /* End switch(ctype) */
4021
4022 else
4023 #endif /* SUPPORT_UTF8 */
4024
4025 /* Code for the non-UTF-8 case for minimum matching of operators other
4026 than OP_PROP and OP_NOTPROP. */
4027
4028 switch(ctype)
4029 {
4030 case OP_ANY:
4031 for (i = 1; i <= min; i++)
4032 {
4033 if (eptr >= md->end_subject)
4034 {
4035 SCHECK_PARTIAL();
4036 MRRETURN(MATCH_NOMATCH);
4037 }
4038 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
4039 eptr++;
4040 }
4041 break;
4042
4043 case OP_ALLANY:
4044 if (eptr > md->end_subject - min)
4045 {
4046 SCHECK_PARTIAL();
4047 MRRETURN(MATCH_NOMATCH);
4048 }
4049 eptr += min;
4050 break;
4051
4052 case OP_ANYBYTE:
4053 if (eptr > md->end_subject - min)
4054 {
4055 SCHECK_PARTIAL();
4056 MRRETURN(MATCH_NOMATCH);
4057 }
4058 eptr += min;
4059 break;
4060
4061 case OP_ANYNL:
4062 for (i = 1; i <= min; i++)
4063 {
4064 if (eptr >= md->end_subject)
4065 {
4066 SCHECK_PARTIAL();
4067 MRRETURN(MATCH_NOMATCH);
4068 }
4069 switch(*eptr++)
4070 {
4071 default: MRRETURN(MATCH_NOMATCH);
4072
4073 case 0x000d:
4074 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4075 break;
4076
4077 case 0x000a:
4078 break;
4079
4080 case 0x000b:
4081 case 0x000c:
4082 case 0x0085:
4083 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4084 break;
4085 }
4086 }
4087 break;
4088
4089 case OP_NOT_HSPACE:
4090 for (i = 1; i <= min; i++)
4091 {
4092 if (eptr >= md->end_subject)
4093 {
4094 SCHECK_PARTIAL();
4095 MRRETURN(MATCH_NOMATCH);
4096 }
4097 switch(*eptr++)
4098 {
4099 default: break;
4100 case 0x09: /* HT */
4101 case 0x20: /* SPACE */
4102 case 0xa0: /* NBSP */
4103 MRRETURN(MATCH_NOMATCH);
4104 }
4105 }
4106 break;
4107
4108 case OP_HSPACE:
4109 for (i = 1; i <= min; i++)
4110 {
4111 if (eptr >= md->end_subject)
4112 {
4113 SCHECK_PARTIAL();
4114 MRRETURN(MATCH_NOMATCH);
4115 }
4116 switch(*eptr++)
4117 {
4118 default: MRRETURN(MATCH_NOMATCH);
4119 case 0x09: /* HT */
4120 case 0x20: /* SPACE */
4121 case 0xa0: /* NBSP */
4122 break;
4123 }
4124 }
4125 break;
4126
4127 case OP_NOT_VSPACE:
4128 for (i = 1; i <= min; i++)
4129 {
4130 if (eptr >= md->end_subject)
4131 {
4132 SCHECK_PARTIAL();
4133 MRRETURN(MATCH_NOMATCH);
4134 }
4135 switch(*eptr++)
4136 {
4137 default: break;
4138 case 0x0a: /* LF */
4139 case 0x0b: /* VT */
4140 case 0x0c: /* FF */
4141 case 0x0d: /* CR */
4142 case 0x85: /* NEL */
4143 MRRETURN(MATCH_NOMATCH);
4144 }
4145 }
4146 break;
4147
4148 case OP_VSPACE:
4149 for (i = 1; i <= min; i++)
4150 {
4151 if (eptr >= md->end_subject)
4152 {
4153 SCHECK_PARTIAL();
4154 MRRETURN(MATCH_NOMATCH);
4155 }
4156 switch(*eptr++)
4157 {
4158 default: MRRETURN(MATCH_NOMATCH);
4159 case 0x0a: /* LF */
4160 case 0x0b: /* VT */
4161 case 0x0c: /* FF */
4162 case 0x0d: /* CR */
4163 case 0x85: /* NEL */
4164 break;
4165 }
4166 }
4167 break;
4168
4169 case OP_NOT_DIGIT:
4170 for (i = 1; i <= min; i++)
4171 {
4172 if (eptr >= md->end_subject)
4173 {
4174 SCHECK_PARTIAL();
4175 MRRETURN(MATCH_NOMATCH);
4176 }
4177 if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4178 }
4179 break;
4180
4181 case OP_DIGIT:
4182 for (i = 1; i <= min; i++)
4183 {
4184 if (eptr >= md->end_subject)
4185 {
4186 SCHECK_PARTIAL();
4187 MRRETURN(MATCH_NOMATCH);
4188 }
4189 if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4190 }
4191 break;
4192
4193 case OP_NOT_WHITESPACE:
4194 for (i = 1; i <= min; i++)
4195 {
4196 if (eptr >= md->end_subject)
4197 {
4198 SCHECK_PARTIAL();
4199 MRRETURN(MATCH_NOMATCH);
4200 }
4201 if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4202 }
4203 break;
4204
4205 case OP_WHITESPACE:
4206 for (i = 1; i <= min; i++)
4207 {
4208 if (eptr >= md->end_subject)
4209 {
4210 SCHECK_PARTIAL();
4211 MRRETURN(MATCH_NOMATCH);
4212 }
4213 if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4214 }
4215 break;
4216
4217 case OP_NOT_WORDCHAR:
4218 for (i = 1; i <= min; i++)
4219 {
4220 if (eptr >= md->end_subject)
4221 {
4222 SCHECK_PARTIAL();
4223 MRRETURN(MATCH_NOMATCH);
4224 }
4225 if ((md->ctypes[*eptr++] & ctype_word) != 0)
4226 MRRETURN(MATCH_NOMATCH);
4227 }
4228 break;
4229
4230 case OP_WORDCHAR:
4231 for (i = 1; i <= min; i++)
4232 {
4233 if (eptr >= md->end_subject)
4234 {
4235 SCHECK_PARTIAL();
4236 MRRETURN(MATCH_NOMATCH);
4237 }
4238 if ((md->ctypes[*eptr++] & ctype_word) == 0)
4239 MRRETURN(MATCH_NOMATCH);
4240 }
4241 break;
4242
4243 default:
4244 RRETURN(PCRE_ERROR_INTERNAL);
4245 }
4246 }
4247
4248 /* If min = max, continue at the same level without recursing */
4249
4250 if (min == max) continue;
4251
4252 /* If minimizing, we have to test the rest of the pattern before each
4253 subsequent match. Again, separate the UTF-8 case for speed, and also
4254 separate the UCP cases. */
4255
4256 if (minimize)
4257 {
4258 #ifdef SUPPORT_UCP
4259 if (prop_type >= 0)
4260 {
4261 switch(prop_type)
4262 {
4263 case PT_ANY:
4264 for (fi = min;; fi++)
4265 {
4266 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
4267 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4268 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4269 if (eptr >= md->end_subject)
4270 {
4271 SCHECK_PARTIAL();
4272 MRRETURN(MATCH_NOMATCH);
4273 }
4274 GETCHARINCTEST(c, eptr);
4275 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4276 }
4277 /* Control never gets here */
4278
4279 case PT_LAMP:
4280 for (fi = min;; fi++)
4281 {
4282 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
4283 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4284 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4285 if (eptr >= md->end_subject)
4286 {
4287 SCHECK_PARTIAL();
4288 MRRETURN(MATCH_NOMATCH);
4289 }
4290 GETCHARINCTEST(c, eptr);
4291 prop_chartype = UCD_CHARTYPE(c);
4292 if ((prop_chartype == ucp_Lu ||
4293 prop_chartype == ucp_Ll ||
4294 prop_chartype == ucp_Lt) == prop_fail_result)
4295 MRRETURN(MATCH_NOMATCH);
4296 }
4297 /* Control never gets here */
4298
4299 case PT_GC:
4300 for (fi = min;; fi++)
4301 {
4302 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4303 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4304 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4305 if (eptr >= md->end_subject)
4306 {
4307 SCHECK_PARTIAL();
4308 MRRETURN(MATCH_NOMATCH);
4309 }
4310 GETCHARINCTEST(c, eptr);
4311 prop_category = UCD_CATEGORY(c);
4312 if ((prop_category == prop_value) == prop_fail_result)
4313 MRRETURN(MATCH_NOMATCH);
4314 }
4315 /* Control never gets here */
4316
4317 case PT_PC:
4318 for (fi = min;; fi++)
4319 {
4320 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4321 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4322 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4323 if (eptr >= md->end_subject)
4324 {
4325 SCHECK_PARTIAL();
4326 MRRETURN(MATCH_NOMATCH);
4327 }
4328 GETCHARINCTEST(c, eptr);
4329 prop_chartype = UCD_CHARTYPE(c);
4330 if ((prop_chartype == prop_value) == prop_fail_result)
4331 MRRETURN(MATCH_NOMATCH);
4332 }
4333 /* Control never gets here */
4334
4335 case PT_SC:
4336 for (fi = min;; fi++)
4337 {
4338 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4339 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4340 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4341 if (eptr >= md->end_subject)
4342 {
4343 SCHECK_PARTIAL();
4344 MRRETURN(MATCH_NOMATCH);
4345 }
4346 GETCHARINCTEST(c, eptr);
4347 prop_script = UCD_SCRIPT(c);
4348 if ((prop_script == prop_value) == prop_fail_result)
4349 MRRETURN(MATCH_NOMATCH);
4350 }
4351 /* Control never gets here */
4352
4353 case PT_ALNUM:
4354 for (fi = min;; fi++)
4355 {
4356 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4357 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4358 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4359 if (eptr >= md->end_subject)
4360 {
4361 SCHECK_PARTIAL();
4362 MRRETURN(MATCH_NOMATCH);
4363 }
4364 GETCHARINCTEST(c, eptr);
4365 prop_category = UCD_CATEGORY(c);
4366 if ((prop_category == ucp_L || prop_category == ucp_N)
4367 == prop_fail_result)
4368 MRRETURN(MATCH_NOMATCH);
4369 }
4370 /* Control never gets here */
4371
4372 case PT_SPACE: /* Perl space */
4373 for (fi = min;; fi++)
4374 {
4375 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4376 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4377 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4378 if (eptr >= md->end_subject)
4379 {
4380 SCHECK_PARTIAL();
4381 MRRETURN(MATCH_NOMATCH);
4382 }
4383 GETCHARINCTEST(c, eptr);
4384 prop_category = UCD_CATEGORY(c);
4385 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4386 c == CHAR_FF || c == CHAR_CR)
4387 == prop_fail_result)
4388 MRRETURN(MATCH_NOMATCH);
4389 }
4390 /* Control never gets here */
4391
4392 case PT_PXSPACE: /* POSIX space */
4393 for (fi = min;; fi++)
4394 {
4395 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4396 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4397 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4398 if (eptr >= md->end_subject)
4399 {
4400 SCHECK_PARTIAL();
4401 MRRETURN(MATCH_NOMATCH);
4402 }
4403 GETCHARINCTEST(c, eptr);
4404 prop_category = UCD_CATEGORY(c);
4405 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4406 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4407 == prop_fail_result)
4408 MRRETURN(MATCH_NOMATCH);
4409 }
4410 /* Control never gets here */
4411
4412 case PT_WORD:
4413 for (fi = min;; fi++)
4414 {
4415 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4416 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4417 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4418 if (eptr >= md->end_subject)
4419 {
4420 SCHECK_PARTIAL();
4421 MRRETURN(MATCH_NOMATCH);
4422 }
4423 GETCHARINCTEST(c, eptr);
4424 prop_category = UCD_CATEGORY(c);
4425 if ((prop_category == ucp_L ||
4426 prop_category == ucp_N ||
4427 c == CHAR_UNDERSCORE)
4428 == prop_fail_result)
4429 MRRETURN(MATCH_NOMATCH);
4430 }
4431 /* Control never gets here */
4432
4433 /* This should never occur */
4434
4435 default:
4436 RRETURN(PCRE_ERROR_INTERNAL);
4437 }
4438 }
4439
4440 /* Match extended Unicode sequences. We will get here only if the
4441 support is in the binary; otherwise a compile-time error occurs. */
4442
4443 else if (ctype == OP_EXTUNI)
4444 {
4445 for (fi = min;; fi++)
4446 {
4447 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4448 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4449 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4450 if (eptr >= md->end_subject)
4451 {
4452 SCHECK_PARTIAL();
4453 MRRETURN(MATCH_NOMATCH);
4454 }
4455 GETCHARINCTEST(c, eptr);
4456 prop_category = UCD_CATEGORY(c);
4457 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
4458 while (eptr < md->end_subject)
4459 {
4460 int len = 1;
4461 if (!utf8) c = *eptr;
4462 else { GETCHARLEN(c, eptr, len); }
4463 prop_category = UCD_CATEGORY(c);
4464 if (prop_category != ucp_M) break;
4465 eptr += len;
4466 }
4467 }
4468 }
4469
4470 else
4471 #endif /* SUPPORT_UCP */
4472
4473 #ifdef SUPPORT_UTF8
4474 /* UTF-8 mode */
4475 if (utf8)
4476 {
4477 for (fi = min;; fi++)
4478 {
4479 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
4480 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4481 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4482 if (eptr >= md->end_subject)
4483 {
4484 SCHECK_PARTIAL();
4485 MRRETURN(MATCH_NOMATCH);
4486 }
4487 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4488 MRRETURN(MATCH_NOMATCH);
4489 GETCHARINC(c, eptr);
4490 switch(ctype)
4491 {
4492 case OP_ANY: /* This is the non-NL case */
4493 case OP_ALLANY:
4494 case OP_ANYBYTE:
4495 break;
4496
4497 case OP_ANYNL:
4498 switch(c)
4499 {
4500 default: MRRETURN(MATCH_NOMATCH);
4501 case 0x000d:
4502 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4503 break;
4504 case 0x000a:
4505 break;
4506
4507 case 0x000b:
4508 case 0x000c:
4509 case 0x0085:
4510 case 0x2028:
4511 case 0x2029:
4512 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4513 break;
4514 }
4515 break;
4516
4517 case OP_NOT_HSPACE:
4518 switch(c)
4519 {
4520 default: break;
4521 case 0x09: /* HT */
4522 case 0x20: /* SPACE */
4523 case 0xa0: /* NBSP */
4524 case 0x1680: /* OGHAM SPACE MARK */
4525 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4526 case 0x2000: /* EN QUAD */
4527 case 0x2001: /* EM QUAD */
4528 case 0x2002: /* EN SPACE */
4529 case 0x2003: /* EM SPACE */
4530 case 0x2004: /* THREE-PER-EM SPACE */
4531 case 0x2005: /* FOUR-PER-EM SPACE */
4532 case 0x2006: /* SIX-PER-EM SPACE */
4533 case 0x2007: /* FIGURE SPACE */
4534 case 0x2008: /* PUNCTUATION SPACE */
4535 case 0x2009: /* THIN SPACE */
4536 case 0x200A: /* HAIR SPACE */
4537 case 0x202f: /* NARROW NO-BREAK SPACE */
4538 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4539 case 0x3000: /* IDEOGRAPHIC SPACE */
4540 MRRETURN(MATCH_NOMATCH);
4541 }
4542 break;
4543
4544 case OP_HSPACE:
4545 switch(c)
4546 {
4547 default: MRRETURN(MATCH_NOMATCH);
4548 case 0x09: /* HT */
4549 case 0x20: /* SPACE */
4550 case 0xa0: /* NBSP */
4551 case 0x1680: /* OGHAM SPACE MARK */
4552 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4553 case 0x2000: /* EN QUAD */
4554 case 0x2001: /* EM QUAD */
4555 case 0x2002: /* EN SPACE */
4556 case 0x2003: /* EM SPACE */
4557 case 0x2004: /* THREE-PER-EM SPACE */
4558 case 0x2005: /* FOUR-PER-EM SPACE */
4559 case 0x2006: /* SIX-PER-EM SPACE */
4560 case 0x2007: /* FIGURE SPACE */
4561 case 0x2008: /* PUNCTUATION SPACE */
4562 case 0x2009: /* THIN SPACE */
4563 case 0x200A: /* HAIR SPACE */
4564 case 0x202f: /* NARROW NO-BREAK SPACE */
4565 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4566 case 0x3000: /* IDEOGRAPHIC SPACE */
4567 break;
4568 }
4569 break;
4570
4571 case OP_NOT_VSPACE:
4572 switch(c)
4573 {
4574 default: break;
4575 case 0x0a: /* LF */
4576 case 0x0b: /* VT */
4577 case 0x0c: /* FF */
4578 case 0x0d: /* CR */
4579 case 0x85: /* NEL */
4580 case 0x2028: /* LINE SEPARATOR */
4581 case 0x2029: /* PARAGRAPH SEPARATOR */
4582 MRRETURN(MATCH_NOMATCH);
4583 }
4584 break;
4585
4586 case OP_VSPACE:
4587 switch(c)
4588 {
4589 default: MRRETURN(MATCH_NOMATCH);
4590 case 0x0a: /* LF */
4591 case 0x0b: /* VT */
4592 case 0x0c: /* FF */
4593 case 0x0d: /* CR */
4594 case 0x85: /* NEL */
4595 case 0x2028: /* LINE SEPARATOR */
4596 case 0x2029: /* PARAGRAPH SEPARATOR */
4597 break;
4598 }
4599 break;
4600
4601 case OP_NOT_DIGIT:
4602 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
4603 MRRETURN(MATCH_NOMATCH);
4604 break;
4605
4606 case OP_DIGIT:
4607 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
4608 MRRETURN(MATCH_NOMATCH);
4609 break;
4610
4611 case OP_NOT_WHITESPACE:
4612 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
4613 MRRETURN(MATCH_NOMATCH);
4614 break;
4615
4616 case OP_WHITESPACE:
4617 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
4618 MRRETURN(MATCH_NOMATCH);
4619 break;
4620
4621 case OP_NOT_WORDCHAR:
4622 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
4623 MRRETURN(MATCH_NOMATCH);
4624 break;
4625
4626 case OP_WORDCHAR:
4627 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4628 MRRETURN(MATCH_NOMATCH);
4629 break;
4630
4631 default:
4632 RRETURN(PCRE_ERROR_INTERNAL);
4633 }
4634 }
4635 }
4636 else
4637 #endif
4638 /* Not UTF-8 mode */
4639 {
4640 for (fi = min;; fi++)
4641 {
4642 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4643 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4644 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4645 if (eptr >= md->end_subject)
4646 {
4647 SCHECK_PARTIAL();
4648 MRRETURN(MATCH_NOMATCH);
4649 }
4650 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4651 MRRETURN(MATCH_NOMATCH);
4652 c = *eptr++;
4653 switch(ctype)
4654 {
4655 case OP_ANY: /* This is the non-NL case */
4656 case OP_ALLANY:
4657 case OP_ANYBYTE:
4658 break;
4659
4660 case OP_ANYNL:
4661 switch(c)
4662 {
4663 default: MRRETURN(MATCH_NOMATCH);
4664 case 0x000d:
4665 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4666 break;
4667
4668 case 0x000a:
4669 break;
4670
4671 case 0x000b:
4672 case 0x000c:
4673 case 0x0085:
4674 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4675 break;
4676 }
4677 break;
4678
4679 case OP_NOT_HSPACE:
4680 switch(c)
4681 {
4682 default: break;
4683 case 0x09: /* HT */
4684 case 0x20: /* SPACE */
4685 case 0xa0: /* NBSP */
4686 MRRETURN(MATCH_NOMATCH);
4687 }
4688 break;
4689
4690 case OP_HSPACE:
4691 switch(c)
4692 {
4693 default: MRRETURN(MATCH_NOMATCH);
4694 case 0x09: /* HT */
4695 case 0x20: /* SPACE */
4696 case 0xa0: /* NBSP */
4697 break;
4698 }
4699 break;
4700
4701 case OP_NOT_VSPACE:
4702 switch(c)
4703 {
4704 default: break;
4705 case 0x0a: /* LF */
4706 case 0x0b: /* VT */
4707 case 0x0c: /* FF */
4708 case 0x0d: /* CR */
4709 case 0x85: /* NEL */
4710 MRRETURN(MATCH_NOMATCH);
4711 }
4712 break;
4713
4714 case OP_VSPACE:
4715 switch(c)
4716 {
4717 default: MRRETURN(MATCH_NOMATCH);
4718 case 0x0a: /* LF */
4719 case 0x0b: /* VT */
4720 case 0x0c: /* FF */
4721 case 0x0d: /* CR */
4722 case 0x85: /* NEL */
4723 break;
4724 }
4725 break;
4726
4727 case OP_NOT_DIGIT:
4728 if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4729 break;
4730
4731 case OP_DIGIT:
4732 if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4733 break;
4734
4735 case OP_NOT_WHITESPACE:
4736 if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4737 break;
4738
4739 case OP_WHITESPACE:
4740 if ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4741 break;
4742
4743 case OP_NOT_WORDCHAR:
4744 if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
4745 break;
4746
4747 case OP_WORDCHAR:
4748 if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
4749 break;
4750
4751 default:
4752 RRETURN(PCRE_ERROR_INTERNAL);
4753 }
4754 }
4755 }
4756 /* Control never gets here */
4757 }
4758
4759 /* If maximizing, it is worth using inline code for speed, doing the type
4760 test once at the start (i.e. keep it out of the loop). Again, keep the
4761 UTF-8 and UCP stuff separate. */
4762
4763 else
4764 {
4765 pp = eptr; /* Remember where we started */
4766
4767 #ifdef SUPPORT_UCP
4768 if (prop_type >= 0)
4769 {
4770 switch(prop_type)
4771 {
4772 case PT_ANY:
4773 for (i = min; i < max; i++)
4774 {
4775 int len = 1;
4776 if (eptr >= md->end_subject)
4777 {
4778 SCHECK_PARTIAL();
4779 break;
4780 }
4781 GETCHARLENTEST(c, eptr, len);
4782 if (prop_fail_result) break;
4783 eptr+= len;
4784 }
4785 break;
4786
4787 case PT_LAMP:
4788 for (i = min; i < max; i++)
4789 {
4790 int len = 1;
4791 if (eptr >= md->end_subject)
4792 {
4793 SCHECK_PARTIAL();
4794 break;
4795 }
4796 GETCHARLENTEST(c, eptr, len);
4797 prop_chartype = UCD_CHARTYPE(c);
4798 if ((prop_chartype == ucp_Lu ||
4799 prop_chartype == ucp_Ll ||
4800 prop_chartype == ucp_Lt) == prop_fail_result)
4801 break;
4802 eptr+= len;
4803 }
4804 break;
4805
4806 case PT_GC:
4807 for (i = min; i < max; i++)
4808 {
4809 int len = 1;
4810 if (eptr >= md->end_subject)
4811 {
4812 SCHECK_PARTIAL();
4813 break;
4814 }
4815 GETCHARLENTEST(c, eptr, len);
4816 prop_category = UCD_CATEGORY(c);
4817 if ((prop_category == prop_value) == prop_fail_result)
4818 break;
4819 eptr+= len;
4820 }
4821 break;
4822
4823 case PT_PC:
4824 for (i = min; i < max; i++)
4825 {
4826 int len = 1;
4827 if (eptr >= md->end_subject)
4828 {
4829 SCHECK_PARTIAL();
4830 break;
4831 }
4832 GETCHARLENTEST(c, eptr, len);
4833 prop_chartype = UCD_CHARTYPE(c);
4834 if ((prop_chartype == prop_value) == prop_fail_result)
4835 break;
4836 eptr+= len;
4837 }
4838 break;
4839
4840 case PT_SC:
4841 for (i = min; i < max; i++)
4842 {
4843 int len = 1;
4844 if (eptr >= md->end_subject)
4845 {
4846 SCHECK_PARTIAL();
4847 break;
4848 }
4849 GETCHARLENTEST(c, eptr, len);
4850 prop_script = UCD_SCRIPT(c);
4851 if ((prop_script == prop_value) == prop_fail_result)
4852 break;
4853 eptr+= len;
4854 }
4855 break;
4856
4857 case PT_ALNUM:
4858 for (i = min; i < max; i++)
4859 {
4860 int len = 1;
4861 if (eptr >= md->end_subject)
4862 {
4863 SCHECK_PARTIAL();
4864 break;
4865 }
4866 GETCHARLENTEST(c, eptr, len);
4867 prop_category = UCD_CATEGORY(c);
4868 if ((prop_category == ucp_L || prop_category == ucp_N)
4869 == prop_fail_result)
4870 break;
4871 eptr+= len;
4872 }
4873 break;
4874
4875 case PT_SPACE: /* Perl space */
4876 for (i = min; i < max; i++)
4877 {
4878 int len = 1;
4879 if (eptr >= md->end_subject)
4880 {
4881 SCHECK_PARTIAL();
4882 break;
4883 }
4884 GETCHARLENTEST(c, eptr, len);
4885 prop_category = UCD_CATEGORY(c);
4886 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4887 c == CHAR_FF || c == CHAR_CR)
4888 == prop_fail_result)
4889 break;
4890 eptr+= len;
4891 }
4892 break;
4893
4894 case PT_PXSPACE: /* POSIX space */
4895 for (i = min; i < max; i++)
4896 {
4897 int len = 1;
4898 if (eptr >= md->end_subject)
4899 {
4900 SCHECK_PARTIAL();
4901 break;
4902 }
4903 GETCHARLENTEST(c, eptr, len);
4904 prop_category = UCD_CATEGORY(c);
4905 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4906 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4907 == prop_fail_result)
4908 break;
4909 eptr+= len;
4910 }
4911 break;
4912
4913 case PT_WORD:
4914 for (i = min; i < max; i++)
4915 {
4916 int len = 1;
4917 if (eptr >= md->end_subject)
4918 {
4919 SCHECK_PARTIAL();
4920 break;
4921 }
4922 GETCHARLENTEST(c, eptr, len);
4923 prop_category = UCD_CATEGORY(c);
4924 if ((prop_category == ucp_L || prop_category == ucp_N ||
4925 c == CHAR_UNDERSCORE) == prop_fail_result)
4926 break;
4927 eptr+= len;
4928 }
4929 break;
4930
4931 default:
4932 RRETURN(PCRE_ERROR_INTERNAL);
4933 }
4934
4935 /* eptr is now past the end of the maximum run */
4936
4937 if (possessive) continue;
4938 for(;;)
4939 {
4940 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4941 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4942 if (eptr-- == pp) break; /* Stop if tried at original pos */
4943 if (utf8) BACKCHAR(eptr);
4944 }
4945 }
4946
4947 /* Match extended Unicode sequences. We will get here only if the
4948 support is in the binary; otherwise a compile-time error occurs. */
4949
4950 else if (ctype == OP_EXTUNI)
4951 {
4952 for (i = min; i < max; i++)
4953 {
4954 if (eptr >= md->end_subject)
4955 {
4956 SCHECK_PARTIAL();
4957 break;
4958 }
4959 GETCHARINCTEST(c, eptr);
4960 prop_category = UCD_CATEGORY(c);
4961 if (prop_category == ucp_M) break;
4962 while (eptr < md->end_subject)
4963 {
4964 int len = 1;
4965 if (!utf8) c = *eptr; else
4966 {
4967 GETCHARLEN(c, eptr, len);
4968 }
4969 prop_category = UCD_CATEGORY(c);
4970 if (prop_category != ucp_M) break;
4971 eptr += len;
4972 }
4973 }
4974
4975 /* eptr is now past the end of the maximum run */
4976
4977 if (possessive) continue;
4978
4979 for(;;)
4980 {
4981 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
4982 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4983 if (eptr-- == pp) break; /* Stop if tried at original pos */
4984 for (;;) /* Move back over one extended */
4985 {
4986 int len = 1;
4987 if (!utf8) c = *eptr; else
4988 {
4989 BACKCHAR(eptr);
4990 GETCHARLEN(c, eptr, len);
4991 }
4992 prop_category = UCD_CATEGORY(c);
4993 if (prop_category != ucp_M) break;
4994 eptr--;
4995 }
4996 }
4997 }
4998
4999 else
5000 #endif /* SUPPORT_UCP */
5001
5002 #ifdef SUPPORT_UTF8
5003 /* UTF-8 mode */
5004
5005 if (utf8)
5006 {
5007 switch(ctype)
5008 {
5009 case OP_ANY:
5010 if (max < INT_MAX)
5011 {
5012 for (i = min; i < max; i++)
5013 {
5014 if (eptr >= md->end_subject)
5015 {
5016 SCHECK_PARTIAL();
5017 break;
5018 }
5019 if (IS_NEWLINE(eptr)) break;
5020 eptr++;
5021 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5022 }
5023 }
5024
5025 /* Handle unlimited UTF-8 repeat */
5026
5027 else
5028 {
5029 for (i = min; i < max; i++)
5030 {
5031 if (eptr >= md->end_subject)
5032 {
5033 SCHECK_PARTIAL();
5034 break;
5035 }
5036 if (IS_NEWLINE(eptr)) break;
5037 eptr++;
5038 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5039 }
5040 }
5041 break;
5042
5043 case OP_ALLANY:
5044 if (max < INT_MAX)
5045 {
5046 for (i = min; i < max; i++)
5047 {
5048 if (eptr >= md->end_subject)
5049 {
5050 SCHECK_PARTIAL();
5051 break;
5052 }
5053 eptr++;
5054 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5055 }
5056 }
5057 else eptr = md->end_subject; /* Unlimited UTF-8 repeat */
5058 break;
5059
5060 /* The byte case is the same as non-UTF8 */
5061
5062 case OP_ANYBYTE:
5063 c = max - min;
5064 if (c > (unsigned int)(md->end_subject - eptr))
5065 {
5066 eptr = md->end_subject;
5067 SCHECK_PARTIAL();
5068 }
5069 else eptr += c;
5070 break;
5071
5072 case OP_ANYNL:
5073 for (i = min; i < max; i++)
5074 {
5075 int len = 1;
5076 if (eptr >= md->end_subject)
5077 {
5078 SCHECK_PARTIAL();
5079 break;
5080 }
5081 GETCHARLEN(c, eptr, len);
5082 if (c == 0x000d)
5083 {
5084 if (++eptr >= md->end_subject) break;
5085 if (*eptr == 0x000a) eptr++;
5086 }
5087 else
5088 {
5089 if (c != 0x000a &&
5090 (md->bsr_anycrlf ||
5091 (c != 0x000b && c != 0x000c &&
5092 c != 0x0085 && c != 0x2028 && c != 0x2029)))
5093 break;
5094 eptr += len;
5095 }
5096 }
5097 break;
5098
5099 case OP_NOT_HSPACE:
5100 case OP_HSPACE:
5101 for (i = min; i < max; i++)
5102 {
5103 BOOL gotspace;
5104 int len = 1;
5105 if (eptr >= md->end_subject)
5106 {
5107 SCHECK_PARTIAL();
5108 break;
5109 }
5110 GETCHARLEN(c, eptr, len);
5111 switch(c)
5112 {
5113 default: gotspace = FALSE; break;
5114 case 0x09: /* HT */
5115 case 0x20: /* SPACE */
5116 case 0xa0: /* NBSP */
5117 case 0x1680: /* OGHAM SPACE MARK */
5118 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5119 case 0x2000: /* EN QUAD */
5120 case 0x2001: /* EM QUAD */
5121 case 0x2002: /* EN SPACE */
5122 case 0x2003: /* EM SPACE */
5123 case 0x2004: /* THREE-PER-EM SPACE */
5124 case 0x2005: /* FOUR-PER-EM SPACE */
5125 case 0x2006: /* SIX-PER-EM SPACE */
5126 case 0x2007: /* FIGURE SPACE */
5127 case 0x2008: /* PUNCTUATION SPACE */
5128 case 0x2009: /* THIN SPACE */
5129 case 0x200A: /* HAIR SPACE */
5130 case 0x202f: /* NARROW NO-BREAK SPACE */
5131 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5132 case 0x3000: /* IDEOGRAPHIC SPACE */
5133 gotspace = TRUE;
5134 break;
5135 }
5136 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5137 eptr += len;
5138 }
5139 break;
5140
5141 case OP_NOT_VSPACE:
5142 case OP_VSPACE:
5143 for (i = min; i < max; i++)
5144 {
5145 BOOL gotspace;
5146 int len = 1;
5147 if (eptr >= md->end_subject)
5148 {
5149 SCHECK_PARTIAL();
5150 break;
5151 }
5152 GETCHARLEN(c, eptr, len);
5153 switch(c)
5154 {
5155 default: gotspace = FALSE; break;
5156 case 0x0a: /* LF */
5157 case 0x0b: /* VT */
5158 case 0x0c: /* FF */
5159 case 0x0d: /* CR */
5160 case 0x85: /* NEL */
5161 case 0x2028: /* LINE SEPARATOR */
5162 case 0x2029: /* PARAGRAPH SEPARATOR */
5163 gotspace = TRUE;
5164 break;
5165 }
5166 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5167 eptr += len;
5168 }
5169 break;
5170
5171 case OP_NOT_DIGIT:
5172 for (i = min; i < max; i++)
5173 {
5174 int len = 1;
5175 if (eptr >= md->end_subject)
5176 {
5177 SCHECK_PARTIAL();
5178 break;
5179 }
5180 GETCHARLEN(c, eptr, len);
5181 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5182 eptr+= len;
5183 }
5184 break;
5185
5186 case OP_DIGIT:
5187 for (i = min; i < max; i++)
5188 {
5189 int len = 1;
5190 if (eptr >= md->end_subject)
5191 {
5192 SCHECK_PARTIAL();
5193 break;
5194 }
5195 GETCHARLEN(c, eptr, len);
5196 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5197 eptr+= len;
5198 }
5199 break;
5200
5201 case OP_NOT_WHITESPACE:
5202 for (i = min; i < max; i++)
5203 {
5204 int len = 1;
5205 if (eptr >= md->end_subject)
5206 {
5207 SCHECK_PARTIAL();
5208 break;
5209 }
5210 GETCHARLEN(c, eptr, len);
5211 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5212 eptr+= len;
5213 }
5214 break;
5215
5216 case OP_WHITESPACE:
5217 for (i = min; i < max; i++)
5218 {
5219 int len = 1;
5220 if (eptr >= md->end_subject)
5221 {
5222 SCHECK_PARTIAL();
5223 break;
5224 }
5225 GETCHARLEN(c, eptr, len);
5226 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5227 eptr+= len;
5228 }
5229 break;
5230
5231 case OP_NOT_WORDCHAR:
5232 for (i = min; i < max; i++)
5233 {
5234 int len = 1;
5235 if (eptr >= md->end_subject)
5236 {
5237 SCHECK_PARTIAL();
5238 break;
5239 }
5240 GETCHARLEN(c, eptr, len);
5241 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5242 eptr+= len;
5243 }
5244 break;
5245
5246 case OP_WORDCHAR:
5247 for (i = min; i < max; i++)
5248 {
5249 int len = 1;
5250 if (eptr >= md->end_subject)
5251 {
5252 SCHECK_PARTIAL();
5253 break;
5254 }
5255 GETCHARLEN(c, eptr, len);
5256 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5257 eptr+= len;
5258 }
5259 break;
5260
5261 default:
5262 RRETURN(PCRE_ERROR_INTERNAL);
5263 }
5264
5265 /* eptr is now past the end of the maximum run. If possessive, we are
5266 done (no backing up). Otherwise, match at this position; anything other
5267 than no match is immediately returned. For nomatch, back up one
5268 character, unless we are matching \R and the last thing matched was
5269 \r\n, in which case, back up two bytes. */
5270
5271 if (possessive) continue;
5272 for(;;)
5273 {
5274 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
5275 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5276 if (eptr-- == pp) break; /* Stop if tried at original pos */
5277 BACKCHAR(eptr);
5278 if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
5279 eptr[-1] == '\r') eptr--;
5280 }
5281 }
5282 else
5283 #endif /* SUPPORT_UTF8 */
5284
5285 /* Not UTF-8 mode */
5286 {
5287 switch(ctype)
5288 {
5289 case OP_ANY:
5290 for (i = min; i < max; i++)
5291 {
5292 if (eptr >= md->end_subject)
5293 {
5294 SCHECK_PARTIAL();
5295 break;
5296 }
5297 if (IS_NEWLINE(eptr)) break;
5298 eptr++;
5299 }
5300 break;
5301
5302 case OP_ALLANY:
5303 case OP_ANYBYTE:
5304 c = max - min;
5305 if (c > (unsigned int)(md->end_subject - eptr))
5306 {
5307 eptr = md->end_subject;
5308 SCHECK_PARTIAL();
5309 }
5310 else eptr += c;
5311 break;
5312
5313 case OP_ANYNL:
5314 for (i = min; i < max; i++)
5315 {
5316 if (eptr >= md->end_subject)
5317 {
5318 SCHECK_PARTIAL();
5319 break;
5320 }
5321 c = *eptr;
5322 if (c == 0x000d)
5323 {
5324 if (++eptr >= md->end_subject) break;
5325 if (*eptr == 0x000a) eptr++;
5326 }
5327 else
5328 {
5329 if (c != 0x000a &&
5330 (md->bsr_anycrlf ||
5331 (c != 0x000b && c != 0x000c && c != 0x0085)))
5332 break;
5333 eptr++;
5334 }
5335 }
5336 break;
5337
5338 case OP_NOT_HSPACE:
5339 for (i = min; i < max; i++)
5340 {
5341 if (eptr >= md->end_subject)
5342 {
5343 SCHECK_PARTIAL();
5344 break;
5345 }
5346 c = *eptr;
5347 if (c == 0x09 || c == 0x20 || c == 0xa0) break;
5348 eptr++;
5349 }
5350 break;
5351
5352 case OP_HSPACE:
5353 for (i = min; i < max; i++)
5354 {
5355 if (eptr >= md->end_subject)
5356 {
5357 SCHECK_PARTIAL();
5358 break;
5359 }
5360 c = *eptr;
5361 if (c != 0x09 && c != 0x20 && c != 0xa0) break;
5362 eptr++;
5363 }
5364 break;
5365
5366 case OP_NOT_VSPACE:
5367 for (i = min; i < max; i++)
5368 {
5369 if (eptr >= md->end_subject)
5370 {
5371 SCHECK_PARTIAL();
5372 break;
5373 }
5374 c = *eptr;
5375 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
5376 break;
5377 eptr++;
5378 }
5379 break;
5380
5381 case OP_VSPACE:
5382 for (i = min; i < max; i++)
5383 {
5384 if (eptr >= md->end_subject)
5385 {
5386 SCHECK_PARTIAL();
5387 break;
5388 }
5389 c = *eptr;
5390 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
5391 break;
5392 eptr++;
5393 }
5394 break;
5395
5396 case OP_NOT_DIGIT:
5397 for (i = min; i < max; i++)
5398 {
5399 if (eptr >= md->end_subject)
5400 {
5401 SCHECK_PARTIAL();
5402 break;
5403 }
5404 if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
5405 eptr++;
5406 }
5407 break;
5408
5409 case OP_DIGIT:
5410 for (i = min; i < max; i++)
5411 {
5412 if (eptr >= md->end_subject)
5413 {
5414 SCHECK_PARTIAL();
5415 break;
5416 }
5417 if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
5418 eptr++;
5419 }
5420 break;
5421
5422 case OP_NOT_WHITESPACE:
5423 for (i = min; i < max; i++)
5424 {
5425 if (eptr >= md->end_subject)
5426 {
5427 SCHECK_PARTIAL();
5428 break;
5429 }
5430 if ((md->ctypes[*eptr] & ctype_space) != 0) break;
5431 eptr++;
5432 }
5433 break;
5434
5435 case OP_WHITESPACE:
5436 for (i = min; i < max; i++)
5437 {
5438 if (eptr >= md->end_subject)
5439 {
5440 SCHECK_PARTIAL();
5441 break;
5442 }
5443 if ((md->ctypes[*eptr] & ctype_space) == 0) break;
5444 eptr++;
5445 }
5446 break;
5447
5448 case OP_NOT_WORDCHAR:
5449 for (i = min; i < max; i++)
5450 {
5451 if (eptr >= md->end_subject)
5452 {
5453 SCHECK_PARTIAL();
5454 break;
5455 }
5456 if ((md->ctypes[*eptr] & ctype_word) != 0) break;
5457 eptr++;
5458 }
5459 break;
5460
5461 case OP_WORDCHAR:
5462 for (i = min; i < max; i++)
5463 {
5464 if (eptr >= md->end_subject)
5465 {
5466 SCHECK_PARTIAL();
5467 break;
5468 }
5469 if ((md->ctypes[*eptr] & ctype_word) == 0) break;
5470 eptr++;
5471 }
5472 break;
5473
5474 default:
5475 RRETURN(PCRE_ERROR_INTERNAL);
5476 }
5477
5478 /* eptr is now past the end of the maximum run. If possessive, we are
5479 done (no backing up). Otherwise, match at this position; anything other
5480 than no match is immediately returned. For nomatch, back up one
5481 character (byte), unless we are matching \R and the last thing matched
5482 was \r\n, in which case, back up two bytes. */
5483
5484 if (possessive) continue;
5485 while (eptr >= pp)
5486 {
5487 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
5488 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5489 eptr--;
5490 if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
5491 eptr[-1] == '\r') eptr--;
5492 }
5493 }
5494
5495 /* Get here if we can't make it match with any permitted repetitions */
5496
5497 MRRETURN(MATCH_NOMATCH);
5498 }
5499 /* Control never gets here */
5500
5501 /* There's been some horrible disaster. Arrival here can only mean there is
5502 something seriously wrong in the code above or the OP_xxx definitions. */
5503
5504 default:
5505 DPRINTF(("Unknown opcode %d\n", *ecode));
5506 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
5507 }
5508
5509 /* Do not stick any code in here without much thought; it is assumed
5510 that "continue" in the code above comes out to here to repeat the main
5511 loop. */
5512
5513 } /* End of main loop */
5514 /* Control never reaches here */
5515
5516
5517 /* When compiling to use the heap rather than the stack for recursive calls to
5518 match(), the RRETURN() macro jumps here. The number that is saved in
5519 frame->Xwhere indicates which label we actually want to return to. */
5520
5521 #ifdef NO_RECURSE
5522 #define LBL(val) case val: goto L_RM##val;
5523 HEAP_RETURN:
5524 switch (frame->Xwhere)
5525 {
5526 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
5527 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5528 LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5529 LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5530 LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
5531 #ifdef SUPPORT_UTF8
5532 LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5533 LBL(32) LBL(34) LBL(42) LBL(46)
5534 #ifdef SUPPORT_UCP
5535 LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5536 LBL(59) LBL(60) LBL(61) LBL(62)
5537 #endif /* SUPPORT_UCP */
5538 #endif /* SUPPORT_UTF8 */
5539 default:
5540 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
5541 return PCRE_ERROR_INTERNAL;
5542 }
5543 #undef LBL
5544 #endif /* NO_RECURSE */
5545 }
5546
5547
5548 /***************************************************************************
5549 ****************************************************************************
5550 RECURSION IN THE match() FUNCTION
5551
5552 Undefine all the macros that were defined above to handle this. */
5553
5554 #ifdef NO_RECURSE /* Heap-recursion build only: release the macro names (length, ims, flags etc. are reused as ordinary identifiers by pcre_exec below) */
5555 #undef eptr
5556 #undef ecode
5557 #undef mstart
5558 #undef offset_top
5559 #undef ims
5560 #undef eptrb
5561 #undef flags
5562
5563 #undef callpat
5564 #undef charptr
5565 #undef data
5566 #undef next
5567 #undef pp
5568 #undef prev
5569 #undef saved_eptr
5570
5571 #undef new_recursive
5572
5573 #undef cur_is_word
5574 #undef condition
5575 #undef prev_is_word
5576
5577 #undef original_ims
5578
5579 #undef ctype
5580 #undef length
5581 #undef max
5582 #undef min
5583 #undef number
5584 #undef offset
5585 #undef op
5586 #undef save_capture_last
5587 #undef save_offset1
5588 #undef save_offset2
5589 #undef save_offset3
5590 #undef stacksave
5591
5592 #undef newptrb
5593
5594 #endif /* NO_RECURSE */
5595
5596 /* These two are defined as macros in both cases (with and without NO_RECURSE), so they are undefined unconditionally */
5597
5598 #undef fc
5599 #undef fi
5600
5601 /***************************************************************************
5602 ***************************************************************************/
5603
5604
5605
5606 /*************************************************
5607 * Execute a Regular Expression *
5608 *************************************************/
5609
5610 /* This function applies a compiled re to a subject string and picks out
5611 portions of the string if it matches. Two elements in the vector are set for
5612 each substring: the offsets to the start and end of the substring.
5613
5614 Arguments:
5615 argument_re points to the compiled expression
5616 extra_data points to extra data or is NULL
5617 subject points to the subject string
5618 length length of subject string (may contain binary zeros)
5619 start_offset where to start in the subject string
5620 options option bits
5621 offsets points to a vector of ints to be filled in with offsets
5622 offsetcount the number of elements in the vector
5623
5624 Returns: > 0 => success; value is the number of elements filled in
5625 = 0 => success, but offsets is not big enough
5626 -1 => failed to match
5627 < -1 => some kind of unexpected problem
5628 */
5629
5630 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5631 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
5632 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5633 int offsetcount)
5634 {
5635 int rc, resetcount, ocount;
5636 int first_byte = -1;
5637 int req_byte = -1;
5638 int req_byte2 = -1;
5639 int newline;
5640 unsigned long int ims;
5641 BOOL using_temporary_offsets = FALSE;
5642 BOOL anchored;
5643 BOOL startline;
5644 BOOL firstline;
5645 BOOL first_byte_caseless = FALSE;
5646 BOOL req_byte_caseless = FALSE;
5647 BOOL utf8;
5648 match_data match_block;
5649 match_data *md = &match_block;
5650 const uschar *tables;
5651 const uschar *start_bits = NULL;
5652 USPTR start_match = (USPTR)subject + start_offset;
5653 USPTR end_subject;
5654 USPTR start_partial = NULL;
5655 USPTR req_byte_ptr = start_match - 1;
5656
5657 pcre_study_data internal_study;
5658 const pcre_study_data *study;
5659
5660 real_pcre internal_re;
5661 const real_pcre *external_re = (const real_pcre *)argument_re;
5662 const real_pcre *re = external_re;
5663
5664 /* Plausibility checks */
5665
5666 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
5667 if (re == NULL || subject == NULL ||
5668 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5669 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5670 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5671
5672 /* This information is for finding all the numbers associated with a given
5673 name, for condition testing. */
5674
5675 md->name_table = (uschar *)re + re->name_table_offset;
5676 md->name_count = re->name_count;
5677 md->name_entry_size = re->name_entry_size;
5678
5679 /* Fish out the optional data from the extra_data structure, first setting
5680 the default values. */
5681
5682 study = NULL;
5683 md->match_limit = MATCH_LIMIT;
5684 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
5685 md->callout_data = NULL;
5686
5687 /* The table pointer is always in native byte order. */
5688
5689 tables = external_re->tables;
5690
5691 if (extra_data != NULL)
5692 {
5693 register unsigned int flags = extra_data->flags;
5694 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
5695 study = (const pcre_study_data *)extra_data->study_data;
5696 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
5697 md->match_limit = extra_data->match_limit;
5698 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
5699 md->match_limit_recursion = extra_data->match_limit_recursion;
5700 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
5701 md->callout_data = extra_data->callout_data;
5702 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
5703 }
5704
5705 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
5706 is a feature that makes it possible to save compiled regex and re-use them
5707 in other programs later. */
5708
5709 if (tables == NULL) tables = _pcre_default_tables;
5710
5711 /* Check that the first field in the block is the magic number. If it is not,
5712 test for a regex that was compiled on a host of opposite endianness. If this is
5713 the case, flipped values are put in internal_re and internal_study if there was
5714 study data too. */
5715
5716 if (re->magic_number != MAGIC_NUMBER)
5717 {
5718 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
5719 if (re == NULL) return PCRE_ERROR_BADMAGIC;
5720 if (study != NULL) study = &internal_study;
5721 }
5722
5723 /* Set up other data */
5724
5725 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
5726 startline = (re->flags & PCRE_STARTLINE) != 0;
5727 firstline = (re->options & PCRE_FIRSTLINE) != 0;
5728
5729 /* The code starts after the real_pcre block and the capture name table. */
5730
5731 md->start_code = (const uschar *)external_re + re->name_table_offset +
5732 re->name_count * re->name_entry_size;
5733
5734 md->start_subject = (USPTR)subject;
5735 md->start_offset = start_offset;
5736 md->end_subject = md->start_subject + length;
5737 end_subject = md->end_subject;
5738
5739 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5740 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5741 md->use_ucp = (re->options & PCRE_UCP) != 0;
5742 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5743
5744 md->notbol = (options & PCRE_NOTBOL) != 0;
5745 md->noteol = (options & PCRE_NOTEOL) != 0;
5746 md->notempty = (options & PCRE_NOTEMPTY) != 0;
5747 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5748 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5749 ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5750 md->hitend = FALSE;
5751 md->mark = NULL; /* In case never set */
5752
5753 md->recursive = NULL; /* No recursion at top level */
5754
5755 md->lcc = tables + lcc_offset;
5756 md->ctypes = tables + ctypes_offset;
5757
5758 /* Handle different \R options. */
5759
5760 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
5761 {
5762 case 0:
5763 if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
5764 md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
5765 else
5766 #ifdef BSR_ANYCRLF
5767 md->bsr_anycrlf = TRUE;
5768 #else
5769 md->bsr_anycrlf = FALSE;
5770 #endif
5771 break;
5772
5773 case PCRE_BSR_ANYCRLF:
5774 md->bsr_anycrlf = TRUE;
5775 break;
5776
5777 case PCRE_BSR_UNICODE:
5778 md->bsr_anycrlf = FALSE;
5779 break;
5780
5781 default: return PCRE_ERROR_BADNEWLINE;
5782 }
5783
5784 /* Handle different types of newline. The three bits give eight cases. If
5785 nothing is set at run time, whatever was used at compile time applies. */
5786
5787 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
5788 (pcre_uint32)options) & PCRE_NEWLINE_BITS)
5789 {
5790 case 0: newline = NEWLINE; break; /* Compile-time default */
5791 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
5792 case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
5793 case PCRE_NEWLINE_CR+
5794 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
5795 case PCRE_NEWLINE_ANY: newline = -1; break;
5796 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5797 default: return PCRE_ERROR_BADNEWLINE;
5798 }
5799
5800 if (newline == -2)
5801 {
5802 md->nltype = NLTYPE_ANYCRLF;
5803 }
5804 else if (newline < 0)
5805 {
5806 md->nltype = NLTYPE_ANY;
5807 }
5808 else
5809 {
5810 md->nltype = NLTYPE_FIXED;
5811 if (newline > 255)
5812 {
5813 md->nllen = 2;
5814 md->nl[0] = (newline >> 8) & 255;
5815 md->nl[1] = newline & 255;
5816 }
5817 else
5818 {
5819 md->nllen = 1;
5820 md->nl[0] = newline;
5821 }
5822 }
5823
5824 /* Partial matching was originally supported only for a restricted set of
5825 regexes; from release 8.00 there are no restrictions, but the bits are still
5826 defined (though never set). So there's no harm in leaving this code. */
5827
5828 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5829 return PCRE_ERROR_BADPARTIAL;
5830
5831 /* Check a UTF-8 string if required. Pass back the character offset and error
5832 code if a results vector is available. */
5833
5834 #ifdef SUPPORT_UTF8
5835 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5836 {
5837 int errorcode;
5838 int tb = _pcre_valid_utf8((USPTR)subject, length, &errorcode);
5839 if (tb >= 0)
5840 {
5841 if (offsetcount >= 2)
5842 {
5843 offsets[0] = tb;
5844 offsets[1] = errorcode;
5845 }
5846 return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5847 PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5848 }
5849 if (start_offset > 0 && start_offset < length)
5850 {
5851 tb = ((USPTR)subject)[start_offset] & 0xc0;
5852 if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
5853 }
5854 }
5855 #endif
5856
5857 /* The ims options can vary during the matching as a result of the presence
5858 of (?ims) items in the pattern. They are kept in a local variable so that
5859 restoring at the exit of a group is easy. */
5860
5861 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
5862
5863 /* If the expression has got more back references than the offsets supplied can
5864 hold, we get a temporary chunk of working store to use during the matching.
5865 Otherwise, we can use the vector supplied, rounding down its size to a multiple
5866 of 3. */
5867
5868 ocount = offsetcount - (offsetcount % 3);
5869
5870 if (re->top_backref > 0 && re->top_backref >= ocount/3)
5871 {
5872 ocount = re->top_backref * 3 + 3;
5873 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
5874 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
5875 using_temporary_offsets = TRUE;
5876 DPRINTF(("Got memory to hold back references\n"));
5877 }
5878 else md->offset_vector = offsets;
5879
5880 md->offset_end = ocount;
5881 md->offset_max = (2*ocount)/3;
5882 md->offset_overflow = FALSE;
5883 md->capture_last = -1;
5884
5885 /* Compute the minimum number of offsets that we need to reset each time. Doing
5886 this makes a huge difference to execution time when there aren't many brackets
5887 in the pattern. */
5888
5889 resetcount = 2 + re->top_bracket * 2;
5890 if (resetcount > offsetcount) resetcount = ocount;
5891
5892 /* Reset the working variables associated with each extraction. These should
5893 never be used unless previously set, but they get saved and restored, and so we
5894 initialize them to avoid reading uninitialized locations. */
5895
5896 if (md->offset_vector != NULL)
5897 {
5898 register int *iptr = md->offset_vector + ocount;
5899 register int *iend = iptr - resetcount/2 + 1;
5900 while (--iptr >= iend) *iptr = -1;
5901 }
5902
5903 /* Set up the first character to match, if available. The first_byte value is
5904 never set for an anchored regular expression, but the anchoring may be forced
5905 at run time, so we have to test for anchoring. The first char may be unset for
5906 an unanchored pattern, of course. If there's no first char and the pattern was
5907 studied, there may be a bitmap of possible first characters. */
5908
5909 if (!anchored)
5910 {
5911 if ((re->flags & PCRE_FIRSTSET) != 0)
5912 {
5913 first_byte = re->first_byte & 255;
5914 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
5915 first_byte = md->lcc[first_byte];
5916 }
5917 else
5918 if (!startline && study != NULL &&
5919 (study->flags & PCRE_STUDY_MAPPED) != 0)
5920 start_bits = study->start_bits;
5921 }
5922
5923 /* For anchored or unanchored matches, there may be a "last known required
5924 character" set. */
5925
5926 if ((re->flags & PCRE_REQCHSET) != 0)
5927 {
5928 req_byte = re->req_byte & 255;
5929 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
5930 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
5931 }
5932
5933
5934 /* ==========================================================================*/
5935
5936 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
5937 the loop runs just once. */
5938
5939 for(;;)
5940 {
5941 USPTR save_end_subject = end_subject;
5942 USPTR new_start_match;
5943
5944 /* Reset the maximum number of extractions we might see. */
5945
5946 if (md->offset_vector != NULL)
5947 {
5948 register int *iptr = md->offset_vector;
5949 register int *iend = iptr + resetcount;
5950 while (iptr < iend) *iptr++ = -1;
5951 }
5952
5953 /* If firstline is TRUE, the start of the match is constrained to the first
5954 line of a multiline string. That is, the match must be before or at the first
5955 newline. Implement this by temporarily adjusting end_subject so that we stop
5956 scanning at a newline. If the match fails at the newline, later code breaks
5957 this loop. */
5958
5959 if (firstline)
5960 {
5961 USPTR t = start_match;
5962 #ifdef SUPPORT_UTF8
5963 if (utf8)
5964 {
5965 while (t < md->end_subject && !IS_NEWLINE(t))
5966 {
5967 t++;
5968 while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5969 }
5970 }
5971 else
5972 #endif
5973 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5974 end_subject = t;
5975 }
5976
5977 /* There are some optimizations that avoid running the match if a known
5978 starting point is not found, or if a known later character is not present.
5979 However, there is an option that disables these, for testing and for ensuring
5980 that all callouts do actually occur. The option can be set in the regex by
5981 (*NO_START_OPT) or passed in match-time options. */
5982
5983 if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
5984 {
5985 /* Advance to a unique first byte if there is one. */
5986
5987 if (first_byte >= 0)
5988 {
5989 if (first_byte_caseless)
5990 while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5991 start_match++;
5992 else
5993 while (start_match < end_subject && *start_match != first_byte)
5994 start_match++;
5995 }
5996
5997 /* Or to just after a linebreak for a multiline match */
5998
5999 else if (startline)
6000 {
6001 if (start_match > md->start_subject + start_offset)
6002 {
6003 #ifdef SUPPORT_UTF8
6004 if (utf8)
6005 {
6006 while (start_match < end_subject && !WAS_NEWLINE(start_match))
6007 {
6008 start_match++;
6009 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6010 start_match++;
6011 }
6012 }
6013 else
6014 #endif
6015 while (start_match < end_subject && !WAS_NEWLINE(start_match))
6016 start_match++;
6017
6018 /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6019 and we are now at a LF, advance the match position by one more character.
6020 */
6021
6022 if (start_match[-1] == CHAR_CR &&
6023 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6024 start_match < end_subject &&
6025 *start_match == CHAR_NL)
6026 start_match++;
6027 }
6028 }
6029
6030 /* Or to a non-unique first byte after study */
6031
6032 else if (start_bits != NULL)
6033 {
6034 while (start_match < end_subject)
6035 {
6036 register unsigned int c = *start_match;
6037 if ((start_bits[c/8] & (1 << (c&7))) == 0)
6038 {
6039 start_match++;
6040 #ifdef SUPPORT_UTF8
6041 if (utf8)
6042 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6043 start_match++;
6044 #endif
6045 }
6046 else break;
6047 }
6048 }
6049 } /* Starting optimizations */
6050
6051 /* Restore fudged end_subject */
6052
6053 end_subject = save_end_subject;
6054
6055 /* The following two optimizations are disabled for partial matching or if
6056 disabling is explicitly requested. */
6057
6058 if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6059 {
6060 /* If the pattern was studied, a minimum subject length may be set. This is
6061 a lower bound; no actual string of that length may actually match the
6062 pattern. Although the value is, strictly, in characters, we treat it as
6063 bytes to avoid spending too much time in this optimization. */
6064
6065 if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6066 (pcre_uint32)(end_subject - start_match) < study->minlength)
6067 {
6068 rc = MATCH_NOMATCH;
6069 break;
6070 }
6071
6072 /* If req_byte is set, we know that that character must appear in the
6073 subject for the match to succeed. If the first character is set, req_byte
6074 must be later in the subject; otherwise the test starts at the match point.
6075 This optimization can save a huge amount of backtracking in patterns with
6076 nested unlimited repeats that aren't going to match. Writing separate code
6077 for cased/caseless versions makes it go faster, as does using an
6078 autoincrement and backing off on a match.
6079
6080 HOWEVER: when the subject string is very, very long, searching to its end
6081 can take a long time, and give bad performance on quite ordinary patterns.
6082 This showed up when somebody was matching something like /^\d+C/ on a
6083 32-megabyte string... so we don't do this when the string is sufficiently
6084 long. */
6085
6086 if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
6087 {
6088 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
6089
6090 /* We don't need to repeat the search if we haven't yet reached the
6091 place we found it at last time. */
6092
6093 if (p > req_byte_ptr)
6094 {
6095 if (req_byte_caseless)
6096 {
6097 while (p < end_subject)
6098 {
6099 register int pp = *p++;
6100 if (pp == req_byte || pp == req_byte2) { p--; break; }
6101 }
6102 }
6103 else
6104 {
6105 while (p < end_subject)
6106 {
6107 if (*p++ == req_byte) { p--; break; }
6108 }
6109 }
6110
6111 /* If we can't find the required character, break the matching loop,
6112 forcing a match failure. */
6113
6114 if (p >= end_subject)
6115 {
6116 rc = MATCH_NOMATCH;
6117 break;
6118 }
6119
6120 /* If we have found the required character, save the point where we
6121 found it, so that we don't search again next time round the loop if
6122 the start hasn't passed this character yet. */
6123
6124 req_byte_ptr = p;
6125 }
6126 }
6127 }
6128
6129 #ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
6130 printf(">>>> Match against: ");
6131 pchars(start_match, end_subject - start_match, TRUE, md);
6132 printf("\n");
6133 #endif
6134
6135 /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6136 first starting point for which a partial match was found. */
6137
6138 md->start_match_ptr = start_match;
6139 md->start_used_ptr = start_match;
6140 md->match_call_count = 0;
6141 rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
6142 0, 0);
6143 if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6144
6145 switch(rc)
6146 {
6147 /* SKIP passes back the next starting point explicitly, but if it is the
6148 same as the match we have just done, treat it as NOMATCH. */
6149
6150 case MATCH_SKIP:
6151 if (md->start_match_ptr != start_match)
6152 {
6153 new_start_match = md->start_match_ptr;
6154 break;
6155 }
6156 /* Fall through */
6157
6158 /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6159 the SKIP's arg was not found. We also treat this as NOMATCH. */
6160
6161 case MATCH_SKIP_ARG:
6162 /* Fall through */
6163
6164 /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6165 exactly like PRUNE. */
6166
6167 case MATCH_NOMATCH:
6168 case MATCH_PRUNE:
6169 case MATCH_THEN:
6170 new_start_match = start_match + 1;
6171 #ifdef SUPPORT_UTF8
6172 if (utf8)
6173 while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
6174 new_start_match++;
6175 #endif
6176 break;
6177
6178 /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6179
6180 case MATCH_COMMIT:
6181 rc = MATCH_NOMATCH;
6182 goto ENDLOOP;
6183
6184 /* Any other return is either a match, or some kind of error. */
6185
6186 default:
6187 goto ENDLOOP;
6188 }
6189
6190 /* Control reaches here for the various types of "no match at this point"
6191 result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6192
6193 rc = MATCH_NOMATCH;
6194
6195 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6196 newline in the subject (though it may continue over the newline). Therefore,
6197 if we have just failed to match, starting at a newline, do not continue. */
6198
6199 if (firstline && IS_NEWLINE(start_match)) break;
6200
6201 /* Advance to new matching position */
6202
6203 start_match = new_start_match;
6204
6205 /* Break the loop if the pattern is anchored or if we have passed the end of
6206 the subject. */
6207
6208 if (anchored || start_match > end_subject) break;
6209
6210 /* If we have just passed a CR and we are now at a LF, and the pattern does
6211 not contain any explicit matches for \r or \n, and the newline option is CRLF
6212 or ANY or ANYCRLF, advance the match position by one more character. */
6213
6214 if (start_match[-1] == CHAR_CR &&
6215 start_match < end_subject &&
6216 *start_match == CHAR_NL &&
6217 (re->flags & PCRE_HASCRORLF) == 0 &&
6218 (md->nltype == NLTYPE_ANY ||
6219 md->nltype == NLTYPE_ANYCRLF ||
6220 md->nllen == 2))
6221 start_match++;
6222
6223 md->mark = NULL; /* Reset for start of next match attempt */
6224 } /* End of for(;;) "bumpalong" loop */
6225
6226 /* ==========================================================================*/
6227
6228 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
6229 conditions is true:
6230
6231 (1) The pattern is anchored or the match was failed by (*COMMIT);
6232
6233 (2) We are past the end of the subject;
6234
6235 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
6236 this option requests that a match occur at or before the first newline in
6237 the subject.
6238
6239 When we have a match and the offset vector is big enough to deal with any
6240 backreferences, captured substring offsets will already be set up. In the case
6241 where we had to get some local store to hold offsets for backreference
6242 processing, copy those that we can. In this case there need not be overflow if
6243 certain parts of the pattern were not used, even though there are more
6244 capturing parentheses than vector slots. */
6245
6246 ENDLOOP:
6247
6248 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
6249 {
6250 if (using_temporary_offsets)
6251 {
6252 if (offsetcount >= 4)
6253 {
6254 memcpy(offsets + 2, md->offset_vector + 2,
6255 (offsetcount - 2) * sizeof(int));
6256 DPRINTF(("Copied offsets from temporary memory\n"));
6257 }
6258 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
6259 DPRINTF(("Freeing temporary memory\n"));
6260 (pcre_free)(md->offset_vector);
6261 }
6262
6263 /* Set the return code to the number of captured strings, or 0 if there are
6264 too many to fit into the vector. */
6265
6266 rc = md->offset_overflow? 0 : md->end_offset_top/2;
6267
6268 /* If there is space, set up the whole thing as substring 0. The value of
6269 md->start_match_ptr might be modified if \K was encountered on the successful
6270 matching path. */
6271
6272 if (offsetcount < 2) rc = 0; else
6273 {
6274 offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6275 offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6276 }
6277
6278 DPRINTF((">>>> returning %d\n", rc));
6279 goto RETURN_MARK;
6280 }
6281
6282 /* Control gets here if there has been an error, or if the overall match
6283 attempt has failed at all permitted starting positions. */
6284
6285 if (using_temporary_offsets)
6286 {
6287 DPRINTF(("Freeing temporary memory\n"));
6288 (pcre_free)(md->offset_vector);
6289 }
6290
6291 /* For anything other than nomatch or partial match, just return the code. */
6292
6293 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
6294 {
6295 DPRINTF((">>>> error: returning %d\n", rc));
6296 return rc;
6297 }
6298
6299 /* Handle partial matches - disable any mark data */
6300
6301 if (start_partial != NULL)
6302 {
6303 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
6304 md->mark = NULL;
6305 if (offsetcount > 1)
6306 {
6307 offsets[0] = (int)(start_partial - (USPTR)subject);
6308 offsets[1] = (int)(end_subject - (USPTR)subject);
6309 }
6310 rc = PCRE_ERROR_PARTIAL;
6311 }
6312
6313 /* This is the classic nomatch case */
6314
6315 else
6316 {
6317 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
6318 rc = PCRE_ERROR_NOMATCH;
6319 }
6320
6321 /* Return the MARK data if it has been requested. */
6322
6323 RETURN_MARK:
6324
6325 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6326 *(extra_data->mark) = (unsigned char *)(md->mark);
6327 return rc;
6328 }
6329
6330 /* End of pcre_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5