/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 595 - (show annotations)
Mon May 2 10:33:29 2011 UTC (4 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 187622 byte(s)
Error occurred while calculating annotation data.
Fix problems with caseless reference matching in UTF-8 mode when the 
upper/lower case characters have different lengths.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2010 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48
49 #define NLBLOCK md /* Block containing newline information */
50 #define PSSTART start_subject /* Field containing processed string start */
51 #define PSEND end_subject /* Field containing processed string end */
52
53 #include "pcre_internal.h"
54
55 /* Undefine some potentially clashing cpp symbols */
56
57 #undef min
58 #undef max
59
60 /* Flag bits for the match() function */
61
62 #define match_condassert 0x01 /* Called to check a condition assertion */
63 #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64
65 /* Non-error returns from the match() function. Error returns are externally
66 defined PCRE_ERROR_xxx codes, which are all negative. */
67
68 #define MATCH_MATCH 1
69 #define MATCH_NOMATCH 0
70
71 /* Special internal returns from the match() function. Make them sufficiently
72 negative to avoid the external error codes. */
73
74 #define MATCH_ACCEPT (-999)
75 #define MATCH_COMMIT (-998)
76 #define MATCH_PRUNE (-997)
77 #define MATCH_SKIP (-996)
78 #define MATCH_SKIP_ARG (-995)
79 #define MATCH_THEN (-994)
80
/* Convenience macro for a sequence that occurs many times: store the current
mark pointer in the match data block and then do an ordinary RRETURN. */

#define MRRETURN(ra) \
  { md->mark = markptr; RRETURN(ra); }
88
89 /* Maximum number of ints of offset to save on the stack for recursive calls.
90 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91 because the offset vector is always a multiple of 3 long. */
92
93 #define REC_STACK_SAVE_MAX 30
94
95 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
96
97 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
98 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
99
100
101
102 #ifdef PCRE_DEBUG
103 /*************************************************
104 * Debugging function to print chars *
105 *************************************************/
106
107 /* Print a sequence of chars in printable format, stopping at the end of the
108 subject if the requested.
109
110 Arguments:
111 p points to characters
112 length number to print
113 is_subject TRUE if printing from within md->start_subject
114 md pointer to matching data block, if is_subject is TRUE
115
116 Returns: nothing
117 */
118
119 static void
120 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121 {
122 unsigned int c;
123 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124 while (length-- > 0)
125 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
126 }
127 #endif
128
129
130
131 /*************************************************
132 * Match a back-reference *
133 *************************************************/
134
135 /* Normally, if a back reference hasn't been set, the length that is passed is
136 negative, so the match always fails. However, in JavaScript compatibility mode,
137 the length passed is zero. Note that in caseless UTF-8 mode, the number of
138 subject bytes matched may be different to the number of reference bytes.
139
140 Arguments:
141 offset index into the offset vector
142 eptr pointer into the subject
143 length length of reference to be matched (number of bytes)
144 md points to match data block
145 ims the ims flags
146
147 Returns: < 0 if not matched, otherwise the number of subject bytes matched
148 */
149
150 static int
151 match_ref(int offset, register USPTR eptr, int length, match_data *md,
152 unsigned long int ims)
153 {
154 USPTR eptr_start = eptr;
155 register USPTR p = md->start_subject + md->offset_vector[offset];
156
157 #ifdef PCRE_DEBUG
158 if (eptr >= md->end_subject)
159 printf("matching subject <null>");
160 else
161 {
162 printf("matching subject ");
163 pchars(eptr, length, TRUE, md);
164 }
165 printf(" against backref ");
166 pchars(p, length, FALSE, md);
167 printf("\n");
168 #endif
169
170 /* Always fail if reference not set (and not JavaScript compatible). */
171
172 if (length < 0) return -1;
173
174 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175 properly if Unicode properties are supported. Otherwise, we can check only
176 ASCII characters. */
177
178 if ((ims & PCRE_CASELESS) != 0)
179 {
180 #ifdef SUPPORT_UTF8
181 #ifdef SUPPORT_UCP
182 if (md->utf8)
183 {
184 /* Match characters up to the end of the reference. NOTE: the number of
185 bytes matched may differ, because there are some characters whose upper and
186 lower case versions code as different numbers of bytes. For example, U+023A
187 (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188 a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189 the latter. It is important, therefore, to check the length along the
190 reference, not along the subject (earlier code did this wrong). */
191
192 USPTR endptr = p + length;
193 while (p < endptr)
194 {
195 int c, d;
196 GETCHARINC(c, eptr);
197 GETCHARINC(d, p);
198 if (c != d && c != UCD_OTHERCASE(d)) return -1;
199 }
200 }
201 else
202 #endif
203 #endif
204
205 /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
206 is no UCP support. */
207
208 while (length-- > 0)
209 { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
210 }
211
212 /* In the caseful case, we can just compare the bytes, whether or not we
213 are in UTF-8 mode. */
214
215 else
216 { while (length-- > 0) if (*p++ != *eptr++) return -1; }
217
218 return eptr - eptr_start;
219 }
220
221
222
223 /***************************************************************************
224 ****************************************************************************
225 RECURSION IN THE match() FUNCTION
226
227 The match() function is highly recursive, though not every recursive call
228 increases the recursive depth. Nevertheless, some regular expressions can cause
229 it to recurse to a great depth. I was writing for Unix, so I just let it call
230 itself recursively. This uses the stack for saving everything that has to be
231 saved for a recursive call. On Unix, the stack can be large, and this works
232 fine.
233
234 It turns out that on some non-Unix-like systems there are problems with
235 programs that use a lot of stack. (This despite the fact that every last chip
236 has oodles of memory these days, and techniques for extending the stack have
237 been known for decades.) So....
238
239 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
240 calls by keeping local variables that need to be preserved in blocks of memory
241 obtained from malloc() instead of on the stack. Macros are used to
242 achieve this so that the actual code doesn't look very different to what it
243 always used to.
244
245 The original heap-recursive code used longjmp(). However, it seems that this
246 can be very slow on some operating systems. Following a suggestion from Stan
247 Switzer, the use of longjmp() has been abolished, at the cost of having to
248 provide a unique number for each call to RMATCH. There is no way of generating
249 a sequence of numbers at compile time in C. I have given them names, to make
250 them stand out more clearly.
251
252 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
253 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
254 tests. Furthermore, not using longjmp() means that local dynamic variables
255 don't have indeterminate values; this has meant that the frame size can be
256 reduced because the result can be "passed back" by straight setting of the
257 variable instead of being passed in the frame.
258 ****************************************************************************
259 ***************************************************************************/
260
261 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
262 below must be updated in sync. */
263
264 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
265 RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
266 RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
267 RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
268 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
269 RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
270 RM61, RM62 };
271
272 /* These versions of the macros use the stack, as normal. There are debugging
273 versions and production versions. Note that the "rw" argument of RMATCH isn't
274 actually used in this definition. */
275
276 #ifndef NO_RECURSE
277 #define REGISTER register
278
279 #ifdef PCRE_DEBUG
280 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
281 { \
282 printf("match() called in line %d\n", __LINE__); \
283 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
284 printf("to line %d\n", __LINE__); \
285 }
286 #define RRETURN(ra) \
287 { \
288 printf("match() returned %d from line %d ", ra, __LINE__); \
289 return ra; \
290 }
291 #else
292 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
293 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
294 #define RRETURN(ra) return ra
295 #endif
296
297 #else
298
299
/* Heap-based versions of the macros, used when NO_RECURSE is defined. They
manage a private stack of heapframe blocks instead of using the system stack.
The "rd" argument of RMATCH is not referenced in this definition. It's the md
argument of match(), which never changes.

RMATCH records in the current frame where to resume (rw), obtains a new frame
from pcre_stack_malloc, links it to the current one, copies the "arguments"
into it, makes it current and jumps to HEAP_RECURSE. The label L_<rw> is where
control comes back to when the simulated call returns. */

#define REGISTER

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof *newframe);\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xprevframe = frame;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xmarkptr = markptr;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
327
/* "Return" from a heap-simulated call of match(): pop and free the current
frame, then either resume the calling frame at HEAP_RETURN (with the result
passed back in rrc) or, when there is no calling frame, really return from
match().

frame can be NULL when this macro is used: match() invokes
RRETURN(PCRE_ERROR_NOMEMORY) if the allocation of its top-level frame fails.
In that case there is nothing to pop or free, and the macro must go straight
to the real return rather than dereference the null pointer. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  if (oldframe != NULL)\
    {\
    frame = oldframe->Xprevframe;\
    (pcre_stack_free)(oldframe);\
    }\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
340
341
342 /* Structure for remembering the local variables in a private frame */
343
344 typedef struct heapframe {
345 struct heapframe *Xprevframe;
346
347 /* Function arguments that may change */
348
349 USPTR Xeptr;
350 const uschar *Xecode;
351 USPTR Xmstart;
352 USPTR Xmarkptr;
353 int Xoffset_top;
354 long int Xims;
355 eptrblock *Xeptrb;
356 int Xflags;
357 unsigned int Xrdepth;
358
359 /* Function local variables */
360
361 USPTR Xcallpat;
362 #ifdef SUPPORT_UTF8
363 USPTR Xcharptr;
364 #endif
365 USPTR Xdata;
366 USPTR Xnext;
367 USPTR Xpp;
368 USPTR Xprev;
369 USPTR Xsaved_eptr;
370
371 recursion_info Xnew_recursive;
372
373 BOOL Xcur_is_word;
374 BOOL Xcondition;
375 BOOL Xprev_is_word;
376
377 unsigned long int Xoriginal_ims;
378
379 #ifdef SUPPORT_UCP
380 int Xprop_type;
381 int Xprop_value;
382 int Xprop_fail_result;
383 int Xprop_category;
384 int Xprop_chartype;
385 int Xprop_script;
386 int Xoclength;
387 uschar Xocchars[8];
388 #endif
389
390 int Xcodelink;
391 int Xctype;
392 unsigned int Xfc;
393 int Xfi;
394 int Xlength;
395 int Xmax;
396 int Xmin;
397 int Xnumber;
398 int Xoffset;
399 int Xop;
400 int Xsave_capture_last;
401 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
402 int Xstacksave[REC_STACK_SAVE_MAX];
403
404 eptrblock Xnewptrb;
405
406 /* Where to jump back to */
407
408 int Xwhere;
409
410 } heapframe;
411
412 #endif
413
414
415 /***************************************************************************
416 ***************************************************************************/
417
418
419
420 /*************************************************
421 * Match from current position *
422 *************************************************/
423
424 /* This function is called recursively in many circumstances. Whenever it
425 returns a negative (error) response, the outer incarnation must also return the
426 same response. */
427
/* These macros pack up tests that are used for partial matching, and which
appear several times in the code. The "hit end" flag is set if partial
matching is enabled, the pointer is at or beyond the end of the subject, and
it is also beyond md->start_used_ptr (i.e. something has been matched). For
hard partial matching (md->partial > 1) we then return PCRE_ERROR_PARTIAL
immediately. SCHECK_PARTIAL omits the end-of-subject test; it is for use when
we already know we are past the end of the subject. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr > md->start_used_ptr && \
      eptr >= md->end_subject) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
    }

#define SCHECK_PARTIAL()\
  if (eptr > md->start_used_ptr && md->partial != 0) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
    }
449
450
451 /* Performance note: It might be tempting to extract commonly used fields from
452 the md structure (e.g. utf8, end_subject) into individual variables to improve
453 performance. Tests using gcc on a SPARC disproved this; in the first case, it
454 made performance worse.
455
456 Arguments:
457 eptr pointer to current character in subject
458 ecode pointer to current position in compiled code
459 mstart pointer to the current match start position (can be modified
460 by encountering \K)
461 markptr pointer to the most recent MARK name, or NULL
462 offset_top current top pointer
463 md pointer to "static" info for the match
464 ims current /i, /m, and /s options
465 eptrb pointer to chain of blocks containing eptr at start of
466 brackets - for testing for empty matches
467 flags can contain
468 match_condassert - this is an assertion condition
469 match_cbegroup - this is the start of an unlimited repeat
470 group that can match an empty string
471 rdepth the recursion depth
472
473 Returns: MATCH_MATCH if matched ) these values are >= 0
474 MATCH_NOMATCH if failed to match )
475 a negative MATCH_xxx value for PRUNE, SKIP, etc
476 a negative PCRE_ERROR_xxx value if aborted by an error condition
477 (e.g. stopped by repeated call or recursion limit)
478 */
479
480 static int
481 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
482 const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
483 eptrblock *eptrb, int flags, unsigned int rdepth)
484 {
485 /* These variables do not need to be preserved over recursion in this function,
486 so they can be ordinary variables in all cases. Mark some of them with
487 "register" because they are used a lot in loops. */
488
489 register int rrc; /* Returns from recursive calls */
490 register int i; /* Used for loops not involving calls to RMATCH() */
491 register unsigned int c; /* Character values not kept over RMATCH() calls */
492 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
493
494 BOOL minimize, possessive; /* Quantifier options */
495 int condcode;
496
497 /* When recursion is not being used, all "local" variables that have to be
498 preserved over calls to RMATCH() are part of a "frame" which is obtained from
499 heap storage. Set up the top-level frame here; others are obtained from the
500 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
501
502 #ifdef NO_RECURSE
503 heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
504 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
505 frame->Xprevframe = NULL; /* Marks the top level */
506
507 /* Copy in the original argument variables */
508
509 frame->Xeptr = eptr;
510 frame->Xecode = ecode;
511 frame->Xmstart = mstart;
512 frame->Xmarkptr = markptr;
513 frame->Xoffset_top = offset_top;
514 frame->Xims = ims;
515 frame->Xeptrb = eptrb;
516 frame->Xflags = flags;
517 frame->Xrdepth = rdepth;
518
519 /* This is where control jumps back to to effect "recursion" */
520
521 HEAP_RECURSE:
522
523 /* Macros make the argument variables come from the current frame */
524
525 #define eptr frame->Xeptr
526 #define ecode frame->Xecode
527 #define mstart frame->Xmstart
528 #define markptr frame->Xmarkptr
529 #define offset_top frame->Xoffset_top
530 #define ims frame->Xims
531 #define eptrb frame->Xeptrb
532 #define flags frame->Xflags
533 #define rdepth frame->Xrdepth
534
535 /* Ditto for the local variables */
536
537 #ifdef SUPPORT_UTF8
538 #define charptr frame->Xcharptr
539 #endif
540 #define callpat frame->Xcallpat
541 #define codelink frame->Xcodelink
542 #define data frame->Xdata
543 #define next frame->Xnext
544 #define pp frame->Xpp
545 #define prev frame->Xprev
546 #define saved_eptr frame->Xsaved_eptr
547
548 #define new_recursive frame->Xnew_recursive
549
550 #define cur_is_word frame->Xcur_is_word
551 #define condition frame->Xcondition
552 #define prev_is_word frame->Xprev_is_word
553
554 #define original_ims frame->Xoriginal_ims
555
556 #ifdef SUPPORT_UCP
557 #define prop_type frame->Xprop_type
558 #define prop_value frame->Xprop_value
559 #define prop_fail_result frame->Xprop_fail_result
560 #define prop_category frame->Xprop_category
561 #define prop_chartype frame->Xprop_chartype
562 #define prop_script frame->Xprop_script
563 #define oclength frame->Xoclength
564 #define occhars frame->Xocchars
565 #endif
566
567 #define ctype frame->Xctype
568 #define fc frame->Xfc
569 #define fi frame->Xfi
570 #define length frame->Xlength
571 #define max frame->Xmax
572 #define min frame->Xmin
573 #define number frame->Xnumber
574 #define offset frame->Xoffset
575 #define op frame->Xop
576 #define save_capture_last frame->Xsave_capture_last
577 #define save_offset1 frame->Xsave_offset1
578 #define save_offset2 frame->Xsave_offset2
579 #define save_offset3 frame->Xsave_offset3
580 #define stacksave frame->Xstacksave
581
582 #define newptrb frame->Xnewptrb
583
584 /* When recursion is being used, local variables are allocated on the stack and
585 get preserved during recursion in the normal way. In this environment, fi and
586 i, and fc and c, can be the same variables. */
587
588 #else /* NO_RECURSE not defined */
589 #define fi i
590 #define fc c
591
592
593 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
594 const uschar *charptr; /* in small blocks of the code. My normal */
595 #endif /* style of coding would have declared */
596 const uschar *callpat; /* them within each of those blocks. */
597 const uschar *data; /* However, in order to accommodate the */
598 const uschar *next; /* version of this code that uses an */
599 USPTR pp; /* external "stack" implemented on the */
600 const uschar *prev; /* heap, it is easier to declare them all */
601 USPTR saved_eptr; /* here, so the declarations can be cut */
602 /* out in a block. The only declarations */
603 recursion_info new_recursive; /* within blocks below are for variables */
604 /* that do not have to be preserved over */
605 BOOL cur_is_word; /* a recursive call to RMATCH(). */
606 BOOL condition;
607 BOOL prev_is_word;
608
609 unsigned long int original_ims;
610
611 #ifdef SUPPORT_UCP
612 int prop_type;
613 int prop_value;
614 int prop_fail_result;
615 int prop_category;
616 int prop_chartype;
617 int prop_script;
618 int oclength;
619 uschar occhars[8];
620 #endif
621
622 int codelink;
623 int ctype;
624 int length;
625 int max;
626 int min;
627 int number;
628 int offset;
629 int op;
630 int save_capture_last;
631 int save_offset1, save_offset2, save_offset3;
632 int stacksave[REC_STACK_SAVE_MAX];
633
634 eptrblock newptrb;
635 #endif /* NO_RECURSE */
636
637 /* These statements are here to stop the compiler complaining about uninitialized
638 variables. */
639
640 #ifdef SUPPORT_UCP
641 prop_value = 0;
642 prop_fail_result = 0;
643 #endif
644
645
646 /* This label is used for tail recursion, which is used in a few cases even
647 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
648 used. Thanks to Ian Taylor for noticing this possibility and sending the
649 original patch. */
650
651 TAIL_RECURSE:
652
653 /* OK, now we can get on with the real code of the function. Recursive calls
654 are specified by the macro RMATCH and RRETURN is used to return. When
655 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
656 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
657 defined). However, RMATCH isn't like a function call because it's quite a
658 complicated macro. It has to be used in one particular way. This shouldn't,
659 however, impact performance when true recursion is being used. */
660
661 #ifdef SUPPORT_UTF8
662 utf8 = md->utf8; /* Local copy of the flag */
663 #else
664 utf8 = FALSE;
665 #endif
666
667 /* First check that we haven't called match() too many times, or that we
668 haven't exceeded the recursive call limit. */
669
670 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
671 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
672
673 original_ims = ims; /* Save for resetting on ')' */
674
675 /* At the start of a group with an unlimited repeat that may match an empty
676 string, the match_cbegroup flag is set. When this is the case, add the current
677 subject pointer to the chain of such remembered pointers, to be checked when we
678 hit the closing ket, in order to break infinite loops that match no characters.
679 When match() is called in other circumstances, don't add to the chain. The
680 match_cbegroup flag must NOT be used with tail recursion, because the memory
681 block that is used is on the stack, so a new one may be required for each
682 match(). */
683
684 if ((flags & match_cbegroup) != 0)
685 {
686 newptrb.epb_saved_eptr = eptr;
687 newptrb.epb_prev = eptrb;
688 eptrb = &newptrb;
689 }
690
691 /* Now start processing the opcodes. */
692
693 for (;;)
694 {
695 minimize = possessive = FALSE;
696 op = *ecode;
697
698 switch(op)
699 {
700 case OP_MARK:
701 markptr = ecode + 2;
702 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
703 ims, eptrb, flags, RM55);
704
705 /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
706 argument, and we must check whether that argument matches this MARK's
707 argument. It is passed back in md->start_match_ptr (an overloading of that
708 variable). If it does match, we reset that variable to the current subject
709 position and return MATCH_SKIP. Otherwise, pass back the return code
710 unaltered. */
711
712 if (rrc == MATCH_SKIP_ARG &&
713 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
714 {
715 md->start_match_ptr = eptr;
716 RRETURN(MATCH_SKIP);
717 }
718
719 if (md->mark == NULL) md->mark = markptr;
720 RRETURN(rrc);
721
722 case OP_FAIL:
723 MRRETURN(MATCH_NOMATCH);
724
725 /* COMMIT overrides PRUNE, SKIP, and THEN */
726
727 case OP_COMMIT:
728 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
729 ims, eptrb, flags, RM52);
730 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
731 rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
732 rrc != MATCH_THEN)
733 RRETURN(rrc);
734 MRRETURN(MATCH_COMMIT);
735
736 /* PRUNE overrides THEN */
737
738 case OP_PRUNE:
739 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
740 ims, eptrb, flags, RM51);
741 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
742 MRRETURN(MATCH_PRUNE);
743
744 case OP_PRUNE_ARG:
745 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
746 ims, eptrb, flags, RM56);
747 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
748 md->mark = ecode + 2;
749 RRETURN(MATCH_PRUNE);
750
751 /* SKIP overrides PRUNE and THEN */
752
753 case OP_SKIP:
754 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
755 ims, eptrb, flags, RM53);
756 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
757 RRETURN(rrc);
758 md->start_match_ptr = eptr; /* Pass back current position */
759 MRRETURN(MATCH_SKIP);
760
761 case OP_SKIP_ARG:
762 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
763 ims, eptrb, flags, RM57);
764 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
765 RRETURN(rrc);
766
767 /* Pass back the current skip name by overloading md->start_match_ptr and
768 returning the special MATCH_SKIP_ARG return code. This will either be
769 caught by a matching MARK, or get to the top, where it is treated the same
770 as PRUNE. */
771
772 md->start_match_ptr = ecode + 2;
773 RRETURN(MATCH_SKIP_ARG);
774
775 /* For THEN (and THEN_ARG) we pass back the address of the bracket or
776 the alt that is at the start of the current branch. This makes it possible
777 to skip back past alternatives that precede the THEN within the current
778 branch. */
779
780 case OP_THEN:
781 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
782 ims, eptrb, flags, RM54);
783 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
784 md->start_match_ptr = ecode - GET(ecode, 1);
785 MRRETURN(MATCH_THEN);
786
787 case OP_THEN_ARG:
788 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
789 offset_top, md, ims, eptrb, flags, RM58);
790 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
791 md->start_match_ptr = ecode - GET(ecode, 1);
792 md->mark = ecode + LINK_SIZE + 2;
793 RRETURN(MATCH_THEN);
794
795 /* Handle a capturing bracket. If there is space in the offset vector, save
796 the current subject position in the working slot at the top of the vector.
797 We mustn't change the current values of the data slot, because they may be
798 set from a previous iteration of this group, and be referred to by a
799 reference inside the group.
800
801 If the bracket fails to match, we need to restore this value and also the
802 values of the final offsets, in case they were set by a previous iteration
803 of the same bracket.
804
805 If there isn't enough space in the offset vector, treat this as if it were
806 a non-capturing bracket. Don't worry about setting the flag for the error
807 case here; that is handled in the code for KET. */
808
809 case OP_CBRA:
810 case OP_SCBRA:
811 number = GET2(ecode, 1+LINK_SIZE);
812 offset = number << 1;
813
814 #ifdef PCRE_DEBUG
815 printf("start bracket %d\n", number);
816 printf("subject=");
817 pchars(eptr, 16, TRUE, md);
818 printf("\n");
819 #endif
820
821 if (offset < md->offset_max)
822 {
823 save_offset1 = md->offset_vector[offset];
824 save_offset2 = md->offset_vector[offset+1];
825 save_offset3 = md->offset_vector[md->offset_end - number];
826 save_capture_last = md->capture_last;
827
828 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
829 md->offset_vector[md->offset_end - number] =
830 (int)(eptr - md->start_subject);
831
832 flags = (op == OP_SCBRA)? match_cbegroup : 0;
833 do
834 {
835 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
836 ims, eptrb, flags, RM1);
837 if (rrc != MATCH_NOMATCH &&
838 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
839 RRETURN(rrc);
840 md->capture_last = save_capture_last;
841 ecode += GET(ecode, 1);
842 }
843 while (*ecode == OP_ALT);
844
845 DPRINTF(("bracket %d failed\n", number));
846
847 md->offset_vector[offset] = save_offset1;
848 md->offset_vector[offset+1] = save_offset2;
849 md->offset_vector[md->offset_end - number] = save_offset3;
850
851 if (rrc != MATCH_THEN) md->mark = markptr;
852 RRETURN(MATCH_NOMATCH);
853 }
854
855 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
856 as a non-capturing bracket. */
857
858 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
859 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
860
861 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
862
863 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
864 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
865
866 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
867 final alternative within the brackets, we would return the result of a
868 recursive call to match() whatever happened. We can reduce stack usage by
869 turning this into a tail recursion, except in the case when match_cbegroup
870 is set.*/
871
872 case OP_BRA:
873 case OP_SBRA:
874 DPRINTF(("start non-capturing bracket\n"));
875 flags = (op >= OP_SBRA)? match_cbegroup : 0;
876 for (;;)
877 {
878 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
879 {
880 if (flags == 0) /* Not a possibly empty group */
881 {
882 ecode += _pcre_OP_lengths[*ecode];
883 DPRINTF(("bracket 0 tail recursion\n"));
884 goto TAIL_RECURSE;
885 }
886
887 /* Possibly empty group; can't use tail recursion. */
888
889 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
890 eptrb, flags, RM48);
891 if (rrc == MATCH_NOMATCH) md->mark = markptr;
892 RRETURN(rrc);
893 }
894
895 /* For non-final alternatives, continue the loop for a NOMATCH result;
896 otherwise return. */
897
898 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
899 eptrb, flags, RM2);
900 if (rrc != MATCH_NOMATCH &&
901 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
902 RRETURN(rrc);
903 ecode += GET(ecode, 1);
904 }
905 /* Control never reaches here. */
906
907 /* Conditional group: compilation checked that there are no more than
908 two branches. If the condition is false, skipping the first branch takes us
909 past the end if there is only one branch, but that's OK because that is
910 exactly what going to the ket would do. As there is only one branch to be
911 obeyed, we can use tail recursion to avoid using another stack frame. */
912
913 case OP_COND:
914 case OP_SCOND:
915 codelink= GET(ecode, 1);
916
917 /* Because of the way auto-callout works during compile, a callout item is
918 inserted between OP_COND and an assertion condition. */
919
920 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
921 {
922 if (pcre_callout != NULL)
923 {
924 pcre_callout_block cb;
925 cb.version = 1; /* Version 1 of the callout block */
926 cb.callout_number = ecode[LINK_SIZE+2];
927 cb.offset_vector = md->offset_vector;
928 cb.subject = (PCRE_SPTR)md->start_subject;
929 cb.subject_length = (int)(md->end_subject - md->start_subject);
930 cb.start_match = (int)(mstart - md->start_subject);
931 cb.current_position = (int)(eptr - md->start_subject);
932 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
933 cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
934 cb.capture_top = offset_top/2;
935 cb.capture_last = md->capture_last;
936 cb.callout_data = md->callout_data;
937 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
938 if (rrc < 0) RRETURN(rrc);
939 }
940 ecode += _pcre_OP_lengths[OP_CALLOUT];
941 }
942
943 condcode = ecode[LINK_SIZE+1];
944
945 /* Now see what the actual condition is */
946
947 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
948 {
949 if (md->recursive == NULL) /* Not recursing => FALSE */
950 {
951 condition = FALSE;
952 ecode += GET(ecode, 1);
953 }
954 else
955 {
956 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
957 condition = (recno == RREF_ANY || recno == md->recursive->group_num);
958
959 /* If the test is for recursion into a specific subpattern, and it is
960 false, but the test was set up by name, scan the table to see if the
961 name refers to any other numbers, and test them. The condition is true
962 if any one is set. */
963
964 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
965 {
966 uschar *slotA = md->name_table;
967 for (i = 0; i < md->name_count; i++)
968 {
969 if (GET2(slotA, 0) == recno) break;
970 slotA += md->name_entry_size;
971 }
972
973 /* Found a name for the number - there can be only one; duplicate
974 names for different numbers are allowed, but not vice versa. First
975 scan down for duplicates. */
976
977 if (i < md->name_count)
978 {
979 uschar *slotB = slotA;
980 while (slotB > md->name_table)
981 {
982 slotB -= md->name_entry_size;
983 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
984 {
985 condition = GET2(slotB, 0) == md->recursive->group_num;
986 if (condition) break;
987 }
988 else break;
989 }
990
991 /* Scan up for duplicates */
992
993 if (!condition)
994 {
995 slotB = slotA;
996 for (i++; i < md->name_count; i++)
997 {
998 slotB += md->name_entry_size;
999 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1000 {
1001 condition = GET2(slotB, 0) == md->recursive->group_num;
1002 if (condition) break;
1003 }
1004 else break;
1005 }
1006 }
1007 }
1008 }
1009
1010 /* Choose the branch according to the condition */
1011
1012 ecode += condition? 3 : GET(ecode, 1);
1013 }
1014 }
1015
1016 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1017 {
1018 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1019 condition = offset < offset_top && md->offset_vector[offset] >= 0;
1020
1021 /* If the numbered capture is unset, but the reference was by name,
1022 scan the table to see if the name refers to any other numbers, and test
1023 them. The condition is true if any one is set. This is tediously similar
1024 to the code above, but not close enough to try to amalgamate. */
1025
1026 if (!condition && condcode == OP_NCREF)
1027 {
1028 int refno = offset >> 1;
1029 uschar *slotA = md->name_table;
1030
1031 for (i = 0; i < md->name_count; i++)
1032 {
1033 if (GET2(slotA, 0) == refno) break;
1034 slotA += md->name_entry_size;
1035 }
1036
1037 /* Found a name for the number - there can be only one; duplicate names
1038 for different numbers are allowed, but not vice versa. First scan down
1039 for duplicates. */
1040
1041 if (i < md->name_count)
1042 {
1043 uschar *slotB = slotA;
1044 while (slotB > md->name_table)
1045 {
1046 slotB -= md->name_entry_size;
1047 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1048 {
1049 offset = GET2(slotB, 0) << 1;
1050 condition = offset < offset_top &&
1051 md->offset_vector[offset] >= 0;
1052 if (condition) break;
1053 }
1054 else break;
1055 }
1056
1057 /* Scan up for duplicates */
1058
1059 if (!condition)
1060 {
1061 slotB = slotA;
1062 for (i++; i < md->name_count; i++)
1063 {
1064 slotB += md->name_entry_size;
1065 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1066 {
1067 offset = GET2(slotB, 0) << 1;
1068 condition = offset < offset_top &&
1069 md->offset_vector[offset] >= 0;
1070 if (condition) break;
1071 }
1072 else break;
1073 }
1074 }
1075 }
1076 }
1077
1078 /* Choose the branch according to the condition */
1079
1080 ecode += condition? 3 : GET(ecode, 1);
1081 }
1082
1083 else if (condcode == OP_DEF) /* DEFINE - always false */
1084 {
1085 condition = FALSE;
1086 ecode += GET(ecode, 1);
1087 }
1088
1089 /* The condition is an assertion. Call match() to evaluate it - passing
1090 match_condassert as the flags argument causes it to stop at the end of an
1091 assertion. */
1092
1093 else
1094 {
1095 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1096 match_condassert, RM3);
1097 if (rrc == MATCH_MATCH)
1098 {
1099 condition = TRUE;
1100 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1101 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1102 }
1103 else if (rrc != MATCH_NOMATCH &&
1104 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1105 {
1106 RRETURN(rrc); /* Need braces because of following else */
1107 }
1108 else
1109 {
1110 condition = FALSE;
1111 ecode += codelink;
1112 }
1113 }
1114
1115 /* We are now at the branch that is to be obeyed. As there is only one,
1116 we can use tail recursion to avoid using another stack frame, except when
1117 match_cbegroup is required for an unlimited repeat of a possibly empty
1118 group. If the second alternative doesn't exist, we can just plough on. */
1119
1120 if (condition || *ecode == OP_ALT)
1121 {
1122 ecode += 1 + LINK_SIZE;
1123 if (op == OP_SCOND) /* Possibly empty group */
1124 {
1125 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1126 RRETURN(rrc);
1127 }
1128 else /* Group must match something */
1129 {
1130 flags = 0;
1131 goto TAIL_RECURSE;
1132 }
1133 }
1134 else /* Condition false & no alternative */
1135 {
1136 ecode += 1 + LINK_SIZE;
1137 }
1138 break;
1139
1140
1141 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1142 to close any currently open capturing brackets. */
1143
1144 case OP_CLOSE:
1145 number = GET2(ecode, 1);
1146 offset = number << 1;
1147
1148 #ifdef PCRE_DEBUG
1149 printf("end bracket %d at *ACCEPT", number);
1150 printf("\n");
1151 #endif
1152
1153 md->capture_last = number;
1154 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1155 {
1156 md->offset_vector[offset] =
1157 md->offset_vector[md->offset_end - number];
1158 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1159 if (offset_top <= offset) offset_top = offset + 2;
1160 }
1161 ecode += 3;
1162 break;
1163
1164
1165 /* End of the pattern, either real or forced. If we are in a top-level
1166 recursion, we should restore the offsets appropriately and continue from
1167 after the call. */
1168
1169 case OP_ACCEPT:
1170 case OP_END:
1171 if (md->recursive != NULL && md->recursive->group_num == 0)
1172 {
1173 recursion_info *rec = md->recursive;
1174 DPRINTF(("End of pattern in a (?0) recursion\n"));
1175 md->recursive = rec->prevrec;
1176 memmove(md->offset_vector, rec->offset_save,
1177 rec->saved_max * sizeof(int));
1178 offset_top = rec->save_offset_top;
1179 ims = original_ims;
1180 ecode = rec->after_call;
1181 break;
1182 }
1183
1184 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1185 set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1186 the subject. In both cases, backtracking will then try other alternatives,
1187 if any. */
1188
1189 if (eptr == mstart &&
1190 (md->notempty ||
1191 (md->notempty_atstart &&
1192 mstart == md->start_subject + md->start_offset)))
1193 MRRETURN(MATCH_NOMATCH);
1194
1195 /* Otherwise, we have a match. */
1196
1197 md->end_match_ptr = eptr; /* Record where we ended */
1198 md->end_offset_top = offset_top; /* and how many extracts were taken */
1199 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1200
1201 /* For some reason, the macros don't work properly if an expression is
1202 given as the argument to MRRETURN when the heap is in use. */
1203
1204 rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1205 MRRETURN(rrc);
1206
1207 /* Change option settings */
1208
1209 case OP_OPT:
1210 ims = ecode[1];
1211 ecode += 2;
1212 DPRINTF(("ims set to %02lx\n", ims));
1213 break;
1214
1215 /* Assertion brackets. Check the alternative branches in turn - the
1216 matching won't pass the KET for an assertion. If any one branch matches,
1217 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1218 start of each branch to move the current point backwards, so the code at
1219 this level is identical to the lookahead case. */
1220
1221 case OP_ASSERT:
1222 case OP_ASSERTBACK:
1223 do
1224 {
1225 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1226 RM4);
1227 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1228 {
1229 mstart = md->start_match_ptr; /* In case \K reset it */
1230 break;
1231 }
1232 if (rrc != MATCH_NOMATCH &&
1233 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1234 RRETURN(rrc);
1235 ecode += GET(ecode, 1);
1236 }
1237 while (*ecode == OP_ALT);
1238 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1239
1240 /* If checking an assertion for a condition, return MATCH_MATCH. */
1241
1242 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1243
1244 /* Continue from after the assertion, updating the offsets high water
1245 mark, since extracts may have been taken during the assertion. */
1246
1247 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1248 ecode += 1 + LINK_SIZE;
1249 offset_top = md->end_offset_top;
1250 continue;
1251
1252 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1253 PRUNE, or COMMIT means we must assume failure without checking subsequent
1254 branches. */
1255
1256 case OP_ASSERT_NOT:
1257 case OP_ASSERTBACK_NOT:
1258 do
1259 {
1260 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1261 RM5);
1262 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1263 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1264 {
1265 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1266 break;
1267 }
1268 if (rrc != MATCH_NOMATCH &&
1269 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1270 RRETURN(rrc);
1271 ecode += GET(ecode,1);
1272 }
1273 while (*ecode == OP_ALT);
1274
1275 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1276
1277 ecode += 1 + LINK_SIZE;
1278 continue;
1279
1280 /* Move the subject pointer back. This occurs only at the start of
1281 each branch of a lookbehind assertion. If we are too close to the start to
1282 move back, this match function fails. When working with UTF-8 we move
1283 back a number of characters, not bytes. */
1284
1285 case OP_REVERSE:
1286 #ifdef SUPPORT_UTF8
1287 if (utf8)
1288 {
1289 i = GET(ecode, 1);
1290 while (i-- > 0)
1291 {
1292 eptr--;
1293 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1294 BACKCHAR(eptr);
1295 }
1296 }
1297 else
1298 #endif
1299
1300 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1301
1302 {
1303 eptr -= GET(ecode, 1);
1304 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1305 }
1306
1307 /* Save the earliest consulted character, then skip to next op code */
1308
1309 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1310 ecode += 1 + LINK_SIZE;
1311 break;
1312
1313 /* The callout item calls an external function, if one is provided, passing
1314 details of the match so far. This is mainly for debugging, though the
1315 function is able to force a failure. */
1316
1317 case OP_CALLOUT:
1318 if (pcre_callout != NULL)
1319 {
1320 pcre_callout_block cb;
1321 cb.version = 1; /* Version 1 of the callout block */
1322 cb.callout_number = ecode[1];
1323 cb.offset_vector = md->offset_vector;
1324 cb.subject = (PCRE_SPTR)md->start_subject;
1325 cb.subject_length = (int)(md->end_subject - md->start_subject);
1326 cb.start_match = (int)(mstart - md->start_subject);
1327 cb.current_position = (int)(eptr - md->start_subject);
1328 cb.pattern_position = GET(ecode, 2);
1329 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1330 cb.capture_top = offset_top/2;
1331 cb.capture_last = md->capture_last;
1332 cb.callout_data = md->callout_data;
1333 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1334 if (rrc < 0) RRETURN(rrc);
1335 }
1336 ecode += 2 + 2*LINK_SIZE;
1337 break;
1338
1339 /* Recursion either matches the current regex, or some subexpression. The
1340 offset data is the offset to the starting bracket from the start of the
1341 whole pattern. (This is so that it works from duplicated subpatterns.)
1342
1343 If there are any capturing brackets started but not finished, we have to
1344 save their starting points and reinstate them after the recursion. However,
1345 we don't know how many such there are (offset_top records the completed
1346 total) so we just have to save all the potential data. There may be up to
1347 65535 such values, which is too large to put on the stack, but using malloc
1348 for small numbers seems expensive. As a compromise, the stack is used when
1349 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1350 is used. If the malloc fails, no attempt is made to carry on with a
1351 partial save of the values: PCRE_ERROR_NOMEMORY is passed back to the
1352 caller via RRETURN.
1353
1354 There are also other values that have to be saved. We use a chained
1355 sequence of blocks that actually live on the stack. Thanks to Robin Houston
1356 for the original version of this logic. */
1357
1358 case OP_RECURSE:
1359 {
1360 callpat = md->start_code + GET(ecode, 1);
1361 new_recursive.group_num = (callpat == md->start_code)? 0 :
1362 GET2(callpat, 1 + LINK_SIZE);
1363
1364 /* Add to "recursing stack" */
1365
1366 new_recursive.prevrec = md->recursive;
1367 md->recursive = &new_recursive;
1368
1369 /* Find where to continue from afterwards */
1370
1371 ecode += 1 + LINK_SIZE;
1372 new_recursive.after_call = ecode;
1373
1374 /* Now save the offset data. */
1375
1376 new_recursive.saved_max = md->offset_end;
1377 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1378 new_recursive.offset_save = stacksave;
1379 else
1380 {
1381 new_recursive.offset_save =
1382 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1383 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1384 }
1385
1386 memcpy(new_recursive.offset_save, md->offset_vector,
1387 new_recursive.saved_max * sizeof(int));
1388 new_recursive.save_offset_top = offset_top;
1389
1390 /* OK, now we can do the recursion. For each top-level alternative we
1391 restore the offset and recursion data. */
1392
1393 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1394 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1395 do
1396 {
1397 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1398 md, ims, eptrb, flags, RM6);
1399 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1400 {
1401 DPRINTF(("Recursion matched\n"));
1402 md->recursive = new_recursive.prevrec;
1403 if (new_recursive.offset_save != stacksave)
1404 (pcre_free)(new_recursive.offset_save);
1405 MRRETURN(MATCH_MATCH);
1406 }
1407 else if (rrc != MATCH_NOMATCH &&
1408 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1409 {
1410 DPRINTF(("Recursion gave error %d\n", rrc));
1411 if (new_recursive.offset_save != stacksave)
1412 (pcre_free)(new_recursive.offset_save);
1413 RRETURN(rrc);
1414 }
1415
1416 md->recursive = &new_recursive;
1417 memcpy(md->offset_vector, new_recursive.offset_save,
1418 new_recursive.saved_max * sizeof(int));
1419 callpat += GET(callpat, 1);
1420 }
1421 while (*callpat == OP_ALT);
1422
1423 DPRINTF(("Recursion didn't match\n"));
1424 md->recursive = new_recursive.prevrec;
1425 if (new_recursive.offset_save != stacksave)
1426 (pcre_free)(new_recursive.offset_save);
1427 MRRETURN(MATCH_NOMATCH);
1428 }
1429 /* Control never reaches here */
1430
1431 /* "Once" brackets are like assertion brackets except that after a match,
1432 the point in the subject string is not moved back. Thus there can never be
1433 a move back into the brackets. Friedl calls these "atomic" subpatterns.
1434 Check the alternative branches in turn - the matching won't pass the KET
1435 for this kind of subpattern. If any one branch matches, we carry on as at
1436 the end of a normal bracket, leaving the subject pointer, but resetting
1437 the start-of-match value in case it was changed by \K. */
1438
1439 case OP_ONCE:
1440 prev = ecode;
1441 saved_eptr = eptr;
1442
1443 do
1444 {
1445 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1446 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1447 {
1448 mstart = md->start_match_ptr;
1449 break;
1450 }
1451 if (rrc != MATCH_NOMATCH &&
1452 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1453 RRETURN(rrc);
1454 ecode += GET(ecode,1);
1455 }
1456 while (*ecode == OP_ALT);
1457
1458 /* If we hit the end of the group (which could be repeated), fail */
1459
1460 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1461
1462 /* Continue as from after the assertion, updating the offsets high water
1463 mark, since extracts may have been taken. */
1464
1465 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1466
1467 offset_top = md->end_offset_top;
1468 eptr = md->end_match_ptr;
1469
1470 /* For a non-repeating ket, just continue at this level. This also
1471 happens for a repeating ket if no characters were matched in the group.
1472 This is the forcible breaking of infinite loops as implemented in Perl
1473 5.005. If there is an options reset, it will get obeyed in the normal
1474 course of events. */
1475
1476 if (*ecode == OP_KET || eptr == saved_eptr)
1477 {
1478 ecode += 1+LINK_SIZE;
1479 break;
1480 }
1481
1482 /* The repeating kets try the rest of the pattern or restart from the
1483 preceding bracket, in the appropriate order. The second "call" of match()
1484 uses tail recursion, to avoid using another stack frame. We need to reset
1485 any options that changed within the bracket before re-running it, so
1486 check the next opcode. */
1487
1488 if (ecode[1+LINK_SIZE] == OP_OPT)
1489 {
1490 ims = (ims & ~PCRE_IMS) | ecode[4];
1491 DPRINTF(("ims set to %02lx at group repeat\n", ims));
1492 }
1493
1494 if (*ecode == OP_KETRMIN)
1495 {
1496 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1497 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1498 ecode = prev;
1499 flags = 0;
1500 goto TAIL_RECURSE;
1501 }
1502 else /* OP_KETRMAX */
1503 {
1504 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1505 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1506 ecode += 1 + LINK_SIZE;
1507 flags = 0;
1508 goto TAIL_RECURSE;
1509 }
1510 /* Control never gets here */
1511
1512 /* An alternation is the end of a branch; scan along to find the end of the
1513 bracketed group and go to there. */
1514
1515 case OP_ALT:
1516 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1517 break;
1518
1519 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1520 indicating that it may occur zero times. It may repeat infinitely, or not
1521 at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1522 with fixed upper repeat limits are compiled as a number of copies, with the
1523 optional ones preceded by BRAZERO or BRAMINZERO. */
1524
1525 case OP_BRAZERO:
1526 {
1527 next = ecode+1;
1528 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1529 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1530 do next += GET(next,1); while (*next == OP_ALT);
1531 ecode = next + 1 + LINK_SIZE;
1532 }
1533 break;
1534
1535 case OP_BRAMINZERO:
1536 {
1537 next = ecode+1;
1538 do next += GET(next, 1); while (*next == OP_ALT);
1539 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1540 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1541 ecode++;
1542 }
1543 break;
1544
1545 case OP_SKIPZERO:
1546 {
1547 next = ecode+1;
1548 do next += GET(next,1); while (*next == OP_ALT);
1549 ecode = next + 1 + LINK_SIZE;
1550 }
1551 break;
1552
1553 /* End of a group, repeated or non-repeating. */
1554
1555 case OP_KET:
1556 case OP_KETRMIN:
1557 case OP_KETRMAX:
1558 prev = ecode - GET(ecode, 1);
1559
1560 /* If this was a group that remembered the subject start, in order to break
1561 infinite repeats of empty string matches, retrieve the subject start from
1562 the chain. Otherwise, set it NULL. */
1563
1564 if (*prev >= OP_SBRA)
1565 {
1566 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1567 eptrb = eptrb->epb_prev; /* Backup to previous group */
1568 }
1569 else saved_eptr = NULL;
1570
1571 /* If we are at the end of an assertion group or an atomic group, stop
1572 matching and return MATCH_MATCH, but record the current high water mark for
1573 use by positive assertions. We also need to record the match start in case
1574 it was changed by \K. */
1575
1576 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1577 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1578 *prev == OP_ONCE)
1579 {
1580 md->end_match_ptr = eptr; /* For ONCE */
1581 md->end_offset_top = offset_top;
1582 md->start_match_ptr = mstart;
1583 MRRETURN(MATCH_MATCH);
1584 }
1585
1586 /* For capturing groups we have to check the group number back at the start
1587 and if necessary complete handling an extraction by setting the offsets and
1588 bumping the high water mark. Note that whole-pattern recursion is coded as
1589 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1590 when the OP_END is reached. Other recursion is handled here. */
1591
1592 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1593 {
1594 number = GET2(prev, 1+LINK_SIZE);
1595 offset = number << 1;
1596
1597 #ifdef PCRE_DEBUG
1598 printf("end bracket %d", number);
1599 printf("\n");
1600 #endif
1601
1602 md->capture_last = number;
1603 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1604 {
1605 md->offset_vector[offset] =
1606 md->offset_vector[md->offset_end - number];
1607 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1608 if (offset_top <= offset) offset_top = offset + 2;
1609 }
1610
1611 /* Handle a recursively called group. Restore the offsets
1612 appropriately and continue from after the call. */
1613
1614 if (md->recursive != NULL && md->recursive->group_num == number)
1615 {
1616 recursion_info *rec = md->recursive;
1617 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1618 md->recursive = rec->prevrec;
1619 memcpy(md->offset_vector, rec->offset_save,
1620 rec->saved_max * sizeof(int));
1621 offset_top = rec->save_offset_top;
1622 ecode = rec->after_call;
1623 ims = original_ims;
1624 break;
1625 }
1626 }
1627
1628 /* For both capturing and non-capturing groups, reset the value of the ims
1629 flags, in case they got changed during the group. */
1630
1631 ims = original_ims;
1632 DPRINTF(("ims reset to %02lx\n", ims));
1633
1634 /* For a non-repeating ket, just continue at this level. This also
1635 happens for a repeating ket if no characters were matched in the group.
1636 This is the forcible breaking of infinite loops as implemented in Perl
1637 5.005. If there is an options reset, it will get obeyed in the normal
1638 course of events. */
1639
1640 if (*ecode == OP_KET || eptr == saved_eptr)
1641 {
1642 ecode += 1 + LINK_SIZE;
1643 break;
1644 }
1645
1646 /* The repeating kets try the rest of the pattern or restart from the
1647 preceding bracket, in the appropriate order. In the second case, we can use
1648 tail recursion to avoid using another stack frame, unless we have an
1649 unlimited repeat of a group that can match an empty string. */
1650
1651 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1652
1653 if (*ecode == OP_KETRMIN)
1654 {
1655 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1656 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1657 if (flags != 0) /* Could match an empty string */
1658 {
1659 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1660 RRETURN(rrc);
1661 }
1662 ecode = prev;
1663 goto TAIL_RECURSE;
1664 }
1665 else /* OP_KETRMAX */
1666 {
1667 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1668 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1669 ecode += 1 + LINK_SIZE;
1670 flags = 0;
1671 goto TAIL_RECURSE;
1672 }
1673 /* Control never gets here */
1674
1675 /* Start of subject unless notbol, or after internal newline if multiline */
1676
1677 case OP_CIRC:
1678 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1679 if ((ims & PCRE_MULTILINE) != 0)
1680 {
1681 if (eptr != md->start_subject &&
1682 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1683 MRRETURN(MATCH_NOMATCH);
1684 ecode++;
1685 break;
1686 }
1687 /* ... else fall through */
1688
1689 /* Start of subject assertion */
1690
1691 case OP_SOD:
1692 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1693 ecode++;
1694 break;
1695
1696 /* Start of match assertion */
1697
1698 case OP_SOM:
1699 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1700 ecode++;
1701 break;
1702
1703 /* Reset the start of match point */
1704
1705 case OP_SET_SOM:
1706 mstart = eptr;
1707 ecode++;
1708 break;
1709
1710 /* Assert before internal newline if multiline, or before a terminating
1711 newline unless endonly is set, else end of subject unless noteol is set. */
1712
1713 case OP_DOLL:
1714 if ((ims & PCRE_MULTILINE) != 0)
1715 {
1716 if (eptr < md->end_subject)
1717 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1718 else
1719 {
1720 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1721 SCHECK_PARTIAL();
1722 }
1723 ecode++;
1724 break;
1725 }
1726 else /* Not multiline */
1727 {
1728 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1729 if (!md->endonly) goto ASSERT_NL_OR_EOS;
1730 }
1731
1732 /* ... else fall through for endonly */
1733
1734 /* End of subject assertion (\z) */
1735
1736 case OP_EOD:
1737 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1738 SCHECK_PARTIAL();
1739 ecode++;
1740 break;
1741
1742 /* End of subject or ending \n assertion (\Z) */
1743
1744 case OP_EODN:
1745 ASSERT_NL_OR_EOS:
1746 if (eptr < md->end_subject &&
1747 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1748 MRRETURN(MATCH_NOMATCH);
1749
1750 /* Either at end of string or \n before end. */
1751
1752 SCHECK_PARTIAL();
1753 ecode++;
1754 break;
1755
1756 /* Word boundary assertions */
1757
1758 case OP_NOT_WORD_BOUNDARY:
1759 case OP_WORD_BOUNDARY:
1760 {
1761
1762 /* Find out if the previous and current characters are "word" characters.
1763 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1764 be "non-word" characters. Remember the earliest consulted character for
1765 partial matching. */
1766
1767 #ifdef SUPPORT_UTF8
1768 if (utf8)
1769 {
1770 /* Get status of previous character */
1771
1772 if (eptr == md->start_subject) prev_is_word = FALSE; else
1773 {
1774 USPTR lastptr = eptr - 1;
1775 while((*lastptr & 0xc0) == 0x80) lastptr--;
1776 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1777 GETCHAR(c, lastptr);
1778 #ifdef SUPPORT_UCP
1779 if (md->use_ucp)
1780 {
1781 if (c == '_') prev_is_word = TRUE; else
1782 {
1783 int cat = UCD_CATEGORY(c);
1784 prev_is_word = (cat == ucp_L || cat == ucp_N);
1785 }
1786 }
1787 else
1788 #endif
1789 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1790 }
1791
1792 /* Get status of next character */
1793
1794 if (eptr >= md->end_subject)
1795 {
1796 SCHECK_PARTIAL();
1797 cur_is_word = FALSE;
1798 }
1799 else
1800 {
1801 GETCHAR(c, eptr);
1802 #ifdef SUPPORT_UCP
1803 if (md->use_ucp)
1804 {
1805 if (c == '_') cur_is_word = TRUE; else
1806 {
1807 int cat = UCD_CATEGORY(c);
1808 cur_is_word = (cat == ucp_L || cat == ucp_N);
1809 }
1810 }
1811 else
1812 #endif
1813 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1814 }
1815 }
1816 else
1817 #endif
1818
1819 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1820 consistency with the behaviour of \w we do use it in this case. */
1821
1822 {
1823 /* Get status of previous character */
1824
1825 if (eptr == md->start_subject) prev_is_word = FALSE; else
1826 {
1827 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1828 #ifdef SUPPORT_UCP
1829 if (md->use_ucp)
1830 {
1831 c = eptr[-1];
1832 if (c == '_') prev_is_word = TRUE; else
1833 {
1834 int cat = UCD_CATEGORY(c);
1835 prev_is_word = (cat == ucp_L || cat == ucp_N);
1836 }
1837 }
1838 else
1839 #endif
1840 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1841 }
1842
1843 /* Get status of next character */
1844
1845 if (eptr >= md->end_subject)
1846 {
1847 SCHECK_PARTIAL();
1848 cur_is_word = FALSE;
1849 }
1850 else
1851 #ifdef SUPPORT_UCP
1852 if (md->use_ucp)
1853 {
1854 c = *eptr;
1855 if (c == '_') cur_is_word = TRUE; else
1856 {
1857 int cat = UCD_CATEGORY(c);
1858 cur_is_word = (cat == ucp_L || cat == ucp_N);
1859 }
1860 }
1861 else
1862 #endif
1863 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1864 }
1865
1866 /* Now see if the situation is what we want */
1867
1868 if ((*ecode++ == OP_WORD_BOUNDARY)?
1869 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1870 MRRETURN(MATCH_NOMATCH);
1871 }
1872 break;
1873
1874 /* Match a single character type; inline for speed */
1875
1876 case OP_ANY:
1877 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1878 /* Fall through */
1879
1880 case OP_ALLANY:
1881 if (eptr++ >= md->end_subject)
1882 {
1883 SCHECK_PARTIAL();
1884 MRRETURN(MATCH_NOMATCH);
1885 }
1886 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1887 ecode++;
1888 break;
1889
1890 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1891 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1892
1893 case OP_ANYBYTE:
1894 if (eptr++ >= md->end_subject)
1895 {
1896 SCHECK_PARTIAL();
1897 MRRETURN(MATCH_NOMATCH);
1898 }
1899 ecode++;
1900 break;
1901
1902 case OP_NOT_DIGIT:
1903 if (eptr >= md->end_subject)
1904 {
1905 SCHECK_PARTIAL();
1906 MRRETURN(MATCH_NOMATCH);
1907 }
1908 GETCHARINCTEST(c, eptr);
1909 if (
1910 #ifdef SUPPORT_UTF8
1911 c < 256 &&
1912 #endif
1913 (md->ctypes[c] & ctype_digit) != 0
1914 )
1915 MRRETURN(MATCH_NOMATCH);
1916 ecode++;
1917 break;
1918
1919 case OP_DIGIT:
1920 if (eptr >= md->end_subject)
1921 {
1922 SCHECK_PARTIAL();
1923 MRRETURN(MATCH_NOMATCH);
1924 }
1925 GETCHARINCTEST(c, eptr);
1926 if (
1927 #ifdef SUPPORT_UTF8
1928 c >= 256 ||
1929 #endif
1930 (md->ctypes[c] & ctype_digit) == 0
1931 )
1932 MRRETURN(MATCH_NOMATCH);
1933 ecode++;
1934 break;
1935
1936 case OP_NOT_WHITESPACE:
1937 if (eptr >= md->end_subject)
1938 {
1939 SCHECK_PARTIAL();
1940 MRRETURN(MATCH_NOMATCH);
1941 }
1942 GETCHARINCTEST(c, eptr);
1943 if (
1944 #ifdef SUPPORT_UTF8
1945 c < 256 &&
1946 #endif
1947 (md->ctypes[c] & ctype_space) != 0
1948 )
1949 MRRETURN(MATCH_NOMATCH);
1950 ecode++;
1951 break;
1952
1953 case OP_WHITESPACE:
1954 if (eptr >= md->end_subject)
1955 {
1956 SCHECK_PARTIAL();
1957 MRRETURN(MATCH_NOMATCH);
1958 }
1959 GETCHARINCTEST(c, eptr);
1960 if (
1961 #ifdef SUPPORT_UTF8
1962 c >= 256 ||
1963 #endif
1964 (md->ctypes[c] & ctype_space) == 0
1965 )
1966 MRRETURN(MATCH_NOMATCH);
1967 ecode++;
1968 break;
1969
1970 case OP_NOT_WORDCHAR:
1971 if (eptr >= md->end_subject)
1972 {
1973 SCHECK_PARTIAL();
1974 MRRETURN(MATCH_NOMATCH);
1975 }
1976 GETCHARINCTEST(c, eptr);
1977 if (
1978 #ifdef SUPPORT_UTF8
1979 c < 256 &&
1980 #endif
1981 (md->ctypes[c] & ctype_word) != 0
1982 )
1983 MRRETURN(MATCH_NOMATCH);
1984 ecode++;
1985 break;
1986
1987 case OP_WORDCHAR:
1988 if (eptr >= md->end_subject)
1989 {
1990 SCHECK_PARTIAL();
1991 MRRETURN(MATCH_NOMATCH);
1992 }
1993 GETCHARINCTEST(c, eptr);
1994 if (
1995 #ifdef SUPPORT_UTF8
1996 c >= 256 ||
1997 #endif
1998 (md->ctypes[c] & ctype_word) == 0
1999 )
2000 MRRETURN(MATCH_NOMATCH);
2001 ecode++;
2002 break;
2003
2004 case OP_ANYNL:
2005 if (eptr >= md->end_subject)
2006 {
2007 SCHECK_PARTIAL();
2008 MRRETURN(MATCH_NOMATCH);
2009 }
2010 GETCHARINCTEST(c, eptr);
2011 switch(c)
2012 {
2013 default: MRRETURN(MATCH_NOMATCH);
2014 case 0x000d:
2015 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2016 break;
2017
2018 case 0x000a:
2019 break;
2020
2021 case 0x000b:
2022 case 0x000c:
2023 case 0x0085:
2024 case 0x2028:
2025 case 0x2029:
2026 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2027 break;
2028 }
2029 ecode++;
2030 break;
2031
2032 case OP_NOT_HSPACE:
2033 if (eptr >= md->end_subject)
2034 {
2035 SCHECK_PARTIAL();
2036 MRRETURN(MATCH_NOMATCH);
2037 }
2038 GETCHARINCTEST(c, eptr);
2039 switch(c)
2040 {
2041 default: break;
2042 case 0x09: /* HT */
2043 case 0x20: /* SPACE */
2044 case 0xa0: /* NBSP */
2045 case 0x1680: /* OGHAM SPACE MARK */
2046 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2047 case 0x2000: /* EN QUAD */
2048 case 0x2001: /* EM QUAD */
2049 case 0x2002: /* EN SPACE */
2050 case 0x2003: /* EM SPACE */
2051 case 0x2004: /* THREE-PER-EM SPACE */
2052 case 0x2005: /* FOUR-PER-EM SPACE */
2053 case 0x2006: /* SIX-PER-EM SPACE */
2054 case 0x2007: /* FIGURE SPACE */
2055 case 0x2008: /* PUNCTUATION SPACE */
2056 case 0x2009: /* THIN SPACE */
2057 case 0x200A: /* HAIR SPACE */
2058 case 0x202f: /* NARROW NO-BREAK SPACE */
2059 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2060 case 0x3000: /* IDEOGRAPHIC SPACE */
2061 MRRETURN(MATCH_NOMATCH);
2062 }
2063 ecode++;
2064 break;
2065
2066 case OP_HSPACE:
2067 if (eptr >= md->end_subject)
2068 {
2069 SCHECK_PARTIAL();
2070 MRRETURN(MATCH_NOMATCH);
2071 }
2072 GETCHARINCTEST(c, eptr);
2073 switch(c)
2074 {
2075 default: MRRETURN(MATCH_NOMATCH);
2076 case 0x09: /* HT */
2077 case 0x20: /* SPACE */
2078 case 0xa0: /* NBSP */
2079 case 0x1680: /* OGHAM SPACE MARK */
2080 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2081 case 0x2000: /* EN QUAD */
2082 case 0x2001: /* EM QUAD */
2083 case 0x2002: /* EN SPACE */
2084 case 0x2003: /* EM SPACE */
2085 case 0x2004: /* THREE-PER-EM SPACE */
2086 case 0x2005: /* FOUR-PER-EM SPACE */
2087 case 0x2006: /* SIX-PER-EM SPACE */
2088 case 0x2007: /* FIGURE SPACE */
2089 case 0x2008: /* PUNCTUATION SPACE */
2090 case 0x2009: /* THIN SPACE */
2091 case 0x200A: /* HAIR SPACE */
2092 case 0x202f: /* NARROW NO-BREAK SPACE */
2093 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2094 case 0x3000: /* IDEOGRAPHIC SPACE */
2095 break;
2096 }
2097 ecode++;
2098 break;
2099
2100 case OP_NOT_VSPACE:
2101 if (eptr >= md->end_subject)
2102 {
2103 SCHECK_PARTIAL();
2104 MRRETURN(MATCH_NOMATCH);
2105 }
2106 GETCHARINCTEST(c, eptr);
2107 switch(c)
2108 {
2109 default: break;
2110 case 0x0a: /* LF */
2111 case 0x0b: /* VT */
2112 case 0x0c: /* FF */
2113 case 0x0d: /* CR */
2114 case 0x85: /* NEL */
2115 case 0x2028: /* LINE SEPARATOR */
2116 case 0x2029: /* PARAGRAPH SEPARATOR */
2117 MRRETURN(MATCH_NOMATCH);
2118 }
2119 ecode++;
2120 break;
2121
2122 case OP_VSPACE:
2123 if (eptr >= md->end_subject)
2124 {
2125 SCHECK_PARTIAL();
2126 MRRETURN(MATCH_NOMATCH);
2127 }
2128 GETCHARINCTEST(c, eptr);
2129 switch(c)
2130 {
2131 default: MRRETURN(MATCH_NOMATCH);
2132 case 0x0a: /* LF */
2133 case 0x0b: /* VT */
2134 case 0x0c: /* FF */
2135 case 0x0d: /* CR */
2136 case 0x85: /* NEL */
2137 case 0x2028: /* LINE SEPARATOR */
2138 case 0x2029: /* PARAGRAPH SEPARATOR */
2139 break;
2140 }
2141 ecode++;
2142 break;
2143
2144 #ifdef SUPPORT_UCP
2145 /* Check the next character by Unicode property. We will get here only
2146 if the support is in the binary; otherwise a compile-time error occurs. */
2147
2148 case OP_PROP:
2149 case OP_NOTPROP:
2150 if (eptr >= md->end_subject)
2151 {
2152 SCHECK_PARTIAL();
2153 MRRETURN(MATCH_NOMATCH);
2154 }
2155 GETCHARINCTEST(c, eptr);
2156 {
2157 const ucd_record *prop = GET_UCD(c);
2158
2159 switch(ecode[1])
2160 {
2161 case PT_ANY:
2162 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2163 break;
2164
2165 case PT_LAMP:
2166 if ((prop->chartype == ucp_Lu ||
2167 prop->chartype == ucp_Ll ||
2168 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2169 MRRETURN(MATCH_NOMATCH);
2170 break;
2171
2172 case PT_GC:
2173 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2174 MRRETURN(MATCH_NOMATCH);
2175 break;
2176
2177 case PT_PC:
2178 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2179 MRRETURN(MATCH_NOMATCH);
2180 break;
2181
2182 case PT_SC:
2183 if ((ecode[2] != prop->script) == (op == OP_PROP))
2184 MRRETURN(MATCH_NOMATCH);
2185 break;
2186
2187 /* These are specials */
2188
2189 case PT_ALNUM:
2190 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2191 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2192 MRRETURN(MATCH_NOMATCH);
2193 break;
2194
2195 case PT_SPACE: /* Perl space */
2196 if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2197 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2198 == (op == OP_NOTPROP))
2199 MRRETURN(MATCH_NOMATCH);
2200 break;
2201
2202 case PT_PXSPACE: /* POSIX space */
2203 if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2204 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2205 c == CHAR_FF || c == CHAR_CR)
2206 == (op == OP_NOTPROP))
2207 MRRETURN(MATCH_NOMATCH);
2208 break;
2209
2210 case PT_WORD:
2211 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2212 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2213 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2214 MRRETURN(MATCH_NOMATCH);
2215 break;
2216
2217 /* This should never occur */
2218
2219 default:
2220 RRETURN(PCRE_ERROR_INTERNAL);
2221 }
2222
2223 ecode += 3;
2224 }
2225 break;
2226
2227 /* Match an extended Unicode sequence. We will get here only if the support
2228 is in the binary; otherwise a compile-time error occurs. */
2229
2230 case OP_EXTUNI:
2231 if (eptr >= md->end_subject)
2232 {
2233 SCHECK_PARTIAL();
2234 MRRETURN(MATCH_NOMATCH);
2235 }
2236 GETCHARINCTEST(c, eptr);
2237 {
2238 int category = UCD_CATEGORY(c);
2239 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2240 while (eptr < md->end_subject)
2241 {
2242 int len = 1;
2243 if (!utf8) c = *eptr; else
2244 {
2245 GETCHARLEN(c, eptr, len);
2246 }
2247 category = UCD_CATEGORY(c);
2248 if (category != ucp_M) break;
2249 eptr += len;
2250 }
2251 }
2252 ecode++;
2253 break;
2254 #endif
2255
2256
2257 /* Match a back reference, possibly repeatedly. Look past the end of the
2258 item to see if there is repeat information following. The code is similar
2259 to that for character classes, but repeated for efficiency. Then obey
2260 similar code to character type repeats - written out again for speed.
2261 However, if the referenced string is the empty string, always treat
2262 it as matched, any number of times (otherwise there could be infinite
2263 loops). */
2264
2265 case OP_REF:
2266 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2267 ecode += 3;
2268
2269 /* If the reference is unset, there are two possibilities:
2270
2271 (a) In the default, Perl-compatible state, set the length negative;
2272 this ensures that every attempt at a match fails. We can't just fail
2273 here, because of the possibility of quantifiers with zero minima.
2274
2275 (b) If the JavaScript compatibility flag is set, set the length to zero
2276 so that the back reference matches an empty string.
2277
2278 Otherwise, set the length to the length of what was matched by the
2279 referenced subpattern. */
2280
2281 if (offset >= offset_top || md->offset_vector[offset] < 0)
2282 length = (md->jscript_compat)? 0 : -1;
2283 else
2284 length = md->offset_vector[offset+1] - md->offset_vector[offset];
2285
2286 /* Set up for repetition, or handle the non-repeated case */
2287
2288 switch (*ecode)
2289 {
2290 case OP_CRSTAR:
2291 case OP_CRMINSTAR:
2292 case OP_CRPLUS:
2293 case OP_CRMINPLUS:
2294 case OP_CRQUERY:
2295 case OP_CRMINQUERY:
2296 c = *ecode++ - OP_CRSTAR;
2297 minimize = (c & 1) != 0;
2298 min = rep_min[c]; /* Pick up values from tables; */
2299 max = rep_max[c]; /* zero for max => infinity */
2300 if (max == 0) max = INT_MAX;
2301 break;
2302
2303 case OP_CRRANGE:
2304 case OP_CRMINRANGE:
2305 minimize = (*ecode == OP_CRMINRANGE);
2306 min = GET2(ecode, 1);
2307 max = GET2(ecode, 3);
2308 if (max == 0) max = INT_MAX;
2309 ecode += 5;
2310 break;
2311
2312 default: /* No repeat follows */
2313 if ((length = match_ref(offset, eptr, length, md, ims)) < 0)
2314 {
2315 CHECK_PARTIAL();
2316 MRRETURN(MATCH_NOMATCH);
2317 }
2318 eptr += length;
2319 continue; /* With the main loop */
2320 }
2321
2322 /* Handle repeated back references. If the length of the reference is
2323 zero, just continue with the main loop. */
2324
2325 if (length == 0) continue;
2326
2327 /* First, ensure the minimum number of matches are present. We get back
2328 the length of the reference string explicitly rather than passing the
2329 address of eptr, so that eptr can be a register variable. */
2330
2331 for (i = 1; i <= min; i++)
2332 {
2333 int slength;
2334 if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2335 {
2336 CHECK_PARTIAL();
2337 MRRETURN(MATCH_NOMATCH);
2338 }
2339 eptr += slength;
2340 }
2341
2342 /* If min = max, continue at the same level without recursion.
2343 They are not both allowed to be zero. */
2344
2345 if (min == max) continue;
2346
2347 /* If minimizing, keep trying and advancing the pointer */
2348
2349 if (minimize)
2350 {
2351 for (fi = min;; fi++)
2352 {
2353 int slength;
2354 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2355 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2356 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2357 if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2358 {
2359 CHECK_PARTIAL();
2360 MRRETURN(MATCH_NOMATCH);
2361 }
2362 eptr += slength;
2363 }
2364 /* Control never gets here */
2365 }
2366
2367 /* If maximizing, find the longest string and work backwards */
2368
2369 else
2370 {
2371 pp = eptr;
2372 for (i = min; i < max; i++)
2373 {
2374 int slength;
2375 if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2376 {
2377 CHECK_PARTIAL();
2378 break;
2379 }
2380 eptr += slength;
2381 }
2382 while (eptr >= pp)
2383 {
2384 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2385 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2386 eptr -= length;
2387 }
2388 MRRETURN(MATCH_NOMATCH);
2389 }
2390 /* Control never gets here */
2391
2392 /* Match a bit-mapped character class, possibly repeatedly. This op code is
2393 used when all the characters in the class have values in the range 0-255,
2394 and either the matching is caseful, or the characters are in the range
2395 0-127 when UTF-8 processing is enabled. The only difference between
2396 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2397 encountered.
2398
2399 First, look past the end of the item to see if there is repeat information
2400 following. Then obey similar code to character type repeats - written out
2401 again for speed. */
2402
2403 case OP_NCLASS:
2404 case OP_CLASS:
2405 {
2406 data = ecode + 1; /* Save for matching */
2407 ecode += 33; /* Advance past the item */
2408
2409 switch (*ecode)
2410 {
2411 case OP_CRSTAR:
2412 case OP_CRMINSTAR:
2413 case OP_CRPLUS:
2414 case OP_CRMINPLUS:
2415 case OP_CRQUERY:
2416 case OP_CRMINQUERY:
2417 c = *ecode++ - OP_CRSTAR;
2418 minimize = (c & 1) != 0;
2419 min = rep_min[c]; /* Pick up values from tables; */
2420 max = rep_max[c]; /* zero for max => infinity */
2421 if (max == 0) max = INT_MAX;
2422 break;
2423
2424 case OP_CRRANGE:
2425 case OP_CRMINRANGE:
2426 minimize = (*ecode == OP_CRMINRANGE);
2427 min = GET2(ecode, 1);
2428 max = GET2(ecode, 3);
2429 if (max == 0) max = INT_MAX;
2430 ecode += 5;
2431 break;
2432
2433 default: /* No repeat follows */
2434 min = max = 1;
2435 break;
2436 }
2437
2438 /* First, ensure the minimum number of matches are present. */
2439
2440 #ifdef SUPPORT_UTF8
2441 /* UTF-8 mode */
2442 if (utf8)
2443 {
2444 for (i = 1; i <= min; i++)
2445 {
2446 if (eptr >= md->end_subject)
2447 {
2448 SCHECK_PARTIAL();
2449 MRRETURN(MATCH_NOMATCH);
2450 }
2451 GETCHARINC(c, eptr);
2452 if (c > 255)
2453 {
2454 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2455 }
2456 else
2457 {
2458 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2459 }
2460 }
2461 }
2462 else
2463 #endif
2464 /* Not UTF-8 mode */
2465 {
2466 for (i = 1; i <= min; i++)
2467 {
2468 if (eptr >= md->end_subject)
2469 {
2470 SCHECK_PARTIAL();
2471 MRRETURN(MATCH_NOMATCH);
2472 }
2473 c = *eptr++;
2474 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2475 }
2476 }
2477
2478 /* If max == min we can continue with the main loop without the
2479 need to recurse. */
2480
2481 if (min == max) continue;
2482
2483 /* If minimizing, keep testing the rest of the expression and advancing
2484 the pointer while it matches the class. */
2485
2486 if (minimize)
2487 {
2488 #ifdef SUPPORT_UTF8
2489 /* UTF-8 mode */
2490 if (utf8)
2491 {
2492 for (fi = min;; fi++)
2493 {
2494 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2495 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2496 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2497 if (eptr >= md->end_subject)
2498 {
2499 SCHECK_PARTIAL();
2500 MRRETURN(MATCH_NOMATCH);
2501 }
2502 GETCHARINC(c, eptr);
2503 if (c > 255)
2504 {
2505 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2506 }
2507 else
2508 {
2509 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2510 }
2511 }
2512 }
2513 else
2514 #endif
2515 /* Not UTF-8 mode */
2516 {
2517 for (fi = min;; fi++)
2518 {
2519 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2520 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2521 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2522 if (eptr >= md->end_subject)
2523 {
2524 SCHECK_PARTIAL();
2525 MRRETURN(MATCH_NOMATCH);
2526 }
2527 c = *eptr++;
2528 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2529 }
2530 }
2531 /* Control never gets here */
2532 }
2533
2534 /* If maximizing, find the longest possible run, then work backwards. */
2535
2536 else
2537 {
2538 pp = eptr;
2539
2540 #ifdef SUPPORT_UTF8
2541 /* UTF-8 mode */
2542 if (utf8)
2543 {
2544 for (i = min; i < max; i++)
2545 {
2546 int len = 1;
2547 if (eptr >= md->end_subject)
2548 {
2549 SCHECK_PARTIAL();
2550 break;
2551 }
2552 GETCHARLEN(c, eptr, len);
2553 if (c > 255)
2554 {
2555 if (op == OP_CLASS) break;
2556 }
2557 else
2558 {
2559 if ((data[c/8] & (1 << (c&7))) == 0) break;
2560 }
2561 eptr += len;
2562 }
2563 for (;;)
2564 {
2565 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2566 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2567 if (eptr-- == pp) break; /* Stop if tried at original pos */
2568 BACKCHAR(eptr);
2569 }
2570 }
2571 else
2572 #endif
2573 /* Not UTF-8 mode */
2574 {
2575 for (i = min; i < max; i++)
2576 {
2577 if (eptr >= md->end_subject)
2578 {
2579 SCHECK_PARTIAL();
2580 break;
2581 }
2582 c = *eptr;
2583 if ((data[c/8] & (1 << (c&7))) == 0) break;
2584 eptr++;
2585 }
2586 while (eptr >= pp)
2587 {
2588 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2589 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2590 eptr--;
2591 }
2592 }
2593
2594 MRRETURN(MATCH_NOMATCH);
2595 }
2596 }
2597 /* Control never gets here */
2598
2599
2600 /* Match an extended character class. This opcode is encountered only
2601 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2602 mode, because Unicode properties are supported in non-UTF-8 mode. */
2603
2604 #ifdef SUPPORT_UTF8
2605 case OP_XCLASS:
2606 {
2607 data = ecode + 1 + LINK_SIZE; /* Save for matching */
2608 ecode += GET(ecode, 1); /* Advance past the item */
2609
2610 switch (*ecode)
2611 {
2612 case OP_CRSTAR:
2613 case OP_CRMINSTAR:
2614 case OP_CRPLUS:
2615 case OP_CRMINPLUS:
2616 case OP_CRQUERY:
2617 case OP_CRMINQUERY:
2618 c = *ecode++ - OP_CRSTAR;
2619 minimize = (c & 1) != 0;
2620 min = rep_min[c]; /* Pick up values from tables; */
2621 max = rep_max[c]; /* zero for max => infinity */
2622 if (max == 0) max = INT_MAX;
2623 break;
2624
2625 case OP_CRRANGE:
2626 case OP_CRMINRANGE:
2627 minimize = (*ecode == OP_CRMINRANGE);
2628 min = GET2(ecode, 1);
2629 max = GET2(ecode, 3);
2630 if (max == 0) max = INT_MAX;
2631 ecode += 5;
2632 break;
2633
2634 default: /* No repeat follows */
2635 min = max = 1;
2636 break;
2637 }
2638
2639 /* First, ensure the minimum number of matches are present. */
2640
2641 for (i = 1; i <= min; i++)
2642 {
2643 if (eptr >= md->end_subject)
2644 {
2645 SCHECK_PARTIAL();
2646 MRRETURN(MATCH_NOMATCH);
2647 }
2648 GETCHARINCTEST(c, eptr);
2649 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2650 }
2651
2652 /* If max == min we can continue with the main loop without the
2653 need to recurse. */
2654
2655 if (min == max) continue;
2656
2657 /* If minimizing, keep testing the rest of the expression and advancing
2658 the pointer while it matches the class. */
2659
2660 if (minimize)
2661 {
2662 for (fi = min;; fi++)
2663 {
2664 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2665 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2666 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2667 if (eptr >= md->end_subject)
2668 {
2669 SCHECK_PARTIAL();
2670 MRRETURN(MATCH_NOMATCH);
2671 }
2672 GETCHARINCTEST(c, eptr);
2673 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2674 }
2675 /* Control never gets here */
2676 }
2677
2678 /* If maximizing, find the longest possible run, then work backwards. */
2679
2680 else
2681 {
2682 pp = eptr;
2683 for (i = min; i < max; i++)
2684 {
2685 int len = 1;
2686 if (eptr >= md->end_subject)
2687 {
2688 SCHECK_PARTIAL();
2689 break;
2690 }
2691 GETCHARLENTEST(c, eptr, len);
2692 if (!_pcre_xclass(c, data)) break;
2693 eptr += len;
2694 }
2695 for(;;)
2696 {
2697 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2698 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2699 if (eptr-- == pp) break; /* Stop if tried at original pos */
2700 if (utf8) BACKCHAR(eptr);
2701 }
2702 MRRETURN(MATCH_NOMATCH);
2703 }
2704
2705 /* Control never gets here */
2706 }
2707 #endif /* End of XCLASS */
2708
2709 /* Match a single character, casefully */
2710
2711 case OP_CHAR:
2712 #ifdef SUPPORT_UTF8
2713 if (utf8)
2714 {
2715 length = 1;
2716 ecode++;
2717 GETCHARLEN(fc, ecode, length);
2718 if (length > md->end_subject - eptr)
2719 {
2720 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2721 MRRETURN(MATCH_NOMATCH);
2722 }
2723 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2724 }
2725 else
2726 #endif
2727
2728 /* Non-UTF-8 mode */
2729 {
2730 if (md->end_subject - eptr < 1)
2731 {
2732 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2733 MRRETURN(MATCH_NOMATCH);
2734 }
2735 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2736 ecode += 2;
2737 }
2738 break;
2739
2740 /* Match a single character, caselessly */
2741
2742 case OP_CHARNC:
2743 #ifdef SUPPORT_UTF8
2744 if (utf8)
2745 {
2746 length = 1;
2747 ecode++;
2748 GETCHARLEN(fc, ecode, length);
2749
2750 if (length > md->end_subject - eptr)
2751 {
2752 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2753 MRRETURN(MATCH_NOMATCH);
2754 }
2755
2756 /* If the pattern character's value is < 128, we have only one byte, and
2757 can use the fast lookup table. */
2758
2759 if (fc < 128)
2760 {
2761 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2762 }
2763
2764 /* Otherwise we must pick up the subject character */
2765
2766 else
2767 {
2768 unsigned int dc;
2769 GETCHARINC(dc, eptr);
2770 ecode += length;
2771
2772 /* If we have Unicode property support, we can use it to test the other
2773 case of the character, if there is one. */
2774
2775 if (fc != dc)
2776 {
2777 #ifdef SUPPORT_UCP
2778 if (dc != UCD_OTHERCASE(fc))
2779 #endif
2780 MRRETURN(MATCH_NOMATCH);
2781 }
2782 }
2783 }
2784 else
2785 #endif /* SUPPORT_UTF8 */
2786
2787 /* Non-UTF-8 mode */
2788 {
2789 if (md->end_subject - eptr < 1)
2790 {
2791 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2792 MRRETURN(MATCH_NOMATCH);
2793 }
2794 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2795 ecode += 2;
2796 }
2797 break;
2798
2799 /* Match a single character repeatedly. */
2800
2801 case OP_EXACT:
2802 min = max = GET2(ecode, 1);
2803 ecode += 3;
2804 goto REPEATCHAR;
2805
2806 case OP_POSUPTO:
2807 possessive = TRUE;
2808 /* Fall through */
2809
2810 case OP_UPTO:
2811 case OP_MINUPTO:
2812 min = 0;
2813 max = GET2(ecode, 1);
2814 minimize = *ecode == OP_MINUPTO;
2815 ecode += 3;
2816 goto REPEATCHAR;
2817
2818 case OP_POSSTAR:
2819 possessive = TRUE;
2820 min = 0;
2821 max = INT_MAX;
2822 ecode++;
2823 goto REPEATCHAR;
2824
2825 case OP_POSPLUS:
2826 possessive = TRUE;
2827 min = 1;
2828 max = INT_MAX;
2829 ecode++;
2830 goto REPEATCHAR;
2831
2832 case OP_POSQUERY:
2833 possessive = TRUE;
2834 min = 0;
2835 max = 1;
2836 ecode++;
2837 goto REPEATCHAR;
2838
2839 case OP_STAR:
2840 case OP_MINSTAR:
2841 case OP_PLUS:
2842 case OP_MINPLUS:
2843 case OP_QUERY:
2844 case OP_MINQUERY:
2845 c = *ecode++ - OP_STAR;
2846 minimize = (c & 1) != 0;
2847
2848 min = rep_min[c]; /* Pick up values from tables; */
2849 max = rep_max[c]; /* zero for max => infinity */
2850 if (max == 0) max = INT_MAX;
2851
2852 /* Common code for all repeated single-character matches. */
2853
2854 REPEATCHAR:
2855 #ifdef SUPPORT_UTF8
2856 if (utf8)
2857 {
2858 length = 1;
2859 charptr = ecode;
2860 GETCHARLEN(fc, ecode, length);
2861 ecode += length;
2862
2863 /* Handle multibyte character matching specially here. There is
2864 support for caseless matching if UCP support is present. */
2865
2866 if (length > 1)
2867 {
2868 #ifdef SUPPORT_UCP
2869 unsigned int othercase;
2870 if ((ims & PCRE_CASELESS) != 0 &&
2871 (othercase = UCD_OTHERCASE(fc)) != fc)
2872 oclength = _pcre_ord2utf8(othercase, occhars);
2873 else oclength = 0;
2874 #endif /* SUPPORT_UCP */
2875
2876 for (i = 1; i <= min; i++)
2877 {
2878 if (eptr <= md->end_subject - length &&
2879 memcmp(eptr, charptr, length) == 0) eptr += length;
2880 #ifdef SUPPORT_UCP
2881 else if (oclength > 0 &&
2882 eptr <= md->end_subject - oclength &&
2883 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2884 #endif /* SUPPORT_UCP */
2885 else
2886 {
2887 CHECK_PARTIAL();
2888 MRRETURN(MATCH_NOMATCH);
2889 }
2890 }
2891
2892 if (min == max) continue;
2893
2894 if (minimize)
2895 {
2896 for (fi = min;; fi++)
2897 {
2898 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2899 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2900 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2901 if (eptr <= md->end_subject - length &&
2902 memcmp(eptr, charptr, length) == 0) eptr += length;
2903 #ifdef SUPPORT_UCP
2904 else if (oclength > 0 &&
2905 eptr <= md->end_subject - oclength &&
2906 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2907 #endif /* SUPPORT_UCP */
2908 else
2909 {
2910 CHECK_PARTIAL();
2911 MRRETURN(MATCH_NOMATCH);
2912 }
2913 }
2914 /* Control never gets here */
2915 }
2916
2917 else /* Maximize */
2918 {
2919 pp = eptr;
2920 for (i = min; i < max; i++)
2921 {
2922 if (eptr <= md->end_subject - length &&
2923 memcmp(eptr, charptr, length) == 0) eptr += length;
2924 #ifdef SUPPORT_UCP
2925 else if (oclength > 0 &&
2926 eptr <= md->end_subject - oclength &&
2927 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2928 #endif /* SUPPORT_UCP */
2929 else
2930 {
2931 CHECK_PARTIAL();
2932 break;
2933 }
2934 }
2935
2936 if (possessive) continue;
2937
2938 for(;;)
2939 {
2940 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2941 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2942 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2943 #ifdef SUPPORT_UCP
2944 eptr--;
2945 BACKCHAR(eptr);
2946 #else /* without SUPPORT_UCP */
2947 eptr -= length;
2948 #endif /* SUPPORT_UCP */
2949 }
2950 }
2951 /* Control never gets here */
2952 }
2953
2954 /* If the length of a UTF-8 character is 1, we fall through here, and
2955 obey the code as for non-UTF-8 characters below, though in this case the
2956 value of fc will always be < 128. */
2957 }
2958 else
2959 #endif /* SUPPORT_UTF8 */
2960
2961 /* When not in UTF-8 mode, load a single-byte character. */
2962
2963 fc = *ecode++;
2964
2965 /* The value of fc at this point is always less than 256, though we may or
2966 may not be in UTF-8 mode. The code is duplicated for the caseless and
2967 caseful cases, for speed, since matching characters is likely to be quite
2968 common. First, ensure the minimum number of matches are present. If min =
2969 max, continue at the same level without recursing. Otherwise, if
2970 minimizing, keep trying the rest of the expression and advancing one
2971 matching character if failing, up to the maximum. Alternatively, if
2972 maximizing, find the maximum number of characters and work backwards. */
2973
2974 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2975 max, eptr));
2976
2977 if ((ims & PCRE_CASELESS) != 0)
2978 {
2979 fc = md->lcc[fc];
2980 for (i = 1; i <= min; i++)
2981 {
2982 if (eptr >= md->end_subject)
2983 {
2984 SCHECK_PARTIAL();
2985 MRRETURN(MATCH_NOMATCH);
2986 }
2987 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2988 }
2989 if (min == max) continue;
2990 if (minimize)
2991 {
2992 for (fi = min;; fi++)
2993 {
2994 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2995 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2996 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2997 if (eptr >= md->end_subject)
2998 {
2999 SCHECK_PARTIAL();
3000 MRRETURN(MATCH_NOMATCH);
3001 }
3002 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3003 }
3004 /* Control never gets here */
3005 }
3006 else /* Maximize */
3007 {
3008 pp = eptr;
3009 for (i = min; i < max; i++)
3010 {
3011 if (eptr >= md->end_subject)
3012 {
3013 SCHECK_PARTIAL();
3014 break;
3015 }
3016 if (fc != md->lcc[*eptr]) break;
3017 eptr++;
3018 }
3019
3020 if (possessive) continue;
3021
3022 while (eptr >= pp)
3023 {
3024 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3025 eptr--;
3026 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3027 }
3028 MRRETURN(MATCH_NOMATCH);
3029 }
3030 /* Control never gets here */
3031 }
3032
3033 /* Caseful comparisons (includes all multi-byte characters) */
3034
3035 else
3036 {
3037 for (i = 1; i <= min; i++)
3038 {
3039 if (eptr >= md->end_subject)
3040 {
3041 SCHECK_PARTIAL();
3042 MRRETURN(MATCH_NOMATCH);
3043 }
3044 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3045 }
3046
3047 if (min == max) continue;
3048
3049 if (minimize)
3050 {
3051 for (fi = min;; fi++)
3052 {
3053 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3054 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3055 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3056 if (eptr >= md->end_subject)
3057 {
3058 SCHECK_PARTIAL();
3059 MRRETURN(MATCH_NOMATCH);
3060 }
3061 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3062 }
3063 /* Control never gets here */
3064 }
3065 else /* Maximize */
3066 {
3067 pp = eptr;
3068 for (i = min; i < max; i++)
3069 {
3070 if (eptr >= md->end_subject)
3071 {
3072 SCHECK_PARTIAL();
3073 break;
3074 }
3075 if (fc != *eptr) break;
3076 eptr++;
3077 }
3078 if (possessive) continue;
3079
3080 while (eptr >= pp)
3081 {
3082 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3083 eptr--;
3084 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3085 }
3086 MRRETURN(MATCH_NOMATCH);
3087 }
3088 }
3089 /* Control never gets here */
3090
3091 /* Match a negated single one-byte character. The character we are
3092 checking can be multibyte. */
3093
3094 case OP_NOT:
3095 if (eptr >= md->end_subject)
3096 {
3097 SCHECK_PARTIAL();
3098 MRRETURN(MATCH_NOMATCH);
3099 }
3100 ecode++;
3101 GETCHARINCTEST(c, eptr);
3102 if ((ims & PCRE_CASELESS) != 0)
3103 {
3104 #ifdef SUPPORT_UTF8
3105 if (c < 256)
3106 #endif
3107 c = md->lcc[c];
3108 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3109 }
3110 else
3111 {
3112 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3113 }
3114 break;
3115
3116 /* Match a negated single one-byte character repeatedly. This is almost a
3117 repeat of the code for a repeated single character, but I haven't found a
3118 nice way of commoning these up that doesn't require a test of the
3119 positive/negative option for each character match. Maybe that wouldn't add
3120 very much to the time taken, but character matching *is* what this is all
3121 about... */
3122
3123 case OP_NOTEXACT:
3124 min = max = GET2(ecode, 1);
3125 ecode += 3;
3126 goto REPEATNOTCHAR;
3127
3128 case OP_NOTUPTO:
3129 case OP_NOTMINUPTO:
3130 min = 0;
3131 max = GET2(ecode, 1);
3132 minimize = *ecode == OP_NOTMINUPTO;
3133 ecode += 3;
3134 goto REPEATNOTCHAR;
3135
3136 case OP_NOTPOSSTAR:
3137 possessive = TRUE;
3138 min = 0;
3139 max = INT_MAX;
3140 ecode++;
3141 goto REPEATNOTCHAR;
3142
3143 case OP_NOTPOSPLUS:
3144 possessive = TRUE;
3145 min = 1;
3146 max = INT_MAX;
3147 ecode++;
3148 goto REPEATNOTCHAR;
3149
3150 case OP_NOTPOSQUERY:
3151 possessive = TRUE;
3152 min = 0;
3153 max = 1;
3154 ecode++;
3155 goto REPEATNOTCHAR;
3156
3157 case OP_NOTPOSUPTO:
3158 possessive = TRUE;
3159 min = 0;
3160 max = GET2(ecode, 1);
3161 ecode += 3;
3162 goto REPEATNOTCHAR;
3163
3164 case OP_NOTSTAR:
3165 case OP_NOTMINSTAR:
3166 case OP_NOTPLUS:
3167 case OP_NOTMINPLUS:
3168 case OP_NOTQUERY:
3169 case OP_NOTMINQUERY:
3170 c = *ecode++ - OP_NOTSTAR;
3171 minimize = (c & 1) != 0;
3172 min = rep_min[c]; /* Pick up values from tables; */
3173 max = rep_max[c]; /* zero for max => infinity */
3174 if (max == 0) max = INT_MAX;
3175
3176 /* Common code for all repeated single-byte matches. */
3177
3178 REPEATNOTCHAR:
3179 fc = *ecode++;
3180
3181 /* The code is duplicated for the caseless and caseful cases, for speed,
3182 since matching characters is likely to be quite common. First, ensure the
3183 minimum number of matches are present. If min = max, continue at the same
3184 level without recursing. Otherwise, if minimizing, keep trying the rest of
3185 the expression and advancing one matching character if failing, up to the
3186 maximum. Alternatively, if maximizing, find the maximum number of
3187 characters and work backwards. */
3188
3189 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3190 max, eptr));
3191
3192 if ((ims & PCRE_CASELESS) != 0)
3193 {
3194 fc = md->lcc[fc];
3195
3196 #ifdef SUPPORT_UTF8
3197 /* UTF-8 mode */
3198 if (utf8)
3199 {
3200 register unsigned int d;
3201 for (i = 1; i <= min; i++)
3202 {
3203 if (eptr >= md->end_subject)
3204 {
3205 SCHECK_PARTIAL();
3206 MRRETURN(MATCH_NOMATCH);
3207 }
3208 GETCHARINC(d, eptr);
3209 if (d < 256) d = md->lcc[d];
3210 if (fc == d) MRRETURN(MATCH_NOMATCH);
3211 }
3212 }
3213 else
3214 #endif
3215
3216 /* Not UTF-8 mode */
3217 {
3218 for (i = 1; i <= min; i++)
3219 {
3220 if (eptr >= md->end_subject)
3221 {
3222 SCHECK_PARTIAL();
3223 MRRETURN(MATCH_NOMATCH);
3224 }
3225 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3226 }
3227 }
3228
3229 if (min == max) continue;
3230
3231 if (minimize)
3232 {
3233 #ifdef SUPPORT_UTF8
3234 /* UTF-8 mode */
3235 if (utf8)
3236 {
3237 register unsigned int d;
3238 for (fi = min;; fi++)
3239 {
3240 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3241 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3242 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3243 if (eptr >= md->end_subject)
3244 {
3245 SCHECK_PARTIAL();
3246 MRRETURN(MATCH_NOMATCH);
3247 }
3248 GETCHARINC(d, eptr);
3249 if (d < 256) d = md->lcc[d];
3250 if (fc == d) MRRETURN(MATCH_NOMATCH);
3251 }
3252 }
3253 else
3254 #endif
3255 /* Not UTF-8 mode */
3256 {
3257 for (fi = min;; fi++)
3258 {
3259 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3260 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3261 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3262 if (eptr >= md->end_subject)
3263 {
3264 SCHECK_PARTIAL();
3265 MRRETURN(MATCH_NOMATCH);
3266 }
3267 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3268 }
3269 }
3270 /* Control never gets here */
3271 }
3272
3273 /* Maximize case */
3274
3275 else
3276 {
3277 pp = eptr;
3278
3279 #ifdef SUPPORT_UTF8
3280 /* UTF-8 mode */
3281 if (utf8)
3282 {
3283 register unsigned int d;
3284 for (i = min; i < max; i++)
3285 {
3286 int len = 1;
3287 if (eptr >= md->end_subject)
3288 {
3289 SCHECK_PARTIAL();
3290 break;
3291 }
3292 GETCHARLEN(d, eptr, len);
3293 if (d < 256) d = md->lcc[d];
3294 if (fc == d) break;
3295 eptr += len;
3296 }
3297 if (possessive) continue;
3298 for(;;)
3299 {
3300 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3301 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3302 if (eptr-- == pp) break; /* Stop if tried at original pos */
3303 BACKCHAR(eptr);
3304 }
3305 }
3306 else
3307 #endif
3308 /* Not UTF-8 mode */
3309 {
3310 for (i = min; i < max; i++)
3311 {
3312 if (eptr >= md->end_subject)
3313 {
3314 SCHECK_PARTIAL();
3315 break;
3316 }
3317 if (fc == md->lcc[*eptr]) break;
3318 eptr++;
3319 }
3320 if (possessive) continue;
3321 while (eptr >= pp)
3322 {
3323 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3324 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3325 eptr--;
3326 }
3327 }
3328
3329 MRRETURN(MATCH_NOMATCH);
3330 }
3331 /* Control never gets here */
3332 }
3333
3334 /* Caseful comparisons */
3335
3336 else
3337 {
3338 #ifdef SUPPORT_UTF8
3339 /* UTF-8 mode */
3340 if (utf8)
3341 {
3342 register unsigned int d;
3343 for (i = 1; i <= min; i++)
3344 {
3345 if (eptr >= md->end_subject)
3346 {
3347 SCHECK_PARTIAL();
3348 MRRETURN(MATCH_NOMATCH);
3349 }
3350 GETCHARINC(d, eptr);
3351 if (fc == d) MRRETURN(MATCH_NOMATCH);
3352 }
3353 }
3354 else
3355 #endif
3356 /* Not UTF-8 mode */
3357 {
3358 for (i = 1; i <= min; i++)
3359 {
3360 if (eptr >= md->end_subject)
3361 {
3362 SCHECK_PARTIAL();
3363 MRRETURN(MATCH_NOMATCH);
3364 }
3365 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3366 }
3367 }
3368
3369 if (min == max) continue;
3370
3371 if (minimize)
3372 {
3373 #ifdef SUPPORT_UTF8
3374 /* UTF-8 mode */
3375 if (utf8)
3376 {
3377 register unsigned int d;
3378 for (fi = min;; fi++)
3379 {
3380 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3381 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3382 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3383 if (eptr >= md->end_subject)
3384 {
3385 SCHECK_PARTIAL();
3386 MRRETURN(MATCH_NOMATCH);
3387 }
3388 GETCHARINC(d, eptr);
3389 if (fc == d) MRRETURN(MATCH_NOMATCH);
3390 }
3391 }
3392 else
3393 #endif
3394 /* Not UTF-8 mode */
3395 {
3396 for (fi = min;; fi++)
3397 {
3398 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3399 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3400 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3401 if (eptr >= md->end_subject)
3402 {
3403 SCHECK_PARTIAL();
3404 MRRETURN(MATCH_NOMATCH);
3405 }
3406 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3407 }
3408 }
3409 /* Control never gets here */
3410 }
3411
3412 /* Maximize case */
3413
3414 else
3415 {
3416 pp = eptr;
3417
3418 #ifdef SUPPORT_UTF8
3419 /* UTF-8 mode */
3420 if (utf8)
3421 {
3422 register unsigned int d;
3423 for (i = min; i < max; i++)
3424 {
3425 int len = 1;
3426 if (eptr >= md->end_subject)
3427 {
3428 SCHECK_PARTIAL();
3429 break;
3430 }
3431 GETCHARLEN(d, eptr, len);
3432 if (fc == d) break;
3433 eptr += len;
3434 }
3435 if (possessive) continue;
3436 for(;;)
3437 {
3438 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3439 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3440 if (eptr-- == pp) break; /* Stop if tried at original pos */
3441 BACKCHAR(eptr);
3442 }
3443 }
3444 else
3445 #endif
3446 /* Not UTF-8 mode */
3447 {
3448 for (i = min; i < max; i++)
3449 {
3450 if (eptr >= md->end_subject)
3451 {
3452 SCHECK_PARTIAL();
3453 break;
3454 }
3455 if (fc == *eptr) break;
3456 eptr++;
3457 }
3458 if (possessive) continue;
3459 while (eptr >= pp)
3460 {
3461 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3462 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3463 eptr--;
3464 }
3465 }
3466
3467 MRRETURN(MATCH_NOMATCH);
3468 }
3469 }
3470 /* Control never gets here */
3471
3472 /* Match a single character type repeatedly; several different opcodes
3473 share code. This is very similar to the code for single characters, but we
3474 repeat it in the interests of efficiency. */
3475
3476 case OP_TYPEEXACT:
3477 min = max = GET2(ecode, 1);
3478 minimize = TRUE;
3479 ecode += 3;
3480 goto REPEATTYPE;
3481
3482 case OP_TYPEUPTO:
3483 case OP_TYPEMINUPTO:
3484 min = 0;
3485 max = GET2(ecode, 1);
3486 minimize = *ecode == OP_TYPEMINUPTO;
3487 ecode += 3;
3488 goto REPEATTYPE;
3489
3490 case OP_TYPEPOSSTAR:
3491 possessive = TRUE;
3492 min = 0;
3493 max = INT_MAX;
3494 ecode++;
3495 goto REPEATTYPE;
3496
3497 case OP_TYPEPOSPLUS:
3498 possessive = TRUE;
3499 min = 1;
3500 max = INT_MAX;
3501 ecode++;
3502 goto REPEATTYPE;
3503
3504 case OP_TYPEPOSQUERY:
3505 possessive = TRUE;
3506 min = 0;
3507 max = 1;
3508 ecode++;
3509 goto REPEATTYPE;
3510
3511 case OP_TYPEPOSUPTO:
3512 possessive = TRUE;
3513 min = 0;
3514 max = GET2(ecode, 1);
3515 ecode += 3;
3516 goto REPEATTYPE;
3517
3518 case OP_TYPESTAR:
3519 case OP_TYPEMINSTAR:
3520 case OP_TYPEPLUS:
3521 case OP_TYPEMINPLUS:
3522 case OP_TYPEQUERY:
3523 case OP_TYPEMINQUERY:
3524 c = *ecode++ - OP_TYPESTAR;
3525 minimize = (c & 1) != 0;
3526 min = rep_min[c]; /* Pick up values from tables; */
3527 max = rep_max[c]; /* zero for max => infinity */
3528 if (max == 0) max = INT_MAX;
3529
3530 /* Common code for all repeated single character type matches. In UTF-8 mode
3531 OP_ANY, OP_ALLANY, OP_ANYNL, OP_HSPACE, OP_VSPACE and the OP_NOT_xxx types can
3532 match multi-byte characters; the minimum-count loops for OP_DIGIT, OP_WHITESPACE and OP_WORDCHAR accept only one-byte characters. */
3533
3534 REPEATTYPE:
3535 ctype = *ecode++; /* Code for the character type */
3536
3537 #ifdef SUPPORT_UCP
3538 if (ctype == OP_PROP || ctype == OP_NOTPROP)
3539 {
3540 prop_fail_result = ctype == OP_NOTPROP;
3541 prop_type = *ecode++;
3542 prop_value = *ecode++;
3543 }
3544 else prop_type = -1;
3545 #endif
3546
3547 /* First, ensure the minimum number of matches are present. Use inline
3548 code for maximizing the speed, and do the type test once at the start
3549 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3550 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3551 and single-bytes. */
3552
3553 if (min > 0)
3554 {
3555 #ifdef SUPPORT_UCP
3556 if (prop_type >= 0)
3557 {
3558 switch(prop_type)
3559 {
3560 case PT_ANY:
3561 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3562 for (i = 1; i <= min; i++)
3563 {
3564 if (eptr >= md->end_subject)
3565 {
3566 SCHECK_PARTIAL();
3567 MRRETURN(MATCH_NOMATCH);
3568 }
3569 GETCHARINCTEST(c, eptr);
3570 }
3571 break;
3572
3573 case PT_LAMP:
3574 for (i = 1; i <= min; i++)
3575 {
3576 if (eptr >= md->end_subject)
3577 {
3578 SCHECK_PARTIAL();
3579 MRRETURN(MATCH_NOMATCH);
3580 }
3581 GETCHARINCTEST(c, eptr);
3582 prop_chartype = UCD_CHARTYPE(c);
3583 if ((prop_chartype == ucp_Lu ||
3584 prop_chartype == ucp_Ll ||
3585 prop_chartype == ucp_Lt) == prop_fail_result)
3586 MRRETURN(MATCH_NOMATCH);
3587 }
3588 break;
3589
3590 case PT_GC:
3591 for (i = 1; i <= min; i++)
3592 {
3593 if (eptr >= md->end_subject)
3594 {
3595 SCHECK_PARTIAL();
3596 MRRETURN(MATCH_NOMATCH);
3597 }
3598 GETCHARINCTEST(c, eptr);
3599 prop_category = UCD_CATEGORY(c);
3600 if ((prop_category == prop_value) == prop_fail_result)
3601 MRRETURN(MATCH_NOMATCH);
3602 }
3603 break;
3604
3605 case PT_PC:
3606 for (i = 1; i <= min; i++)
3607 {
3608 if (eptr >= md->end_subject)
3609 {
3610 SCHECK_PARTIAL();
3611 MRRETURN(MATCH_NOMATCH);
3612 }
3613 GETCHARINCTEST(c, eptr);
3614 prop_chartype = UCD_CHARTYPE(c);
3615 if ((prop_chartype == prop_value) == prop_fail_result)
3616 MRRETURN(MATCH_NOMATCH);
3617 }
3618 break;
3619
3620 case PT_SC:
3621 for (i = 1; i <= min; i++)
3622 {
3623 if (eptr >= md->end_subject)
3624 {
3625 SCHECK_PARTIAL();
3626 MRRETURN(MATCH_NOMATCH);
3627 }
3628 GETCHARINCTEST(c, eptr);
3629 prop_script = UCD_SCRIPT(c);
3630 if ((prop_script == prop_value) == prop_fail_result)
3631 MRRETURN(MATCH_NOMATCH);
3632 }
3633 break;
3634
3635 case PT_ALNUM:
3636 for (i = 1; i <= min; i++)
3637 {
3638 if (eptr >= md->end_subject)
3639 {
3640 SCHECK_PARTIAL();
3641 MRRETURN(MATCH_NOMATCH);
3642 }
3643 GETCHARINCTEST(c, eptr);
3644 prop_category = UCD_CATEGORY(c);
3645 if ((prop_category == ucp_L || prop_category == ucp_N)
3646 == prop_fail_result)
3647 MRRETURN(MATCH_NOMATCH);
3648 }
3649 break;
3650
3651 case PT_SPACE: /* Perl space */
3652 for (i = 1; i <= min; i++)
3653 {
3654 if (eptr >= md->end_subject)
3655 {
3656 SCHECK_PARTIAL();
3657 MRRETURN(MATCH_NOMATCH);
3658 }
3659 GETCHARINCTEST(c, eptr);
3660 prop_category = UCD_CATEGORY(c);
3661 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3662 c == CHAR_FF || c == CHAR_CR)
3663 == prop_fail_result)
3664 MRRETURN(MATCH_NOMATCH);
3665 }
3666 break;
3667
3668 case PT_PXSPACE: /* POSIX space */
3669 for (i = 1; i <= min; i++)
3670 {
3671 if (eptr >= md->end_subject)
3672 {
3673 SCHECK_PARTIAL();
3674 MRRETURN(MATCH_NOMATCH);
3675 }
3676 GETCHARINCTEST(c, eptr);
3677 prop_category = UCD_CATEGORY(c);
3678 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3679 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3680 == prop_fail_result)
3681 MRRETURN(MATCH_NOMATCH);
3682 }
3683 break;
3684
3685 case PT_WORD:
3686 for (i = 1; i <= min; i++)
3687 {
3688 if (eptr >= md->end_subject)
3689 {
3690 SCHECK_PARTIAL();
3691 MRRETURN(MATCH_NOMATCH);
3692 }
3693 GETCHARINCTEST(c, eptr);
3694 prop_category = UCD_CATEGORY(c);
3695 if ((prop_category == ucp_L || prop_category == ucp_N ||
3696 c == CHAR_UNDERSCORE)
3697 == prop_fail_result)
3698 MRRETURN(MATCH_NOMATCH);
3699 }
3700 break;
3701
3702 /* This should not occur */
3703
3704 default:
3705 RRETURN(PCRE_ERROR_INTERNAL);
3706 }
3707 }
3708
3709 /* Match extended Unicode sequences. We will get here only if the
3710 support is in the binary; otherwise a compile-time error occurs. */
3711
3712 else if (ctype == OP_EXTUNI)
3713 {
3714 for (i = 1; i <= min; i++)
3715 {
3716 if (eptr >= md->end_subject)
3717 {
3718 SCHECK_PARTIAL();
3719 MRRETURN(MATCH_NOMATCH);
3720 }
3721 GETCHARINCTEST(c, eptr);
3722 prop_category = UCD_CATEGORY(c);
3723 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3724 while (eptr < md->end_subject)
3725 {
3726 int len = 1;
3727 if (!utf8) c = *eptr;
3728 else { GETCHARLEN(c, eptr, len); }
3729 prop_category = UCD_CATEGORY(c);
3730 if (prop_category != ucp_M) break;
3731 eptr += len;
3732 }
3733 }
3734 }
3735
3736 else
3737 #endif /* SUPPORT_UCP */
3738
3739 /* Handle all other cases when the coding is UTF-8 */
3740
3741 #ifdef SUPPORT_UTF8
3742 if (utf8) switch(ctype)
3743 {
3744 case OP_ANY:
3745 for (i = 1; i <= min; i++)
3746 {
3747 if (eptr >= md->end_subject)
3748 {
3749 SCHECK_PARTIAL();
3750 MRRETURN(MATCH_NOMATCH);
3751 }
3752 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3753 eptr++;
3754 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3755 }
3756 break;
3757
3758 case OP_ALLANY:
3759 for (i = 1; i <= min; i++)
3760 {
3761 if (eptr >= md->end_subject)
3762 {
3763 SCHECK_PARTIAL();
3764 MRRETURN(MATCH_NOMATCH);
3765 }
3766 eptr++;
3767 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3768 }
3769 break;
3770
3771 case OP_ANYBYTE:
3772 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3773 eptr += min;
3774 break;
3775
3776 case OP_ANYNL:
3777 for (i = 1; i <= min; i++)
3778 {
3779 if (eptr >= md->end_subject)
3780 {
3781 SCHECK_PARTIAL();
3782 MRRETURN(MATCH_NOMATCH);
3783 }
3784 GETCHARINC(c, eptr);
3785 switch(c)
3786 {
3787 default: MRRETURN(MATCH_NOMATCH);
3788 case 0x000d:
3789 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3790 break;
3791
3792 case 0x000a:
3793 break;
3794
3795 case 0x000b:
3796 case 0x000c:
3797 case 0x0085:
3798 case 0x2028:
3799 case 0x2029:
3800 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3801 break;
3802 }
3803 }
3804 break;
3805
3806 case OP_NOT_HSPACE:
3807 for (i = 1; i <= min; i++)
3808 {
3809 if (eptr >= md->end_subject)
3810 {
3811 SCHECK_PARTIAL();
3812 MRRETURN(MATCH_NOMATCH);
3813 }
3814 GETCHARINC(c, eptr);
3815 switch(c)
3816 {
3817 default: break;
3818 case 0x09: /* HT */
3819 case 0x20: /* SPACE */
3820 case 0xa0: /* NBSP */
3821 case 0x1680: /* OGHAM SPACE MARK */
3822 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3823 case 0x2000: /* EN QUAD */
3824 case 0x2001: /* EM QUAD */
3825 case 0x2002: /* EN SPACE */
3826 case 0x2003: /* EM SPACE */
3827 case 0x2004: /* THREE-PER-EM SPACE */
3828 case 0x2005: /* FOUR-PER-EM SPACE */
3829 case 0x2006: /* SIX-PER-EM SPACE */
3830 case 0x2007: /* FIGURE SPACE */
3831 case 0x2008: /* PUNCTUATION SPACE */
3832 case 0x2009: /* THIN SPACE */
3833 case 0x200A: /* HAIR SPACE */
3834 case 0x202f: /* NARROW NO-BREAK SPACE */
3835 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3836 case 0x3000: /* IDEOGRAPHIC SPACE */
3837 MRRETURN(MATCH_NOMATCH);
3838 }
3839 }
3840 break;
3841
3842 case OP_HSPACE:
3843 for (i = 1; i <= min; i++)
3844 {
3845 if (eptr >= md->end_subject)
3846 {
3847 SCHECK_PARTIAL();
3848 MRRETURN(MATCH_NOMATCH);
3849 }
3850 GETCHARINC(c, eptr);
3851 switch(c)
3852 {
3853 default: MRRETURN(MATCH_NOMATCH);
3854 case 0x09: /* HT */
3855 case 0x20: /* SPACE */
3856 case 0xa0: /* NBSP */
3857 case 0x1680: /* OGHAM SPACE MARK */
3858 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3859 case 0x2000: /* EN QUAD */
3860 case 0x2001: /* EM QUAD */
3861 case 0x2002: /* EN SPACE */
3862 case 0x2003: /* EM SPACE */
3863 case 0x2004: /* THREE-PER-EM SPACE */
3864 case 0x2005: /* FOUR-PER-EM SPACE */
3865 case 0x2006: /* SIX-PER-EM SPACE */
3866 case 0x2007: /* FIGURE SPACE */
3867 case 0x2008: /* PUNCTUATION SPACE */
3868 case 0x2009: /* THIN SPACE */
3869 case 0x200A: /* HAIR SPACE */
3870 case 0x202f: /* NARROW NO-BREAK SPACE */
3871 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3872 case 0x3000: /* IDEOGRAPHIC SPACE */
3873 break;
3874 }
3875 }
3876 break;
3877
3878 case OP_NOT_VSPACE:
3879 for (i = 1; i <= min; i++)
3880 {
3881 if (eptr >= md->end_subject)
3882 {
3883 SCHECK_PARTIAL();
3884 MRRETURN(MATCH_NOMATCH);
3885 }
3886 GETCHARINC(c, eptr);
3887 switch(c)
3888 {
3889 default: break;
3890 case 0x0a: /* LF */
3891 case 0x0b: /* VT */
3892 case 0x0c: /* FF */
3893 case 0x0d: /* CR */
3894 case 0x85: /* NEL */
3895 case 0x2028: /* LINE SEPARATOR */
3896 case 0x2029: /* PARAGRAPH SEPARATOR */
3897 MRRETURN(MATCH_NOMATCH);
3898 }
3899 }
3900 break;
3901
3902 case OP_VSPACE:
3903 for (i = 1; i <= min; i++)
3904 {
3905 if (eptr >= md->end_subject)
3906 {
3907 SCHECK_PARTIAL();
3908 MRRETURN(MATCH_NOMATCH);
3909 }
3910 GETCHARINC(c, eptr);
3911 switch(c)
3912 {
3913 default: MRRETURN(MATCH_NOMATCH);
3914 case 0x0a: /* LF */
3915 case 0x0b: /* VT */
3916 case 0x0c: /* FF */
3917 case 0x0d: /* CR */
3918 case 0x85: /* NEL */
3919 case 0x2028: /* LINE SEPARATOR */
3920 case 0x2029: /* PARAGRAPH SEPARATOR */
3921 break;
3922 }
3923 }
3924 break;
3925
3926 case OP_NOT_DIGIT:
3927 for (i = 1; i <= min; i++)
3928 {
3929 if (eptr >= md->end_subject)
3930 {
3931 SCHECK_PARTIAL();
3932 MRRETURN(MATCH_NOMATCH);
3933 }
3934 GETCHARINC(c, eptr);
3935 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3936 MRRETURN(MATCH_NOMATCH);
3937 }
3938 break;
3939
3940 case OP_DIGIT:
3941 for (i = 1; i <= min; i++)
3942 {
3943 if (eptr >= md->end_subject)
3944 {
3945 SCHECK_PARTIAL();
3946 MRRETURN(MATCH_NOMATCH);
3947 }
3948 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3949 MRRETURN(MATCH_NOMATCH);
3950 /* No need to skip more bytes - we know it's a 1-byte character */
3951 }
3952 break;
3953
3954 case OP_NOT_WHITESPACE:
3955 for (i = 1; i <= min; i++)
3956 {
3957 if (eptr >= md->end_subject)
3958 {
3959 SCHECK_PARTIAL();
3960 MRRETURN(MATCH_NOMATCH);
3961 }
3962 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3963 MRRETURN(MATCH_NOMATCH);
3964 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3965 }
3966 break;
3967
3968 case OP_WHITESPACE:
3969 for (i = 1; i <= min; i++)
3970 {
3971 if (eptr >= md->end_subject)
3972 {
3973 SCHECK_PARTIAL();
3974 MRRETURN(MATCH_NOMATCH);
3975 }
3976 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3977 MRRETURN(MATCH_NOMATCH);
3978 /* No need to skip more bytes - we know it's a 1-byte character */
3979 }
3980 break;
3981
3982 case OP_NOT_WORDCHAR:
3983 for (i = 1; i <= min; i++)
3984 {
3985 if (eptr >= md->end_subject)
3986 {
3987 SCHECK_PARTIAL();
3988 MRRETURN(MATCH_NOMATCH);
3989 }
3990 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3991 MRRETURN(MATCH_NOMATCH);
3992 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3993 }
3994 break;
3995
3996 case OP_WORDCHAR:
3997 for (i = 1; i <= min; i++)
3998 {
3999 if (eptr >= md->end_subject)
4000 {
4001 SCHECK_PARTIAL();
4002 MRRETURN(MATCH_NOMATCH);
4003 }
4004 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
4005 MRRETURN(MATCH_NOMATCH);
4006 /* No need to skip more bytes - we know it's a 1-byte character */
4007 }
4008 break;
4009
4010 default:
4011 RRETURN(PCRE_ERROR_INTERNAL);
4012 } /* End switch(ctype) */
4013
4014 else
4015 #endif /* SUPPORT_UTF8 */
4016
4017 /* Code for the non-UTF-8 case for minimum matching of operators other
4018 than OP_PROP and OP_NOTPROP. */
4019
4020 switch(ctype)
4021 {
4022 case OP_ANY:
4023 for (i = 1; i <= min; i++)
4024 {
4025 if (eptr >= md->end_subject)
4026 {
4027 SCHECK_PARTIAL();
4028 MRRETURN(MATCH_NOMATCH);
4029 }
4030 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
4031 eptr++;
4032 }
4033 break;
4034
4035 case OP_ALLANY:
4036 if (eptr > md->end_subject - min)
4037 {
4038 SCHECK_PARTIAL();
4039 MRRETURN(MATCH_NOMATCH);
4040 }
4041 eptr += min;
4042 break;
4043
4044 case OP_ANYBYTE:
4045 if (eptr > md->end_subject - min)
4046 {
4047 SCHECK_PARTIAL();
4048 MRRETURN(MATCH_NOMATCH);
4049 }
4050 eptr += min;
4051 break;
4052
4053 case OP_ANYNL:
4054 for (i = 1; i <= min; i++)
4055 {
4056 if (eptr >= md->end_subject)
4057 {
4058 SCHECK_PARTIAL();
4059 MRRETURN(MATCH_NOMATCH);
4060 }
4061 switch(*eptr++)
4062 {
4063 default: MRRETURN(MATCH_NOMATCH);
4064 case 0x000d:
4065 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4066 break;
4067 case 0x000a:
4068 break;
4069
4070 case 0x000b:
4071 case 0x000c:
4072 case 0x0085:
4073 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4074 break;
4075 }
4076 }
4077 break;
4078
4079 case OP_NOT_HSPACE:
4080 for (i = 1; i <= min; i++)
4081 {
4082 if (eptr >= md->end_subject)
4083 {
4084 SCHECK_PARTIAL();
4085 MRRETURN(MATCH_NOMATCH);
4086 }
4087 switch(*eptr++)
4088 {
4089 default: break;
4090 case 0x09: /* HT */
4091 case 0x20: /* SPACE */
4092 case 0xa0: /* NBSP */
4093 MRRETURN(MATCH_NOMATCH);
4094 }
4095 }
4096 break;
4097
4098 case OP_HSPACE:
4099 for (i = 1; i <= min; i++)
4100 {
4101 if (eptr >= md->end_subject)
4102 {
4103 SCHECK_PARTIAL();
4104 MRRETURN(MATCH_NOMATCH);
4105 }
4106 switch(*eptr++)
4107 {
4108 default: MRRETURN(MATCH_NOMATCH);
4109 case 0x09: /* HT */
4110 case 0x20: /* SPACE */
4111 case 0xa0: /* NBSP */
4112 break;
4113 }
4114 }
4115 break;
4116
4117 case OP_NOT_VSPACE:
4118 for (i = 1; i <= min; i++)
4119 {
4120 if (eptr >= md->end_subject)
4121 {
4122 SCHECK_PARTIAL();
4123 MRRETURN(MATCH_NOMATCH);
4124 }
4125 switch(*eptr++)
4126 {
4127 default: break;
4128 case 0x0a: /* LF */
4129 case 0x0b: /* VT */
4130 case 0x0c: /* FF */
4131 case 0x0d: /* CR */
4132 case 0x85: /* NEL */
4133 MRRETURN(MATCH_NOMATCH);
4134 }
4135 }
4136 break;
4137
4138 case OP_VSPACE:
4139 for (i = 1; i <= min; i++)
4140 {
4141 if (eptr >= md->end_subject)
4142 {
4143 SCHECK_PARTIAL();
4144 MRRETURN(MATCH_NOMATCH);
4145 }
4146 switch(*eptr++)
4147 {
4148 default: MRRETURN(MATCH_NOMATCH);
4149 case 0x0a: /* LF */
4150 case 0x0b: /* VT */
4151 case 0x0c: /* FF */
4152 case 0x0d: /* CR */
4153 case 0x85: /* NEL */
4154 break;
4155 }
4156 }
4157 break;
4158
4159 case OP_NOT_DIGIT:
4160 for (i = 1; i <= min; i++)
4161 {
4162 if (eptr >= md->end_subject)
4163 {
4164 SCHECK_PARTIAL();
4165 MRRETURN(MATCH_NOMATCH);
4166 }
4167 if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4168 }
4169 break;
4170
4171 case OP_DIGIT:
4172 for (i = 1; i <= min; i++)
4173 {
4174 if (eptr >= md->end_subject)
4175 {
4176 SCHECK_PARTIAL();
4177 MRRETURN(MATCH_NOMATCH);
4178 }
4179 if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4180 }
4181 break;
4182
4183 case OP_NOT_WHITESPACE:
4184 for (i = 1; i <= min; i++)
4185 {
4186 if (eptr >= md->end_subject)
4187 {
4188 SCHECK_PARTIAL();
4189 MRRETURN(MATCH_NOMATCH);
4190 }
4191 if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4192 }
4193 break;
4194
4195 case OP_WHITESPACE:
4196 for (i = 1; i <= min; i++)
4197 {
4198 if (eptr >= md->end_subject)
4199 {
4200 SCHECK_PARTIAL();
4201 MRRETURN(MATCH_NOMATCH);
4202 }
4203 if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4204 }
4205 break;
4206
4207 case OP_NOT_WORDCHAR:
4208 for (i = 1; i <= min; i++)
4209 {
4210 if (eptr >= md->end_subject)
4211 {
4212 SCHECK_PARTIAL();
4213 MRRETURN(MATCH_NOMATCH);
4214 }
4215 if ((md->ctypes[*eptr++] & ctype_word) != 0)
4216 MRRETURN(MATCH_NOMATCH);
4217 }
4218 break;
4219
4220 case OP_WORDCHAR:
4221 for (i = 1; i <= min; i++)
4222 {
4223 if (eptr >= md->end_subject)
4224 {
4225 SCHECK_PARTIAL();
4226 MRRETURN(MATCH_NOMATCH);
4227 }
4228 if ((md->ctypes[*eptr++] & ctype_word) == 0)
4229 MRRETURN(MATCH_NOMATCH);
4230 }
4231 break;
4232
4233 default:
4234 RRETURN(PCRE_ERROR_INTERNAL);
4235 }
4236 }
4237
4238 /* If min = max, continue at the same level without recursing */
4239
4240 if (min == max) continue;
4241
4242 /* If minimizing, we have to test the rest of the pattern before each
4243 subsequent match. Again, separate the UTF-8 case for speed, and also
4244 separate the UCP cases. */
4245
4246 if (minimize)
4247 {
4248 #ifdef SUPPORT_UCP
4249 if (prop_type >= 0)
4250 {
4251 switch(prop_type)
4252 {
4253 case PT_ANY:
4254 for (fi = min;; fi++)
4255 {
4256 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
4257 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4258 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4259 if (eptr >= md->end_subject)
4260 {
4261 SCHECK_PARTIAL();
4262 MRRETURN(MATCH_NOMATCH);
4263 }
4264 GETCHARINCTEST(c, eptr);
4265 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4266 }
4267 /* Control never gets here */
4268
4269 case PT_LAMP:
4270 for (fi = min;; fi++)
4271 {
4272 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
4273 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4274 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4275 if (eptr >= md->end_subject)
4276 {
4277 SCHECK_PARTIAL();
4278 MRRETURN(MATCH_NOMATCH);
4279 }
4280 GETCHARINCTEST(c, eptr);
4281 prop_chartype = UCD_CHARTYPE(c);
4282 if ((prop_chartype == ucp_Lu ||
4283 prop_chartype == ucp_Ll ||
4284 prop_chartype == ucp_Lt) == prop_fail_result)
4285 MRRETURN(MATCH_NOMATCH);
4286 }
4287 /* Control never gets here */
4288
4289 case PT_GC:
4290 for (fi = min;; fi++)
4291 {
4292 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4293 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4294 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4295 if (eptr >= md->end_subject)
4296 {
4297 SCHECK_PARTIAL();
4298 MRRETURN(MATCH_NOMATCH);
4299 }
4300 GETCHARINCTEST(c, eptr);
4301 prop_category = UCD_CATEGORY(c);
4302 if ((prop_category == prop_value) == prop_fail_result)
4303 MRRETURN(MATCH_NOMATCH);
4304 }
4305 /* Control never gets here */
4306
4307 case PT_PC:
4308 for (fi = min;; fi++)
4309 {
4310 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4311 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4312 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4313 if (eptr >= md->end_subject)
4314 {
4315 SCHECK_PARTIAL();
4316 MRRETURN(MATCH_NOMATCH);
4317 }
4318 GETCHARINCTEST(c, eptr);
4319 prop_chartype = UCD_CHARTYPE(c);
4320 if ((prop_chartype == prop_value) == prop_fail_result)
4321 MRRETURN(MATCH_NOMATCH);
4322 }
4323 /* Control never gets here */
4324
4325 case PT_SC:
4326 for (fi = min;; fi++)
4327 {
4328 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4329 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4330 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4331 if (eptr >= md->end_subject)
4332 {
4333 SCHECK_PARTIAL();
4334 MRRETURN(MATCH_NOMATCH);
4335 }
4336 GETCHARINCTEST(c, eptr);
4337 prop_script = UCD_SCRIPT(c);
4338 if ((prop_script == prop_value) == prop_fail_result)
4339 MRRETURN(MATCH_NOMATCH);
4340 }
4341 /* Control never gets here */
4342
4343 case PT_ALNUM:
4344 for (fi = min;; fi++)
4345 {
4346 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4347 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4348 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4349 if (eptr >= md->end_subject)
4350 {
4351 SCHECK_PARTIAL();
4352 MRRETURN(MATCH_NOMATCH);
4353 }
4354 GETCHARINCTEST(c, eptr);
4355 prop_category = UCD_CATEGORY(c);
4356 if ((prop_category == ucp_L || prop_category == ucp_N)
4357 == prop_fail_result)
4358 MRRETURN(MATCH_NOMATCH);
4359 }
4360 /* Control never gets here */
4361
4362 case PT_SPACE: /* Perl space */
4363 for (fi = min;; fi++)
4364 {
4365 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4366 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4367 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4368 if (eptr >= md->end_subject)
4369 {
4370 SCHECK_PARTIAL();
4371 MRRETURN(MATCH_NOMATCH);
4372 }
4373 GETCHARINCTEST(c, eptr);
4374 prop_category = UCD_CATEGORY(c);
4375 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4376 c == CHAR_FF || c == CHAR_CR)
4377 == prop_fail_result)
4378 MRRETURN(MATCH_NOMATCH);
4379 }
4380 /* Control never gets here */
4381
4382 case PT_PXSPACE: /* POSIX space */
4383 for (fi = min;; fi++)
4384 {
4385 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4386 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4387 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4388 if (eptr >= md->end_subject)
4389 {
4390 SCHECK_PARTIAL();
4391 MRRETURN(MATCH_NOMATCH);
4392 }
4393 GETCHARINCTEST(c, eptr);
4394 prop_category = UCD_CATEGORY(c);
4395 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4396 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4397 == prop_fail_result)
4398 MRRETURN(MATCH_NOMATCH);
4399 }
4400 /* Control never gets here */
4401
4402 case PT_WORD:
4403 for (fi = min;; fi++)
4404 {
4405 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4406 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4407 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4408 if (eptr >= md->end_subject)
4409 {
4410 SCHECK_PARTIAL();
4411 MRRETURN(MATCH_NOMATCH);
4412 }
4413 GETCHARINCTEST(c, eptr);
4414 prop_category = UCD_CATEGORY(c);
4415 if ((prop_category == ucp_L ||
4416 prop_category == ucp_N ||
4417 c == CHAR_UNDERSCORE)
4418 == prop_fail_result)
4419 MRRETURN(MATCH_NOMATCH);
4420 }
4421 /* Control never gets here */
4422
4423 /* This should never occur */
4424
4425 default:
4426 RRETURN(PCRE_ERROR_INTERNAL);
4427 }
4428 }
4429
4430 /* Match extended Unicode sequences. We will get here only if the
4431 support is in the binary; otherwise a compile-time error occurs. */
4432
4433 else if (ctype == OP_EXTUNI)
4434 {
4435 for (fi = min;; fi++)
4436 {
4437 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4438 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4439 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4440 if (eptr >= md->end_subject)
4441 {
4442 SCHECK_PARTIAL();
4443 MRRETURN(MATCH_NOMATCH);
4444 }
4445 GETCHARINCTEST(c, eptr);
4446 prop_category = UCD_CATEGORY(c);
4447 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
4448 while (eptr < md->end_subject)
4449 {
4450 int len = 1;
4451 if (!utf8) c = *eptr;
4452 else { GETCHARLEN(c, eptr, len); }
4453 prop_category = UCD_CATEGORY(c);
4454 if (prop_category != ucp_M) break;
4455 eptr += len;
4456 }
4457 }
4458 }
4459
4460 else
4461 #endif /* SUPPORT_UCP */
4462
4463 #ifdef SUPPORT_UTF8
4464 /* UTF-8 mode */
4465 if (utf8)
4466 {
4467 for (fi = min;; fi++)
4468 {
4469 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
4470 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4471 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4472 if (eptr >= md->end_subject)
4473 {
4474 SCHECK_PARTIAL();
4475 MRRETURN(MATCH_NOMATCH);
4476 }
4477 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4478 MRRETURN(MATCH_NOMATCH);
4479 GETCHARINC(c, eptr);
4480 switch(ctype)
4481 {
4482 case OP_ANY: /* This is the non-NL case */
4483 case OP_ALLANY:
4484 case OP_ANYBYTE:
4485 break;
4486
4487 case OP_ANYNL:
4488 switch(c)
4489 {
4490 default: MRRETURN(MATCH_NOMATCH);
4491 case 0x000d:
4492 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4493 break;
4494 case 0x000a:
4495 break;
4496
4497 case 0x000b:
4498 case 0x000c:
4499 case 0x0085:
4500 case 0x2028:
4501 case 0x2029:
4502 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4503 break;
4504 }
4505 break;
4506
4507 case OP_NOT_HSPACE:
4508 switch(c)
4509 {
4510 default: break;
4511 case 0x09: /* HT */
4512 case 0x20: /* SPACE */
4513 case 0xa0: /* NBSP */
4514 case 0x1680: /* OGHAM SPACE MARK */
4515 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4516 case 0x2000: /* EN QUAD */
4517 case 0x2001: /* EM QUAD */
4518 case 0x2002: /* EN SPACE */
4519 case 0x2003: /* EM SPACE */
4520 case 0x2004: /* THREE-PER-EM SPACE */
4521 case 0x2005: /* FOUR-PER-EM SPACE */
4522 case 0x2006: /* SIX-PER-EM SPACE */
4523 case 0x2007: /* FIGURE SPACE */
4524 case 0x2008: /* PUNCTUATION SPACE */
4525 case 0x2009: /* THIN SPACE */
4526 case 0x200A: /* HAIR SPACE */
4527 case 0x202f: /* NARROW NO-BREAK SPACE */
4528 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4529 case 0x3000: /* IDEOGRAPHIC SPACE */
4530 MRRETURN(MATCH_NOMATCH);
4531 }
4532 break;
4533
4534 case OP_HSPACE:
4535 switch(c)
4536 {
4537 default: MRRETURN(MATCH_NOMATCH);
4538 case 0x09: /* HT */
4539 case 0x20: /* SPACE */
4540 case 0xa0: /* NBSP */
4541 case 0x1680: /* OGHAM SPACE MARK */
4542 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4543 case 0x2000: /* EN QUAD */
4544 case 0x2001: /* EM QUAD */
4545 case 0x2002: /* EN SPACE */
4546 case 0x2003: /* EM SPACE */
4547 case 0x2004: /* THREE-PER-EM SPACE */
4548 case 0x2005: /* FOUR-PER-EM SPACE */
4549 case 0x2006: /* SIX-PER-EM SPACE */
4550 case 0x2007: /* FIGURE SPACE */
4551 case 0x2008: /* PUNCTUATION SPACE */
4552 case 0x2009: /* THIN SPACE */
4553 case 0x200A: /* HAIR SPACE */
4554 case 0x202f: /* NARROW NO-BREAK SPACE */
4555 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4556 case 0x3000: /* IDEOGRAPHIC SPACE */
4557 break;
4558 }
4559 break;
4560
4561 case OP_NOT_VSPACE:
4562 switch(c)
4563 {
4564 default: break;
4565 case 0x0a: /* LF */
4566 case 0x0b: /* VT */
4567 case 0x0c: /* FF */
4568 case 0x0d: /* CR */
4569 case 0x85: /* NEL */
4570 case 0x2028: /* LINE SEPARATOR */
4571 case 0x2029: /* PARAGRAPH SEPARATOR */
4572 MRRETURN(MATCH_NOMATCH);
4573 }
4574 break;
4575
4576 case OP_VSPACE:
4577 switch(c)
4578 {
4579 default: MRRETURN(MATCH_NOMATCH);
4580 case 0x0a: /* LF */
4581 case 0x0b: /* VT */
4582 case 0x0c: /* FF */
4583 case 0x0d: /* CR */
4584 case 0x85: /* NEL */
4585 case 0x2028: /* LINE SEPARATOR */
4586 case 0x2029: /* PARAGRAPH SEPARATOR */
4587 break;
4588 }
4589 break;
4590
4591 case OP_NOT_DIGIT:
4592 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
4593 MRRETURN(MATCH_NOMATCH);
4594 break;
4595
4596 case OP_DIGIT:
4597 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
4598 MRRETURN(MATCH_NOMATCH);
4599 break;
4600
4601 case OP_NOT_WHITESPACE:
4602 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
4603 MRRETURN(MATCH_NOMATCH);
4604 break;
4605
4606 case OP_WHITESPACE:
4607 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
4608 MRRETURN(MATCH_NOMATCH);
4609 break;
4610
4611 case OP_NOT_WORDCHAR:
4612 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
4613 MRRETURN(MATCH_NOMATCH);
4614 break;
4615
4616 case OP_WORDCHAR:
4617 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4618 MRRETURN(MATCH_NOMATCH);
4619 break;
4620
4621 default:
4622 RRETURN(PCRE_ERROR_INTERNAL);
4623 }
4624 }
4625 }
4626 else
4627 #endif
4628 /* Not UTF-8 mode */
4629 {
4630 for (fi = min;; fi++)
4631 {
4632 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4633 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4634 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4635 if (eptr >= md->end_subject)
4636 {
4637 SCHECK_PARTIAL();
4638 MRRETURN(MATCH_NOMATCH);
4639 }
4640 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4641 MRRETURN(MATCH_NOMATCH);
4642 c = *eptr++;
4643 switch(ctype)
4644 {
4645 case OP_ANY: /* This is the non-NL case */
4646 case OP_ALLANY:
4647 case OP_ANYBYTE:
4648 break;
4649
4650 case OP_ANYNL:
4651 switch(c)
4652 {
4653 default: MRRETURN(MATCH_NOMATCH);
4654 case 0x000d:
4655 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4656 break;
4657
4658 case 0x000a:
4659 break;
4660
4661 case 0x000b:
4662 case 0x000c:
4663 case 0x0085:
4664 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4665 break;
4666 }
4667 break;
4668
4669 case OP_NOT_HSPACE:
4670 switch(c)
4671 {
4672 default: break;
4673 case 0x09: /* HT */
4674 case 0x20: /* SPACE */
4675 case 0xa0: /* NBSP */
4676 MRRETURN(MATCH_NOMATCH);
4677 }
4678 break;
4679
4680 case OP_HSPACE:
4681 switch(c)
4682 {
4683 default: MRRETURN(MATCH_NOMATCH);
4684 case 0x09: /* HT */
4685 case 0x20: /* SPACE */
4686 case 0xa0: /* NBSP */
4687 break;
4688 }
4689 break;
4690
4691 case OP_NOT_VSPACE:
4692 switch(c)
4693 {
4694 default: break;
4695 case 0x0a: /* LF */
4696 case 0x0b: /* VT */
4697 case 0x0c: /* FF */
4698 case 0x0d: /* CR */
4699 case 0x85: /* NEL */
4700 MRRETURN(MATCH_NOMATCH);
4701 }
4702 break;
4703
4704 case OP_VSPACE:
4705 switch(c)
4706 {
4707 default: MRRETURN(MATCH_NOMATCH);
4708 case 0x0a: /* LF */
4709 case 0x0b: /* VT */
4710 case 0x0c: /* FF */
4711 case 0x0d: /* CR */
4712 case 0x85: /* NEL */
4713 break;
4714 }
4715 break;
4716
4717 case OP_NOT_DIGIT:
4718 if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4719 break;
4720
4721 case OP_DIGIT:
4722 if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4723 break;
4724
4725 case OP_NOT_WHITESPACE:
4726 if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4727 break;
4728
4729 case OP_WHITESPACE:
4730 if ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4731 break;
4732
4733 case OP_NOT_WORDCHAR:
4734 if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
4735 break;
4736
4737 case OP_WORDCHAR:
4738 if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
4739 break;
4740
4741 default:
4742 RRETURN(PCRE_ERROR_INTERNAL);
4743 }
4744 }
4745 }
4746 /* Control never gets here */
4747 }
4748
4749 /* If maximizing, it is worth using inline code for speed, doing the type
4750 test once at the start (i.e. keep it out of the loop). Again, keep the
4751 UTF-8 and UCP stuff separate. */
4752
4753 else
4754 {
4755 pp = eptr; /* Remember where we started */
4756
4757 #ifdef SUPPORT_UCP
4758 if (prop_type >= 0)
4759 {
4760 switch(prop_type)
4761 {
4762 case PT_ANY:
4763 for (i = min; i < max; i++)
4764 {
4765 int len = 1;
4766 if (eptr >= md->end_subject)
4767 {
4768 SCHECK_PARTIAL();
4769 break;
4770 }
4771 GETCHARLENTEST(c, eptr, len);
4772 if (prop_fail_result) break;
4773 eptr+= len;
4774 }
4775 break;
4776
4777 case PT_LAMP:
4778 for (i = min; i < max; i++)
4779 {
4780 int len = 1;
4781 if (eptr >= md->end_subject)
4782 {
4783 SCHECK_PARTIAL();
4784 break;
4785 }
4786 GETCHARLENTEST(c, eptr, len);
4787 prop_chartype = UCD_CHARTYPE(c);
4788 if ((prop_chartype == ucp_Lu ||
4789 prop_chartype == ucp_Ll ||
4790 prop_chartype == ucp_Lt) == prop_fail_result)
4791 break;
4792 eptr+= len;
4793 }
4794 break;
4795
4796 case PT_GC:
4797 for (i = min; i < max; i++)
4798 {
4799 int len = 1;
4800 if (eptr >= md->end_subject)
4801 {
4802 SCHECK_PARTIAL();
4803 break;
4804 }
4805 GETCHARLENTEST(c, eptr, len);
4806 prop_category = UCD_CATEGORY(c);
4807 if ((prop_category == prop_value) == prop_fail_result)
4808 break;
4809 eptr+= len;
4810 }
4811 break;
4812
4813 case PT_PC:
4814 for (i = min; i < max; i++)
4815 {
4816 int len = 1;
4817 if (eptr >= md->end_subject)
4818 {
4819 SCHECK_PARTIAL();
4820 break;
4821 }
4822 GETCHARLENTEST(c, eptr, len);
4823 prop_chartype = UCD_CHARTYPE(c);
4824 if ((prop_chartype == prop_value) == prop_fail_result)
4825 break;
4826 eptr+= len;
4827 }
4828 break;
4829
4830 case PT_SC:
4831 for (i = min; i < max; i++)
4832 {
4833 int len = 1;
4834 if (eptr >= md->end_subject)
4835 {
4836 SCHECK_PARTIAL();
4837 break;
4838 }
4839 GETCHARLENTEST(c, eptr, len);
4840 prop_script = UCD_SCRIPT(c);
4841 if ((prop_script == prop_value) == prop_fail_result)
4842 break;
4843 eptr+= len;
4844 }
4845 break;
4846
4847 case PT_ALNUM:
4848 for (i = min; i < max; i++)
4849 {
4850 int len = 1;
4851 if (eptr >= md->end_subject)
4852 {
4853 SCHECK_PARTIAL();
4854 break;
4855 }
4856 GETCHARLENTEST(c, eptr, len);
4857 prop_category = UCD_CATEGORY(c);
4858 if ((prop_category == ucp_L || prop_category == ucp_N)
4859 == prop_fail_result)
4860 break;
4861 eptr+= len;
4862 }
4863 break;
4864
4865 case PT_SPACE: /* Perl space */
4866 for (i = min; i < max; i++)
4867 {
4868 int len = 1;
4869 if (eptr >= md->end_subject)
4870 {
4871 SCHECK_PARTIAL();
4872 break;
4873 }
4874 GETCHARLENTEST(c, eptr, len);
4875 prop_category = UCD_CATEGORY(c);
4876 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4877 c == CHAR_FF || c == CHAR_CR)
4878 == prop_fail_result)
4879 break;
4880 eptr+= len;
4881 }
4882 break;
4883
4884 case PT_PXSPACE: /* POSIX space */
4885 for (i = min; i < max; i++)
4886 {
4887 int len = 1;
4888 if (eptr >= md->end_subject)
4889 {
4890 SCHECK_PARTIAL();
4891 break;
4892 }
4893 GETCHARLENTEST(c, eptr, len);
4894 prop_category = UCD_CATEGORY(c);
4895 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4896 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4897 == prop_fail_result)
4898 break;
4899 eptr+= len;
4900 }
4901 break;
4902
4903 case PT_WORD:
4904 for (i = min; i < max; i++)
4905 {
4906 int len = 1;
4907 if (eptr >= md->end_subject)
4908 {
4909 SCHECK_PARTIAL();
4910 break;
4911 }
4912 GETCHARLENTEST(c, eptr, len);
4913 prop_category = UCD_CATEGORY(c);
4914 if ((prop_category == ucp_L || prop_category == ucp_N ||
4915 c == CHAR_UNDERSCORE) == prop_fail_result)
4916 break;
4917 eptr+= len;
4918 }
4919 break;
4920
4921 default:
4922 RRETURN(PCRE_ERROR_INTERNAL);
4923 }
4924
4925 /* eptr is now past the end of the maximum run */
4926
4927 if (possessive) continue;
4928 for(;;)
4929 {
4930 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4931 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4932 if (eptr-- == pp) break; /* Stop if tried at original pos */
4933 if (utf8) BACKCHAR(eptr);
4934 }
4935 }
4936
4937 /* Match extended Unicode sequences. We will get here only if the
4938 support is in the binary; otherwise a compile-time error occurs. */
4939
4940 else if (ctype == OP_EXTUNI)
4941 {
4942 for (i = min; i < max; i++)
4943 {
4944 if (eptr >= md->end_subject)
4945 {
4946 SCHECK_PARTIAL();
4947 break;
4948 }
4949 GETCHARINCTEST(c, eptr);
4950 prop_category = UCD_CATEGORY(c);
4951 if (prop_category == ucp_M) break;
4952 while (eptr < md->end_subject)
4953 {
4954 int len = 1;
4955 if (!utf8) c = *eptr; else
4956 {
4957 GETCHARLEN(c, eptr, len);
4958 }
4959 prop_category = UCD_CATEGORY(c);
4960 if (prop_category != ucp_M) break;
4961 eptr += len;
4962 }
4963 }
4964
4965 /* eptr is now past the end of the maximum run */
4966
4967 if (possessive) continue;
4968
4969 for(;;)
4970 {
4971 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
4972 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4973 if (eptr-- == pp) break; /* Stop if tried at original pos */
4974 for (;;) /* Move back over one extended */
4975 {
4976 int len = 1;
4977 if (!utf8) c = *eptr; else
4978 {
4979 BACKCHAR(eptr);
4980 GETCHARLEN(c, eptr, len);
4981 }
4982 prop_category = UCD_CATEGORY(c);
4983 if (prop_category != ucp_M) break;
4984 eptr--;
4985 }
4986 }
4987 }
4988
4989 else
4990 #endif /* SUPPORT_UCP */
4991
4992 #ifdef SUPPORT_UTF8
4993 /* UTF-8 mode */
4994
4995 if (utf8)
4996 {
4997 switch(ctype)
4998 {
4999 case OP_ANY:
5000 if (max < INT_MAX)
5001 {
5002 for (i = min; i < max; i++)
5003 {
5004 if (eptr >= md->end_subject)
5005 {
5006 SCHECK_PARTIAL();
5007 break;
5008 }
5009 if (IS_NEWLINE(eptr)) break;
5010 eptr++;
5011 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5012 }
5013 }
5014
5015 /* Handle unlimited UTF-8 repeat */
5016
5017 else
5018 {
5019 for (i = min; i < max; i++)
5020 {
5021 if (eptr >= md->end_subject)
5022 {
5023 SCHECK_PARTIAL();
5024 break;
5025 }
5026 if (IS_NEWLINE(eptr)) break;
5027 eptr++;
5028 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5029 }
5030 }
5031 break;
5032
5033 case OP_ALLANY:
5034 if (max < INT_MAX)
5035 {
5036 for (i = min; i < max; i++)
5037 {
5038 if (eptr >= md->end_subject)
5039 {
5040 SCHECK_PARTIAL();
5041 break;
5042 }
5043 eptr++;
5044 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5045 }
5046 }
5047 else eptr = md->end_subject; /* Unlimited UTF-8 repeat */
5048 break;
5049
5050 /* The byte case is the same as non-UTF8 */
5051
5052 case OP_ANYBYTE:
5053 c = max - min;
5054 if (c > (unsigned int)(md->end_subject - eptr))
5055 {
5056 eptr = md->end_subject;
5057 SCHECK_PARTIAL();
5058 }
5059 else eptr += c;
5060 break;
5061
5062 case OP_ANYNL:
5063 for (i = min; i < max; i++)
5064 {
5065 int len = 1;
5066 if (eptr >= md->end_subject)
5067 {
5068 SCHECK_PARTIAL();
5069 break;
5070 }
5071 GETCHARLEN(c, eptr, len);
5072 if (c == 0x000d)
5073 {
5074 if (++eptr >= md->end_subject) break;
5075 if (*eptr == 0x000a) eptr++;
5076 }
5077 else
5078 {
5079 if (c != 0x000a &&
5080 (md->bsr_anycrlf ||
5081 (c != 0x000b && c != 0x000c &&
5082 c != 0x0085 && c != 0x2028 && c != 0x2029)))
5083 break;
5084 eptr += len;
5085 }
5086 }
5087 break;
5088
5089 case OP_NOT_HSPACE:
5090 case OP_HSPACE:
5091 for (i = min; i < max; i++)
5092 {
5093 BOOL gotspace;
5094 int len = 1;
5095 if (eptr >= md->end_subject)
5096 {
5097 SCHECK_PARTIAL();
5098 break;
5099 }
5100 GETCHARLEN(c, eptr, len);
5101 switch(c)
5102 {
5103 default: gotspace = FALSE; break;
5104 case 0x09: /* HT */
5105 case 0x20: /* SPACE */
5106 case 0xa0: /* NBSP */
5107 case 0x1680: /* OGHAM SPACE MARK */
5108 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5109 case 0x2000: /* EN QUAD */
5110 case 0x2001: /* EM QUAD */
5111 case 0x2002: /* EN SPACE */
5112 case 0x2003: /* EM SPACE */
5113 case 0x2004: /* THREE-PER-EM SPACE */
5114 case 0x2005: /* FOUR-PER-EM SPACE */
5115 case 0x2006: /* SIX-PER-EM SPACE */
5116 case 0x2007: /* FIGURE SPACE */
5117 case 0x2008: /* PUNCTUATION SPACE */
5118 case 0x2009: /* THIN SPACE */
5119 case 0x200A: /* HAIR SPACE */
5120 case 0x202f: /* NARROW NO-BREAK SPACE */
5121 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5122 case 0x3000: /* IDEOGRAPHIC SPACE */
5123 gotspace = TRUE;
5124 break;
5125 }
5126 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5127 eptr += len;
5128 }
5129 break;
5130
5131 case OP_NOT_VSPACE:
5132 case OP_VSPACE:
5133 for (i = min; i < max; i++)
5134 {
5135 BOOL gotspace;
5136 int len = 1;
5137 if (eptr >= md->end_subject)
5138 {
5139 SCHECK_PARTIAL();
5140 break;
5141 }
5142 GETCHARLEN(c, eptr, len);
5143 switch(c)
5144 {
5145 default: gotspace = FALSE; break;
5146 case 0x0a: /* LF */
5147 case 0x0b: /* VT */
5148 case 0x0c: /* FF */
5149 case 0x0d: /* CR */
5150 case 0x85: /* NEL */
5151 case 0x2028: /* LINE SEPARATOR */
5152 case 0x2029: /* PARAGRAPH SEPARATOR */
5153 gotspace = TRUE;
5154 break;
5155 }
5156 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5157 eptr += len;
5158 }
5159 break;
5160
5161 case OP_NOT_DIGIT:
5162 for (i = min; i < max; i++)
5163 {
5164 int len = 1;
5165 if (eptr >= md->end_subject)
5166 {
5167 SCHECK_PARTIAL();
5168 break;
5169 }
5170 GETCHARLEN(c, eptr, len);
5171 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5172 eptr+= len;
5173 }
5174 break;
5175
5176 case OP_DIGIT:
5177 for (i = min; i < max; i++)
5178 {
5179 int len = 1;
5180 if (eptr >= md->end_subject)
5181 {
5182 SCHECK_PARTIAL();
5183 break;
5184 }
5185 GETCHARLEN(c, eptr, len);
5186 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5187 eptr+= len;
5188 }
5189 break;
5190
5191 case OP_NOT_WHITESPACE:
5192 for (i = min; i < max; i++)
5193 {
5194 int len = 1;
5195 if (eptr >= md->end_subject)
5196 {
5197 SCHECK_PARTIAL();
5198 break;
5199 }
5200 GETCHARLEN(c, eptr, len);
5201 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5202 eptr+= len;
5203 }
5204 break;
5205
5206 case OP_WHITESPACE:
5207 for (i = min; i < max; i++)
5208 {
5209 int len = 1;
5210 if (eptr >= md->end_subject)
5211 {
5212 SCHECK_PARTIAL();
5213 break;
5214 }
5215 GETCHARLEN(c, eptr, len);
5216 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5217 eptr+= len;
5218 }
5219 break;
5220
5221 case OP_NOT_WORDCHAR:
5222 for (i = min; i < max; i++)
5223 {
5224 int len = 1;
5225 if (eptr >= md->end_subject)
5226 {
5227 SCHECK_PARTIAL();
5228 break;
5229 }
5230 GETCHARLEN(c, eptr, len);
5231 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5232 eptr+= len;
5233 }
5234 break;
5235
5236 case OP_WORDCHAR:
5237 for (i = min; i < max; i++)
5238 {
5239 int len = 1;
5240 if (eptr >= md->end_subject)
5241 {
5242 SCHECK_PARTIAL();
5243 break;
5244 }
5245 GETCHARLEN(c, eptr, len);
5246 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5247 eptr+= len;
5248 }
5249 break;
5250
5251 default:
5252 RRETURN(PCRE_ERROR_INTERNAL);
5253 }
5254
5255 /* eptr is now past the end of the maximum run */
5256
5257 if (possessive) continue;
5258 for(;;)
5259 {
5260 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
5261 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5262 if (eptr-- == pp) break; /* Stop if tried at original pos */
5263 BACKCHAR(eptr);
5264 }
5265 }
5266 else
5267 #endif /* SUPPORT_UTF8 */
5268
5269 /* Not UTF-8 mode */
5270 {
5271 switch(ctype)
5272 {
5273 case OP_ANY:
5274 for (i = min; i < max; i++)
5275 {
5276 if (eptr >= md->end_subject)
5277 {
5278 SCHECK_PARTIAL();
5279 break;
5280 }
5281 if (IS_NEWLINE(eptr)) break;
5282 eptr++;
5283 }
5284 break;
5285
5286 case OP_ALLANY:
5287 case OP_ANYBYTE:
5288 c = max - min;
5289 if (c > (unsigned int)(md->end_subject - eptr))
5290 {
5291 eptr = md->end_subject;
5292 SCHECK_PARTIAL();
5293 }
5294 else eptr += c;
5295 break;
5296
5297 case OP_ANYNL:
5298 for (i = min; i < max; i++)
5299 {
5300 if (eptr >= md->end_subject)
5301 {
5302 SCHECK_PARTIAL();
5303 break;
5304 }
5305 c = *eptr;
5306 if (c == 0x000d)
5307 {
5308 if (++eptr >= md->end_subject) break;
5309 if (*eptr == 0x000a) eptr++;
5310 }
5311 else
5312 {
5313 if (c != 0x000a &&
5314 (md->bsr_anycrlf ||
5315 (c != 0x000b && c != 0x000c && c != 0x0085)))
5316 break;
5317 eptr++;
5318 }
5319 }
5320 break;
5321
5322 case OP_NOT_HSPACE:
5323 for (i = min; i < max; i++)
5324 {
5325 if (eptr >= md->end_subject)
5326 {
5327 SCHECK_PARTIAL();
5328 break;
5329 }
5330 c = *eptr;
5331 if (c == 0x09 || c == 0x20 || c == 0xa0) break;
5332 eptr++;
5333 }
5334 break;
5335
5336 case OP_HSPACE:
5337 for (i = min; i < max; i++)
5338 {
5339 if (eptr >= md->end_subject)
5340 {
5341 SCHECK_PARTIAL();
5342 break;
5343 }
5344 c = *eptr;
5345 if (c != 0x09 && c != 0x20 && c != 0xa0) break;
5346 eptr++;
5347 }
5348 break;
5349
5350 case OP_NOT_VSPACE:
5351 for (i = min; i < max; i++)
5352 {
5353 if (eptr >= md->end_subject)
5354 {
5355 SCHECK_PARTIAL();
5356 break;
5357 }
5358 c = *eptr;
5359 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
5360 break;
5361 eptr++;
5362 }
5363 break;
5364
5365 case OP_VSPACE:
5366 for (i = min; i < max; i++)
5367 {
5368 if (eptr >= md->end_subject)
5369 {
5370 SCHECK_PARTIAL();
5371 break;
5372 }
5373 c = *eptr;
5374 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
5375 break;
5376 eptr++;
5377 }
5378 break;
5379
5380 case OP_NOT_DIGIT:
5381 for (i = min; i < max; i++)
5382 {
5383 if (eptr >= md->end_subject)
5384 {
5385 SCHECK_PARTIAL();
5386 break;
5387 }
5388 if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
5389 eptr++;
5390 }
5391 break;
5392
5393 case OP_DIGIT:
5394 for (i = min; i < max; i++)
5395 {
5396 if (eptr >= md->end_subject)
5397 {
5398 SCHECK_PARTIAL();
5399 break;
5400 }
5401 if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
5402 eptr++;
5403 }
5404 break;
5405
5406 case OP_NOT_WHITESPACE:
5407 for (i = min; i < max; i++)
5408 {
5409 if (eptr >= md->end_subject)
5410 {
5411 SCHECK_PARTIAL();
5412 break;
5413 }
5414 if ((md->ctypes[*eptr] & ctype_space) != 0) break;
5415 eptr++;
5416 }
5417 break;
5418
5419 case OP_WHITESPACE:
5420 for (i = min; i < max; i++)
5421 {
5422 if (eptr >= md->end_subject)
5423 {
5424 SCHECK_PARTIAL();
5425 break;
5426 }
5427 if ((md->ctypes[*eptr] & ctype_space) == 0) break;
5428 eptr++;
5429 }
5430 break;
5431
5432 case OP_NOT_WORDCHAR:
5433 for (i = min; i < max; i++)
5434 {
5435 if (eptr >= md->end_subject)
5436 {
5437 SCHECK_PARTIAL();
5438 break;
5439 }
5440 if ((md->ctypes[*eptr] & ctype_word) != 0) break;
5441 eptr++;
5442 }
5443 break;
5444
5445 case OP_WORDCHAR:
5446 for (i = min; i < max; i++)
5447 {
5448 if (eptr >= md->end_subject)
5449 {
5450 SCHECK_PARTIAL();
5451 break;
5452 }
5453 if ((md->ctypes[*eptr] & ctype_word) == 0) break;
5454 eptr++;
5455 }
5456 break;
5457
5458 default:
5459 RRETURN(PCRE_ERROR_INTERNAL);
5460 }
5461
5462 /* eptr is now past the end of the maximum run */
5463
5464 if (possessive) continue;
5465 while (eptr >= pp)
5466 {
5467 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
5468 eptr--;
5469 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5470 }
5471 }
5472
5473 /* Get here if we can't make it match with any permitted repetitions */
5474
5475 MRRETURN(MATCH_NOMATCH);
5476 }
5477 /* Control never gets here */
5478
5479 /* There's been some horrible disaster. Arrival here can only mean there is
5480 something seriously wrong in the code above or the OP_xxx definitions. */
5481
5482 default:
5483 DPRINTF(("Unknown opcode %d\n", *ecode));
5484 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
5485 }
5486
5487 /* Do not stick any code in here without much thought; it is assumed
5488 that "continue" in the code above comes out to here to repeat the main
5489 loop. */
5490
5491 } /* End of main loop */
5492 /* Control never reaches here */
5493
5494
5495 /* When compiling to use the heap rather than the stack for recursive calls to
5496 match(), the RRETURN() macro jumps here. The number that is saved in
5497 frame->Xwhere indicates which label we actually want to return to. */
5498
5499 #ifdef NO_RECURSE
5500 #define LBL(val) case val: goto L_RM##val;
5501 HEAP_RETURN:
5502 switch (frame->Xwhere)
5503 {
5504 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
5505 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5506 LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5507 LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5508 LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
5509 #ifdef SUPPORT_UTF8
5510 LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5511 LBL(32) LBL(34) LBL(42) LBL(46)
5512 #ifdef SUPPORT_UCP
5513 LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5514 LBL(59) LBL(60) LBL(61) LBL(62)
5515 #endif /* SUPPORT_UCP */
5516 #endif /* SUPPORT_UTF8 */
5517 default:
5518 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
5519 return PCRE_ERROR_INTERNAL;
5520 }
5521 #undef LBL
5522 #endif /* NO_RECURSE */
5523 }
5524
5525
5526 /***************************************************************************
5527 ****************************************************************************
5528 RECURSION IN THE match() FUNCTION
5529
5530 Undefine all the macros that were defined above to handle this. */
5531
5532 #ifdef NO_RECURSE
/* In a NO_RECURSE build (heap rather than stack used for recursive calls to
match()), the names below were #defined as macros for use inside match(). The
definitions themselves are earlier in the file; presumably each one maps a
working variable of match() onto a field of the heap frame -- TODO confirm
against the #defines. They are removed here so that the same identifiers can
be used as ordinary names afterwards: for example, pcre_exec() below has a
parameter called "length" and locals called "ims" and "flags". */
5533 #undef eptr
5534 #undef ecode
5535 #undef mstart
5536 #undef offset_top
5537 #undef ims
5538 #undef eptrb
5539 #undef flags
5540
5541 #undef callpat
5542 #undef charptr
5543 #undef data
5544 #undef next
5545 #undef pp
5546 #undef prev
5547 #undef saved_eptr
5548
5549 #undef new_recursive
5550
5551 #undef cur_is_word
5552 #undef condition
5553 #undef prev_is_word
5554
5555 #undef original_ims
5556
5557 #undef ctype
5558 #undef length
5559 #undef max
5560 #undef min
5561 #undef number
5562 #undef offset
5563 #undef op
5564 #undef save_capture_last
5565 #undef save_offset1
5566 #undef save_offset2
5567 #undef save_offset3
5568 #undef stacksave
5569
5570 #undef newptrb
5571
5572 #endif
5573
5574 /* fc and fi are defined as macros whether or not NO_RECURSE is set, so they
are undefined unconditionally, outside the #ifdef above. */
5575
5576 #undef fc
5577 #undef fi
5578
5579 /***************************************************************************
5580 ***************************************************************************/
5581
5582
5583
5584 /*************************************************
5585 * Execute a Regular Expression *
5586 *************************************************/
5587
5588 /* This function applies a compiled re to a subject string and picks out
5589 portions of the string if it matches. Two elements in the vector are set for
5590 each substring: the offsets to the start and end of the substring.
5591
5592 Arguments:
5593 argument_re points to the compiled expression
5594 extra_data points to extra data or is NULL
5595 subject points to the subject string
5596 length length of subject string (may contain binary zeros)
5597 start_offset where to start in the subject string
5598 options option bits
5599 offsets points to a vector of ints to be filled in with offsets
5600 offsetcount the number of elements in the vector
5601
5602 Returns: > 0 => success; value is the number of elements filled in
5603 = 0 => success, but offsets is not big enough
5604 -1 => failed to match
5605 < -1 => some other failure, reported as a negative PCRE_ERROR_xxx code (for example PCRE_ERROR_NULL or PCRE_ERROR_BADOPTION)
5606 */
5607
5608 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5609 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
5610 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5611 int offsetcount)
5612 {
5613 int rc, resetcount, ocount;
5614 int first_byte = -1;
5615 int req_byte = -1;
5616 int req_byte2 = -1;
5617 int newline;
5618 unsigned long int ims;
5619 BOOL using_temporary_offsets = FALSE;
5620 BOOL anchored;
5621 BOOL startline;
5622 BOOL firstline;
5623 BOOL first_byte_caseless = FALSE;
5624 BOOL req_byte_caseless = FALSE;
5625 BOOL utf8;
5626 match_data match_block;
5627 match_data *md = &match_block;
5628 const uschar *tables;
5629 const uschar *start_bits = NULL;
5630 USPTR start_match = (USPTR)subject + start_offset;
5631 USPTR end_subject;
5632 USPTR start_partial = NULL;
5633 USPTR req_byte_ptr = start_match - 1;
5634
5635 pcre_study_data internal_study;
5636 const pcre_study_data *study;
5637
5638 real_pcre internal_re;
5639 const real_pcre *external_re = (const real_pcre *)argument_re;
5640 const real_pcre *re = external_re;
5641
5642 /* Plausibility checks */
5643
5644 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
5645 if (re == NULL || subject == NULL ||
5646 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5647 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5648 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5649
5650 /* This information is for finding all the numbers associated with a given
5651 name, for condition testing. */
5652
5653 md->name_table = (uschar *)re + re->name_table_offset;
5654 md->name_count = re->name_count;
5655 md->name_entry_size = re->name_entry_size;
5656
5657 /* Fish out the optional data from the extra_data structure, first setting
5658 the default values. */
5659
5660 study = NULL;
5661 md->match_limit = MATCH_LIMIT;
5662 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
5663 md->callout_data = NULL;
5664
5665 /* The table pointer is always in native byte order. */
5666
5667 tables = external_re->tables;
5668
5669 if (extra_data != NULL)
5670 {
5671 register unsigned int flags = extra_data->flags;
5672 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
5673 study = (const pcre_study_data *)extra_data->study_data;
5674 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
5675 md->match_limit = extra_data->match_limit;
5676 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
5677 md->match_limit_recursion = extra_data->match_limit_recursion;
5678 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
5679 md->callout_data = extra_data->callout_data;
5680 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
5681 }
5682
5683 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
5684 is a feature that makes it possible to save compiled regex and re-use them
5685 in other programs later. */
5686
5687 if (tables == NULL) tables = _pcre_default_tables;
5688
5689 /* Check that the first field in the block is the magic number. If it is not,
5690 test for a regex that was compiled on a host of opposite endianness. If this is
5691 the case, flipped values are put in internal_re and internal_study if there was
5692 study data too. */
5693
5694 if (re->magic_number != MAGIC_NUMBER)
5695 {
5696 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
5697 if (re == NULL) return PCRE_ERROR_BADMAGIC;
5698 if (study != NULL) study = &internal_study;
5699 }
5700
5701 /* Set up other data */
5702
5703 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
5704 startline = (re->flags & PCRE_STARTLINE) != 0;
5705 firstline = (re->options & PCRE_FIRSTLINE) != 0;
5706
5707 /* The code starts after the real_pcre block and the capture name table. */
5708
5709 md->start_code = (const uschar *)external_re + re->name_table_offset +
5710 re->name_count * re->name_entry_size;
5711
5712 md->start_subject = (USPTR)subject;
5713 md->start_offset = start_offset;
5714 md->end_subject = md->start_subject + length;
5715 end_subject = md->end_subject;
5716
5717 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5718 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5719 md->use_ucp = (re->options & PCRE_UCP) != 0;
5720 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5721
5722 md->notbol = (options & PCRE_NOTBOL) != 0;
5723 md->noteol = (options & PCRE_NOTEOL) != 0;
5724 md->notempty = (options & PCRE_NOTEMPTY) != 0;
5725 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5726 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5727 ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5728 md->hitend = FALSE;
5729 md->mark = NULL; /* In case never set */
5730
5731 md->recursive = NULL; /* No recursion at top level */
5732
5733 md->lcc = tables + lcc_offset;
5734 md->ctypes = tables + ctypes_offset;
5735
5736 /* Handle different \R options. */
5737
5738 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
5739 {
5740 case 0:
5741 if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
5742 md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
5743 else
5744 #ifdef BSR_ANYCRLF
5745 md->bsr_anycrlf = TRUE;
5746 #else
5747 md->bsr_anycrlf = FALSE;
5748 #endif
5749 break;
5750
5751 case PCRE_BSR_ANYCRLF:
5752 md->bsr_anycrlf = TRUE;
5753 break;
5754
5755 case PCRE_BSR_UNICODE:
5756 md->bsr_anycrlf = FALSE;
5757 break;
5758
5759 default: return PCRE_ERROR_BADNEWLINE;
5760 }
5761
5762 /* Handle different types of newline. The three bits give eight cases. If
5763 nothing is set at run time, whatever was used at compile time applies. */
5764
5765 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
5766 (pcre_uint32)options) & PCRE_NEWLINE_BITS)
5767 {
5768 case 0: newline = NEWLINE; break; /* Compile-time default */
5769 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
5770 case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
5771 case PCRE_NEWLINE_CR+
5772 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
5773 case PCRE_NEWLINE_ANY: newline = -1; break;
5774 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5775 default: return PCRE_ERROR_BADNEWLINE;
5776 }
5777
5778 if (newline == -2)
5779 {
5780 md->nltype = NLTYPE_ANYCRLF;
5781 }
5782 else if (newline < 0)
5783 {
5784 md->nltype = NLTYPE_ANY;
5785 }
5786 else
5787 {
5788 md->nltype = NLTYPE_FIXED;
5789 if (newline > 255)
5790 {
5791 md->nllen = 2;
5792 md->nl[0] = (newline >> 8) & 255;
5793 md->nl[1] = newline & 255;
5794 }
5795 else
5796 {
5797 md->nllen = 1;
5798 md->nl[0] = newline;
5799 }
5800 }
5801
5802 /* Partial matching was originally supported only for a restricted set of
5803 regexes; from release 8.00 there are no restrictions, but the bits are still
5804 defined (though never set). So there's no harm in leaving this code. */
5805
5806 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5807 return PCRE_ERROR_BADPARTIAL;
5808
5809 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
5810 back the character offset. */
5811
5812 #ifdef SUPPORT_UTF8
5813 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5814 {
5815 int tb;
5816 if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
5817 return (tb == length && md->partial > 1)?
5818 PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5819 if (start_offset > 0 && start_offset < length)
5820 {
5821 tb = ((USPTR)subject)[start_offset] & 0xc0;
5822 if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
5823 }
5824 }
5825 #endif
5826
5827 /* The ims options can vary during the matching as a result of the presence
5828 of (?ims) items in the pattern. They are kept in a local variable so that
5829 restoring at the exit of a group is easy. */
5830
5831 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
5832
5833 /* If the expression has got more back references than the offsets supplied can
5834 hold, we get a temporary chunk of working store to use during the matching.
5835 Otherwise, we can use the vector supplied, rounding down its size to a multiple
5836 of 3. */
5837
5838 ocount = offsetcount - (offsetcount % 3);
5839
5840 if (re->top_backref > 0 && re->top_backref >= ocount/3)
5841 {
5842 ocount = re->top_backref * 3 + 3;
5843 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
5844 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
5845 using_temporary_offsets = TRUE;
5846 DPRINTF(("Got memory to hold back references\n"));
5847 }
5848 else md->offset_vector = offsets;
5849
5850 md->offset_end = ocount;
5851 md->offset_max = (2*ocount)/3;
5852 md->offset_overflow = FALSE;
5853 md->capture_last = -1;
5854
5855 /* Compute the minimum number of offsets that we need to reset each time. Doing
5856 this makes a huge difference to execution time when there aren't many brackets
5857 in the pattern. */
5858
5859 resetcount = 2 + re->top_bracket * 2;
5860 if (resetcount > offsetcount) resetcount = ocount;
5861
5862 /* Reset the working variable associated with each extraction. These should
5863 never be used unless previously set, but they get saved and restored, and so we
5864 initialize them to avoid reading uninitialized locations. */
5865
5866 if (md->offset_vector != NULL)
5867 {
5868 register int *iptr = md->offset_vector + ocount;
5869 register int *iend = iptr - resetcount/2 + 1;
5870 while (--iptr >= iend) *iptr = -1;
5871 }
5872
5873 /* Set up the first character to match, if available. The first_byte value is
5874 never set for an anchored regular expression, but the anchoring may be forced
5875 at run time, so we have to test for anchoring. The first char may be unset for
5876 an unanchored pattern, of course. If there's no first char and the pattern was
5877 studied, there may be a bitmap of possible first characters. */
5878
5879 if (!anchored)
5880 {
5881 if ((re->flags & PCRE_FIRSTSET) != 0)
5882 {
5883 first_byte = re->first_byte & 255;
5884 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
5885 first_byte = md->lcc[first_byte];
5886 }
5887 else
5888 if (!startline && study != NULL &&
5889 (study->flags & PCRE_STUDY_MAPPED) != 0)
5890 start_bits = study->start_bits;
5891 }
5892
5893 /* For anchored or unanchored matches, there may be a "last known required
5894 character" set. */
5895
5896 if ((re->flags & PCRE_REQCHSET) != 0)
5897 {
5898 req_byte = re->req_byte & 255;
5899 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
5900 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
5901 }
5902
5903
5904 /* ==========================================================================*/
5905
5906 /* Loop for handling unanchored repeated matching attempts; for anchored
5907 regexes the loop runs just once. */
5908
5909 for(;;)
5910 {
5911 USPTR save_end_subject = end_subject;
5912 USPTR new_start_match;
5913
5914 /* Reset the maximum number of extractions we might see. */
5915
5916 if (md->offset_vector != NULL)
5917 {
5918 register int *iptr = md->offset_vector;
5919 register int *iend = iptr + resetcount;
5920 while (iptr < iend) *iptr++ = -1;
5921 }
5922
5923 /* If firstline is TRUE, the start of the match is constrained to the first
5924 line of a multiline string. That is, the match must be before or at the first
5925 newline. Implement this by temporarily adjusting end_subject so that we stop
5926 scanning at a newline. If the match fails at the newline, later code breaks
5927 this loop. */
5928
5929 if (firstline)
5930 {
5931 USPTR t = start_match;
5932 #ifdef SUPPORT_UTF8
5933 if (utf8)
5934 {
5935 while (t < md->end_subject && !IS_NEWLINE(t))
5936 {
5937 t++;
5938 while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5939 }
5940 }
5941 else
5942 #endif
5943 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5944 end_subject = t;
5945 }
5946
5947 /* There are some optimizations that avoid running the match if a known
5948 starting point is not found, or if a known later character is not present.
5949 However, there is an option that disables these, for testing and for ensuring
5950 that all callouts do actually occur. The option can be set in the regex by
5951 (*NO_START_OPT) or passed in match-time options. */
5952
5953 if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
5954 {
5955 /* Advance to a unique first byte if there is one. */
5956
5957 if (first_byte >= 0)
5958 {
5959 if (first_byte_caseless)
5960 while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5961 start_match++;
5962 else
5963 while (start_match < end_subject && *start_match != first_byte)
5964 start_match++;
5965 }
5966
5967 /* Or to just after a linebreak for a multiline match */
5968
5969 else if (startline)
5970 {
5971 if (start_match > md->start_subject + start_offset)
5972 {
5973 #ifdef SUPPORT_UTF8
5974 if (utf8)
5975 {
5976 while (start_match < end_subject && !WAS_NEWLINE(start_match))
5977 {
5978 start_match++;
5979 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5980 start_match++;
5981 }
5982 }
5983 else
5984 #endif
5985 while (start_match < end_subject && !WAS_NEWLINE(start_match))
5986 start_match++;
5987
5988 /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5989 and we are now at a LF, advance the match position by one more character.
5990 */
5991
5992 if (start_match[-1] == CHAR_CR &&
5993 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
5994 start_match < end_subject &&
5995 *start_match == CHAR_NL)
5996 start_match++;
5997 }
5998 }
5999
6000 /* Or to a non-unique first byte after study */
6001
6002 else if (start_bits != NULL)
6003 {
6004 while (start_match < end_subject)
6005 {
6006 register unsigned int c = *start_match;
6007 if ((start_bits[c/8] & (1 << (c&7))) == 0)
6008 {
6009 start_match++;
6010 #ifdef SUPPORT_UTF8
6011 if (utf8)
6012 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6013 start_match++;
6014 #endif
6015 }
6016 else break;
6017 }
6018 }
6019 } /* Starting optimizations */
6020
6021 /* Restore fudged end_subject */
6022
6023 end_subject = save_end_subject;
6024
6025 /* The following two optimizations are disabled for partial matching or if
6026 disabling is explicitly requested. */
6027
6028 if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6029 {
6030 /* If the pattern was studied, a minimum subject length may be set. This is
6031 only a lower bound: it is possible that no string of exactly that length
6032 matches the pattern. Although the value is, strictly, in characters, we treat
6033 it as bytes to avoid spending too much time in this optimization. */
6034
6035 if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6036 (pcre_uint32)(end_subject - start_match) < study->minlength)
6037 {
6038 rc = MATCH_NOMATCH;
6039 break;
6040 }
6041
6042 /* If req_byte is set, we know that that character must appear in the
6043 subject for the match to succeed. If the first character is set, req_byte
6044 must be later in the subject; otherwise the test starts at the match point.
6045 This optimization can save a huge amount of backtracking in patterns with
6046 nested unlimited repeats that aren't going to match. Writing separate code
6047 for cased/caseless versions makes it go faster, as does using an
6048 autoincrement and backing off on a match.
6049
6050 HOWEVER: when the subject string is very, very long, searching to its end
6051 can take a long time, and give bad performance on quite ordinary patterns.
6052 This showed up when somebody was matching something like /^\d+C/ on a
6053 32-megabyte string... so we don't do this when the string is sufficiently
6054 long. */
6055
6056 if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
6057 {
6058 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
6059
6060 /* We don't need to repeat the search if we haven't yet reached the
6061 place we found it at last time. */
6062
6063 if (p > req_byte_ptr)
6064 {
6065 if (req_byte_caseless)
6066 {
6067 while (p < end_subject)
6068 {
6069 register int pp = *p++;
6070 if (pp == req_byte || pp == req_byte2) { p--; break; }
6071 }
6072 }
6073 else
6074 {
6075 while (p < end_subject)
6076 {
6077 if (*p++ == req_byte) { p--; break; }
6078 }
6079 }
6080
6081 /* If we can't find the required character, break the matching loop,
6082 forcing a match failure. */
6083
6084 if (p >= end_subject)
6085 {
6086 rc = MATCH_NOMATCH;
6087 break;
6088 }
6089
6090 /* If we have found the required character, save the point where we
6091 found it, so that we don't search again next time round the loop if
6092 the start hasn't passed this character yet. */
6093
6094 req_byte_ptr = p;
6095 }
6096 }
6097 }
6098
6099 #ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
6100 printf(">>>> Match against: ");
6101 pchars(start_match, end_subject - start_match, TRUE, md);
6102 printf("\n");
6103 #endif
6104
6105 /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6106 first starting point for which a partial match was found. */
6107
6108 md->start_match_ptr = start_match;
6109 md->start_used_ptr = start_match;
6110 md->match_call_count = 0;
6111 rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
6112 0, 0);
6113 if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6114
6115 switch(rc)
6116 {
6117 /* SKIP passes back the next starting point explicitly, but if it is the
6118 same as the match we have just done, treat it as NOMATCH. */
6119
6120 case MATCH_SKIP:
6121 if (md->start_match_ptr != start_match)
6122 {
6123 new_start_match = md->start_match_ptr;
6124 break;
6125 }
6126 /* Fall through */
6127
6128 /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6129 the SKIP's arg was not found. We also treat this as NOMATCH. */
6130
6131 case MATCH_SKIP_ARG:
6132 /* Fall through */
6133
6134 /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6135 exactly like PRUNE. */
6136
6137 case MATCH_NOMATCH:
6138 case MATCH_PRUNE:
6139 case MATCH_THEN:
6140 new_start_match = start_match + 1;
6141 #ifdef SUPPORT_UTF8
6142 if (utf8)
6143 while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
6144 new_start_match++;
6145 #endif
6146 break;
6147
6148 /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6149
6150 case MATCH_COMMIT:
6151 rc = MATCH_NOMATCH;
6152 goto ENDLOOP;
6153
6154 /* Any other return is either a match, or some kind of error. */
6155
6156 default:
6157 goto ENDLOOP;
6158 }
6159
6160 /* Control reaches here for the various types of "no match at this point"
6161 result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6162
6163 rc = MATCH_NOMATCH;
6164
6165 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6166 newline in the subject (though it may continue over the newline). Therefore,
6167 if we have just failed to match, starting at a newline, do not continue. */
6168
6169 if (firstline && IS_NEWLINE(start_match)) break;
6170
6171 /* Advance to new matching position */
6172
6173 start_match = new_start_match;
6174
6175 /* Break the loop if the pattern is anchored or if we have passed the end of
6176 the subject. */
6177
6178 if (anchored || start_match > end_subject) break;
6179
6180 /* If we have just passed a CR and we are now at a LF, and the pattern does
6181 not contain any explicit matches for \r or \n, and the newline option is CRLF
6182 or ANY or ANYCRLF, advance the match position by one more character. */
6183
6184 if (start_match[-1] == CHAR_CR &&
6185 start_match < end_subject &&
6186 *start_match == CHAR_NL &&
6187 (re->flags & PCRE_HASCRORLF) == 0 &&
6188 (md->nltype == NLTYPE_ANY ||
6189 md->nltype == NLTYPE_ANYCRLF ||
6190 md->nllen == 2))
6191 start_match++;
6192
6193 md->mark = NULL; /* Reset for start of next match attempt */
6194 } /* End of for(;;) "bumpalong" loop */
6195
6196 /* ==========================================================================*/
6197
6198 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
6199 conditions is true:
6200
6201 (1) The pattern is anchored or the match was failed by (*COMMIT);
6202
6203 (2) We are past the end of the subject;
6204
6205 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
6206 this option requests that a match occur at or before the first newline in
6207 the subject.
6208
6209 When we have a match and the offset vector is big enough to deal with any
6210 backreferences, captured substring offsets will already be set up. In the case
6211 where we had to get some local store to hold offsets for backreference
6212 processing, copy those that we can. In this case there need not be overflow if
6213 certain parts of the pattern were not used, even though there are more
6214 capturing parentheses than vector slots. */
6215
6216 ENDLOOP:
6217
6218 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
6219 {
6220 if (using_temporary_offsets)
6221 {
6222 if (offsetcount >= 4)
6223 {
6224 memcpy(offsets + 2, md->offset_vector + 2,
6225 (offsetcount - 2) * sizeof(int));
6226 DPRINTF(("Copied offsets from temporary memory\n"));
6227 }
6228 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
6229 DPRINTF(("Freeing temporary memory\n"));
6230 (pcre_free)(md->offset_vector);
6231 }
6232
6233 /* Set the return code to the number of captured strings, or 0 if there are
6234 too many to fit into the vector. */
6235
6236 rc = md->offset_overflow? 0 : md->end_offset_top/2;
6237
6238 /* If there is space, set up the whole thing as substring 0. The value of
6239 md->start_match_ptr might have been modified if \K was encountered on the
6240 successful matching path. */
6241
6242 if (offsetcount < 2) rc = 0; else
6243 {
6244 offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6245 offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6246 }
6247
6248 DPRINTF((">>>> returning %d\n", rc));
6249 goto RETURN_MARK;
6250 }
6251
6252 /* Control gets here if there has been an error, or if the overall match
6253 attempt has failed at all permitted starting positions. */
6254
6255 if (using_temporary_offsets)
6256 {
6257 DPRINTF(("Freeing temporary memory\n"));
6258 (pcre_free)(md->offset_vector);
6259 }
6260
6261 /* For anything other than nomatch or partial match, just return the code. */
6262
6263 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
6264 {
6265 DPRINTF((">>>> error: returning %d\n", rc));
6266 return rc;
6267 }
6268
6269 /* Handle partial matches - disable any mark data */
6270
6271 if (start_partial != NULL)
6272 {
6273 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
6274 md->mark = NULL;
6275 if (offsetcount > 1)
6276 {
6277 offsets[0] = (int)(start_partial - (USPTR)subject);
6278 offsets[1] = (int)(end_subject - (USPTR)subject);
6279 }
6280 rc = PCRE_ERROR_PARTIAL;
6281 }
6282
6283 /* This is the classic nomatch case */
6284
6285 else
6286 {
6287 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
6288 rc = PCRE_ERROR_NOMATCH;
6289 }
6290
6291 /* Return the MARK data if it has been requested. */
6292
6293 RETURN_MARK:
6294
6295 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6296 *(extra_data->mark) = (unsigned char *)(md->mark);
6297 return rc;
6298 }
6299
6300 /* End of pcre_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5