/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

<
revision 115 by ph10, Fri Mar 9 12:23:37 2007 UTC revision 510 by ph10, Sat Mar 27 17:45:29 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55  /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  /* Undefine some potentially clashing cpp symbols */
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
56    
57  #define EPTR_WORK_SIZE (1000)  #undef min
58    #undef max
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 65  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_SKIP_ARG     (-996)
78    #define MATCH_THEN         (-995)
79    
80    /* This is a convenience macro for code that occurs many times. */
81    
82    #define MRRETURN(ra) \
83      { \
84      md->mark = markptr; \
85      RRETURN(ra); \
86      }
87    
88  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
89  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
90  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 78  static const char rep_max[] = { 0, 0, 0, Line 98  static const char rep_max[] = { 0, 0, 0,
98    
99    
100    
101  #ifdef DEBUG  #ifdef PCRE_DEBUG
102  /*************************************************  /*************************************************
103  *        Debugging function to print chars       *  *        Debugging function to print chars       *
104  *************************************************/  *************************************************/
# Line 130  match_ref(int offset, register USPTR ept Line 150  match_ref(int offset, register USPTR ept
150  {  {
151  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
152    
153  #ifdef DEBUG  #ifdef PCRE_DEBUG
154  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
155    printf("matching subject <null>");    printf("matching subject <null>");
156  else  else
# Line 147  printf("\n"); Line 167  printf("\n");
167    
168  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
169    
170  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
171    properly if Unicode properties are supported. Otherwise, we can check only
172    ASCII characters. */
173    
174  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
175    {    {
176    #ifdef SUPPORT_UTF8
177    #ifdef SUPPORT_UCP
178      if (md->utf8)
179        {
180        USPTR endptr = eptr + length;
181        while (eptr < endptr)
182          {
183          int c, d;
184          GETCHARINC(c, eptr);
185          GETCHARINC(d, p);
186          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
187          }
188        }
189      else
190    #endif
191    #endif
192    
193      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
194      is no UCP support. */
195    
196    while (length-- > 0)    while (length-- > 0)
197      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
198    }    }
199    
200    /* In the caseful case, we can just compare the bytes, whether or not we
201    are in UTF-8 mode. */
202    
203  else  else
204    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
205    
# Line 183  calls by keeping local variables that ne Line 229  calls by keeping local variables that ne
229  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
230  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
231  always used to.  always used to.
232    
233    The original heap-recursive code used longjmp(). However, it seems that this
234    can be very slow on some operating systems. Following a suggestion from Stan
235    Switzer, the use of longjmp() has been abolished, at the cost of having to
236    provide a unique number for each call to RMATCH. There is no way of generating
237    a sequence of numbers at compile time in C. I have given them names, to make
238    them stand out more clearly.
239    
240    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
241    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
242    tests. Furthermore, not using longjmp() means that local dynamic variables
243    don't have indeterminate values; this has meant that the frame size can be
244    reduced because the result can be "passed back" by straight setting of the
245    variable instead of being passed in the frame.
246  ****************************************************************************  ****************************************************************************
247  ***************************************************************************/  ***************************************************************************/
248    
249    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
250    below must be updated in sync.  */
251    
252    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
253           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
254           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
255           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
256           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
257           RM51,  RM52, RM53, RM54 };
258    
259  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
260  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
261    actually used in this definition. */
262    
263  #ifndef NO_RECURSE  #ifndef NO_RECURSE
264  #define REGISTER register  #define REGISTER register
265  #ifdef DEBUG  
266  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #ifdef PCRE_DEBUG
267    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
268    { \    { \
269    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
270    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
271    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
272    }    }
273  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 205  versions and production versions. */ Line 276  versions and production versions. */
276    return ra; \    return ra; \
277    }    }
278  #else  #else
279  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
280    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
281  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
282  #endif  #endif
283    
284  #else  #else
285    
286    
287  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
288  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
289  match(), which never changes. */  argument of match(), which never changes. */
290    
291  #define REGISTER  #define REGISTER
292    
293  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
294    {\    {\
295    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
296    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
297      {\    newframe->Xeptr = ra;\
298      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
299      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
300      newframe->Xoffset_top = rc;\    newframe->Xmarkptr = markptr;\
301      newframe->Xims = re;\    newframe->Xoffset_top = rc;\
302      newframe->Xeptrb = rf;\    newframe->Xims = re;\
303      newframe->Xflags = rg;\    newframe->Xeptrb = rf;\
304      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xflags = rg;\
305      newframe->Xprevframe = frame;\    newframe->Xrdepth = frame->Xrdepth + 1;\
306      frame = newframe;\    newframe->Xprevframe = frame;\
307      DPRINTF(("restarting from line %d\n", __LINE__));\    frame = newframe;\
308      goto HEAP_RECURSE;\    DPRINTF(("restarting from line %d\n", __LINE__));\
309      }\    goto HEAP_RECURSE;\
310    else\    L_##rw:\
311      {\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
312    }    }
313    
314  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 251  match(), which never changes. */ Line 318  match(), which never changes. */
318    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
319    if (frame != NULL)\    if (frame != NULL)\
320      {\      {\
321      frame->Xresult = ra;\      rrc = ra;\
322      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
323      }\      }\
324    return ra;\    return ra;\
325    }    }
# Line 266  typedef struct heapframe { Line 332  typedef struct heapframe {
332    
333    /* Function arguments that may change */    /* Function arguments that may change */
334    
335    const uschar *Xeptr;    USPTR Xeptr;
336    const uschar *Xecode;    const uschar *Xecode;
337      USPTR Xmstart;
338      USPTR Xmarkptr;
339    int Xoffset_top;    int Xoffset_top;
340    long int Xims;    long int Xims;
341    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 276  typedef struct heapframe { Line 344  typedef struct heapframe {
344    
345    /* Function local variables */    /* Function local variables */
346    
347    const uschar *Xcallpat;    USPTR Xcallpat;
348    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
349    const uschar *Xdata;    USPTR Xcharptr;
350    const uschar *Xnext;  #endif
351    const uschar *Xpp;    USPTR Xdata;
352    const uschar *Xprev;    USPTR Xnext;
353    const uschar *Xsaved_eptr;    USPTR Xpp;
354      USPTR Xprev;
355      USPTR Xsaved_eptr;
356    
357    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
358    
# Line 299  typedef struct heapframe { Line 369  typedef struct heapframe {
369    int Xprop_category;    int Xprop_category;
370    int Xprop_chartype;    int Xprop_chartype;
371    int Xprop_script;    int Xprop_script;
372    int Xoclength;    int Xoclength;
373    uschar Xocchars[8];    uschar Xocchars[8];
374  #endif  #endif
375    
376      int Xcodelink;
377    int Xctype;    int Xctype;
378    unsigned int Xfc;    unsigned int Xfc;
379    int Xfi;    int Xfi;
# Line 318  typedef struct heapframe { Line 389  typedef struct heapframe {
389    
390    eptrblock Xnewptrb;    eptrblock Xnewptrb;
391    
392    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
393    
394    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
395    
396  } heapframe;  } heapframe;
397    
# Line 339  typedef struct heapframe { Line 409  typedef struct heapframe {
409    
410  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
411  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
412  same response.  same response. */
413    
414  Performance note: It might be tempting to extract commonly used fields from the  /* These macros pack up tests that are used for partial matching, and which
415  md structure (e.g. utf8, end_subject) into individual variables to improve  appear several times in the code. We set the "hit end" flag if the pointer is
416    at the end of the subject and also past the start of the subject (i.e.
417    something has been matched). For hard partial matching, we then return
418    immediately. The second one is used when we already know we are past the end of
419    the subject. */
420    
421    #define CHECK_PARTIAL()\
422      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
423        {\
424        md->hitend = TRUE;\
425        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
426        }
427    
428    #define SCHECK_PARTIAL()\
429      if (md->partial != 0 && eptr > mstart)\
430        {\
431        md->hitend = TRUE;\
432        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
433        }
434    
435    
436    /* Performance note: It might be tempting to extract commonly used fields from
437    the md structure (e.g. utf8, end_subject) into individual variables to improve
438  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
439  made performance worse.  made performance worse.
440    
441  Arguments:  Arguments:
442     eptr        pointer to current character in subject     eptr        pointer to current character in subject
443     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
444       mstart      pointer to the current match start position (can be modified
445                     by encountering \K)
446       markptr     pointer to the most recent MARK name, or NULL
447     offset_top  current top pointer     offset_top  current top pointer
448     md          pointer to "static" info for the match     md          pointer to "static" info for the match
449     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 358  Arguments: Line 453  Arguments:
453                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
454                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
455                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
456     rdepth      the recursion depth     rdepth      the recursion depth
457    
458  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
459                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
460                   a negative MATCH_xxx value for PRUNE, SKIP, etc
461                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
462                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
463  */  */
464    
465  static int  static int
466  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
467    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
468    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
469  {  {
470  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
471  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 382  register unsigned int c;   /* Character Line 477  register unsigned int c;   /* Character
477  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
478    
479  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
480    int condcode;
481    
482  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
483  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 396  frame->Xprevframe = NULL;            /* Line 492  frame->Xprevframe = NULL;            /*
492    
493  frame->Xeptr = eptr;  frame->Xeptr = eptr;
494  frame->Xecode = ecode;  frame->Xecode = ecode;
495    frame->Xmstart = mstart;
496    frame->Xmarkptr = markptr;
497  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
498  frame->Xims = ims;  frame->Xims = ims;
499  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 410  HEAP_RECURSE: Line 508  HEAP_RECURSE:
508    
509  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
510  #define ecode              frame->Xecode  #define ecode              frame->Xecode
511    #define mstart             frame->Xmstart
512    #define markptr            frame->Xmarkptr
513  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
514  #define ims                frame->Xims  #define ims                frame->Xims
515  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 422  HEAP_RECURSE: Line 522  HEAP_RECURSE:
522  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
523  #endif  #endif
524  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
525    #define codelink           frame->Xcodelink
526  #define data               frame->Xdata  #define data               frame->Xdata
527  #define next               frame->Xnext  #define next               frame->Xnext
528  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 502  int oclength; Line 603  int oclength;
603  uschar occhars[8];  uschar occhars[8];
604  #endif  #endif
605    
606    int codelink;
607  int ctype;  int ctype;
608  int length;  int length;
609  int max;  int max;
# Line 535  TAIL_RECURSE: Line 637  TAIL_RECURSE:
637  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
638  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
639  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
640  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
641  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
642  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
643  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
644    
645    #ifdef SUPPORT_UTF8
646    utf8 = md->utf8;       /* Local copy of the flag */
647    #else
648    utf8 = FALSE;
649    #endif
650    
651  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
652  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
653    
# Line 548  if (rdepth >= md->match_limit_recursion) Line 656  if (rdepth >= md->match_limit_recursion)
656    
657  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
658    
 #ifdef SUPPORT_UTF8  
 utf8 = md->utf8;       /* Local copy of the flag */  
 #else  
 utf8 = FALSE;  
 #endif  
   
659  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
660  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
661  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
662  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
663  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
664  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
665  already used. */  block that is used is on the stack, so a new one may be required for each
666    match(). */
667    
668  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
669    {    {
670    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
671    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
672      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
673    }    }
674    
675  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 583  for (;;) Line 679  for (;;)
679    minimize = possessive = FALSE;    minimize = possessive = FALSE;
680    op = *ecode;    op = *ecode;
681    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
682    switch(op)    switch(op)
683      {      {
684        case OP_MARK:
685        markptr = ecode + 2;
686        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
687          ims, eptrb, flags, RM51);
688    
689        /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
690        argument, and we must check whether that argument matches this MARK's
691        argument. It is passed back in md->start_match_ptr (an overloading of that
692        variable). If it does match, we reset that variable to the current subject
693        position and return MATCH_SKIP. Otherwise, pass back the return code
694        unaltered. */
695    
696        if (rrc == MATCH_SKIP_ARG &&
697            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
698          {
699          md->start_match_ptr = eptr;
700          RRETURN(MATCH_SKIP);
701          }
702    
703        if (md->mark == NULL) md->mark = markptr;
704        RRETURN(rrc);
705    
706        case OP_FAIL:
707        MRRETURN(MATCH_NOMATCH);
708    
709        case OP_COMMIT:
710        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
711          ims, eptrb, flags, RM52);
712        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
713        MRRETURN(MATCH_COMMIT);
714    
715        case OP_PRUNE:
716        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
717          ims, eptrb, flags, RM51);
718        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
719        MRRETURN(MATCH_PRUNE);
720    
721        case OP_PRUNE_ARG:
722        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
723          ims, eptrb, flags, RM51);
724        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
725        md->mark = ecode + 2;
726        RRETURN(MATCH_PRUNE);
727    
728        case OP_SKIP:
729        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
730          ims, eptrb, flags, RM53);
731        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
732        md->start_match_ptr = eptr;   /* Pass back current position */
733        MRRETURN(MATCH_SKIP);
734    
735        case OP_SKIP_ARG:
736        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
737          ims, eptrb, flags, RM53);
738        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
739    
740        /* Pass back the current skip name by overloading md->start_match_ptr and
741        returning the special MATCH_SKIP_ARG return code. This will either be
742        caught by a matching MARK, or get to the top, where it is treated the same
743        as PRUNE. */
744    
745        md->start_match_ptr = ecode + 2;
746        RRETURN(MATCH_SKIP_ARG);
747    
748        case OP_THEN:
749        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
750          ims, eptrb, flags, RM54);
751        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
752        MRRETURN(MATCH_THEN);
753    
754        case OP_THEN_ARG:
755        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
756          ims, eptrb, flags, RM54);
757        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
758        md->mark = ecode + 2;
759        RRETURN(MATCH_THEN);
760    
761      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
762      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
763      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 612  for (;;) Line 777  for (;;)
777      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
778      offset = number << 1;      offset = number << 1;
779    
780  #ifdef DEBUG  #ifdef PCRE_DEBUG
781      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
782      printf("subject=");      printf("subject=");
783      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
# Line 632  for (;;) Line 797  for (;;)
797        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
798        do        do
799          {          {
800          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
801            ims, eptrb, flags);            ims, eptrb, flags, RM1);
802          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
803          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
804          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
805          }          }
# Line 646  for (;;) Line 811  for (;;)
811        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
812        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
813    
814          if (rrc != MATCH_THEN) md->mark = markptr;
815        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
816        }        }
817    
818      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
819      bracket. */      as a non-capturing bracket. */
820    
821        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
822        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
823    
824      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
825    
826        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
827        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
828    
829      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
830      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
831      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
832      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
833        is set. */
834    
835      case OP_BRA:      case OP_BRA:
836      case OP_SBRA:      case OP_SBRA:
# Line 665  for (;;) Line 838  for (;;)
838      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
839      for (;;)      for (;;)
840        {        {
841        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
842          {          {
843          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
844          flags |= match_tail_recursed;            {
845          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
846          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
847              goto TAIL_RECURSE;
848              }
849    
850            /* Possibly empty group; can't use tail recursion. */
851    
852            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
853              eptrb, flags, RM48);
854            if (rrc == MATCH_NOMATCH) md->mark = markptr;
855            RRETURN(rrc);
856          }          }
857    
858        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
859        otherwise return. */        otherwise return. */
860    
861        RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
862          eptrb, flags);          eptrb, flags, RM2);
863        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
864        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
865        }        }
866      /* Control never reaches here. */      /* Control never reaches here. */
# Line 691  for (;;) Line 873  for (;;)
873    
874      case OP_COND:      case OP_COND:
875      case OP_SCOND:      case OP_SCOND:
876      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
877    
878        /* Because of the way auto-callout works during compile, a callout item is
879        inserted between OP_COND and an assertion condition. */
880    
881        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
882        {        {
883        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        if (pcre_callout != NULL)
884        condition = md->recursive != NULL &&          {
885          (offset == RREF_ANY || offset == md->recursive->group_num);          pcre_callout_block cb;
886        ecode += condition? 3 : GET(ecode, 1);          cb.version          = 1;   /* Version 1 of the callout block */
887            cb.callout_number   = ecode[LINK_SIZE+2];
888            cb.offset_vector    = md->offset_vector;
889            cb.subject          = (PCRE_SPTR)md->start_subject;
890            cb.subject_length   = md->end_subject - md->start_subject;
891            cb.start_match      = mstart - md->start_subject;
892            cb.current_position = eptr - md->start_subject;
893            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
894            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
895            cb.capture_top      = offset_top/2;
896            cb.capture_last     = md->capture_last;
897            cb.callout_data     = md->callout_data;
898            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
899            if (rrc < 0) RRETURN(rrc);
900            }
901          ecode += _pcre_OP_lengths[OP_CALLOUT];
902          }
903    
904        condcode = ecode[LINK_SIZE+1];
905    
906        /* Now see what the actual condition is */
907    
908        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
909          {
910          if (md->recursive == NULL)                /* Not recursing => FALSE */
911            {
912            condition = FALSE;
913            ecode += GET(ecode, 1);
914            }
915          else
916            {
917            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
918            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
919    
920            /* If the test is for recursion into a specific subpattern, and it is
921            false, but the test was set up by name, scan the table to see if the
922            name refers to any other numbers, and test them. The condition is true
923            if any one is set. */
924    
925            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
926              {
927              uschar *slotA = md->name_table;
928              for (i = 0; i < md->name_count; i++)
929                {
930                if (GET2(slotA, 0) == recno) break;
931                slotA += md->name_entry_size;
932                }
933    
934              /* Found a name for the number - there can be only one; duplicate
935              names for different numbers are allowed, but not vice versa. First
936              scan down for duplicates. */
937    
938              if (i < md->name_count)
939                {
940                uschar *slotB = slotA;
941                while (slotB > md->name_table)
942                  {
943                  slotB -= md->name_entry_size;
944                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
945                    {
946                    condition = GET2(slotB, 0) == md->recursive->group_num;
947                    if (condition) break;
948                    }
949                  else break;
950                  }
951    
952                /* Scan up for duplicates */
953    
954                if (!condition)
955                  {
956                  slotB = slotA;
957                  for (i++; i < md->name_count; i++)
958                    {
959                    slotB += md->name_entry_size;
960                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
961                      {
962                      condition = GET2(slotB, 0) == md->recursive->group_num;
963                      if (condition) break;
964                      }
965                    else break;
966                    }
967                  }
968                }
969              }
970    
971            /* Choose branch according to the condition */
972    
973            ecode += condition? 3 : GET(ecode, 1);
974            }
975        }        }
976    
977      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
978        {        {
979        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
980        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
981    
982          /* If the numbered capture is unset, but the reference was by name,
983          scan the table to see if the name refers to any other numbers, and test
984          them. The condition is true if any one is set. This is tediously similar
985          to the code above, but not close enough to try to amalgamate. */
986    
987          if (!condition && condcode == OP_NCREF)
988            {
989            int refno = offset >> 1;
990            uschar *slotA = md->name_table;
991    
992            for (i = 0; i < md->name_count; i++)
993              {
994              if (GET2(slotA, 0) == refno) break;
995              slotA += md->name_entry_size;
996              }
997    
998            /* Found a name for the number - there can be only one; duplicate names
999            for different numbers are allowed, but not vice versa. First scan down
1000            for duplicates. */
1001    
1002            if (i < md->name_count)
1003              {
1004              uschar *slotB = slotA;
1005              while (slotB > md->name_table)
1006                {
1007                slotB -= md->name_entry_size;
1008                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1009                  {
1010                  offset = GET2(slotB, 0) << 1;
1011                  condition = offset < offset_top &&
1012                    md->offset_vector[offset] >= 0;
1013                  if (condition) break;
1014                  }
1015                else break;
1016                }
1017    
1018              /* Scan up for duplicates */
1019    
1020              if (!condition)
1021                {
1022                slotB = slotA;
1023                for (i++; i < md->name_count; i++)
1024                  {
1025                  slotB += md->name_entry_size;
1026                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1027                    {
1028                    offset = GET2(slotB, 0) << 1;
1029                    condition = offset < offset_top &&
1030                      md->offset_vector[offset] >= 0;
1031                    if (condition) break;
1032                    }
1033                  else break;
1034                  }
1035                }
1036              }
1037            }
1038    
1039          /* Choose branch according to the condition */
1040    
1041        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
1042        }        }
1043    
1044      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
1045        {        {
1046        condition = FALSE;        condition = FALSE;
1047        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 718  for (;;) Line 1053  for (;;)
1053    
1054      else      else
1055        {        {
1056        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1057            match_condassert);            match_condassert, RM3);
1058        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1059          {          {
1060          condition = TRUE;          condition = TRUE;
1061          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1062          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1063          }          }
1064        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1065          {          {
1066          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1067          }          }
1068        else        else
1069          {          {
1070          condition = FALSE;          condition = FALSE;
1071          ecode += GET(ecode, 1);          ecode += codelink;
1072          }          }
1073        }        }
1074    
1075      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1076      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
1077      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
1078        group. If the second alternative doesn't exist, we can just plough on. */
1079    
1080      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1081        {        {
1082        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1083        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
1084        goto TAIL_RECURSE;          {
1085            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1086            RRETURN(rrc);
1087            }
1088          else                       /* Group must match something */
1089            {
1090            flags = 0;
1091            goto TAIL_RECURSE;
1092            }
1093        }        }
1094      else      else                         /* Condition false & no alternative */
1095        {        {
1096        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1097        }        }
1098      break;      break;
1099    
1100    
1101      /* End of the pattern. If we are in a top-level recursion, we should      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1102      restore the offsets appropriately and continue from after the call. */      to close any currently open capturing brackets. */
1103    
1104        case OP_CLOSE:
1105        number = GET2(ecode, 1);
1106        offset = number << 1;
1107    
1108    #ifdef PCRE_DEBUG
1109          printf("end bracket %d at *ACCEPT", number);
1110          printf("\n");
1111    #endif
1112    
1113        md->capture_last = number;
1114        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1115          {
1116          md->offset_vector[offset] =
1117            md->offset_vector[md->offset_end - number];
1118          md->offset_vector[offset+1] = eptr - md->start_subject;
1119          if (offset_top <= offset) offset_top = offset + 2;
1120          }
1121        ecode += 3;
1122        break;
1123    
1124    
1125        /* End of the pattern, either real or forced. If we are in a top-level
1126        recursion, we should restore the offsets appropriately and continue from
1127        after the call. */
1128    
1129        case OP_ACCEPT:
1130      case OP_END:      case OP_END:
1131      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1132        {        {
# Line 765  for (;;) Line 1135  for (;;)
1135        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1136        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1137          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1138        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1139        ims = original_ims;        ims = original_ims;
1140        ecode = rec->after_call;        ecode = rec->after_call;
1141        break;        break;
1142        }        }
1143    
1144      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1145      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1146        the subject. In both cases, backtracking will then try other alternatives,
1147      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if any. */
1148      md->end_match_ptr = eptr;          /* Record where we ended */  
1149      md->end_offset_top = offset_top;   /* and how many extracts were taken */      if (eptr == mstart &&
1150      RRETURN(MATCH_MATCH);          (md->notempty ||
1151              (md->notempty_atstart &&
1152                mstart == md->start_subject + md->start_offset)))
1153          MRRETURN(MATCH_NOMATCH);
1154    
1155        /* Otherwise, we have a match. */
1156    
1157        md->end_match_ptr = eptr;           /* Record where we ended */
1158        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1159        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1160        MRRETURN(MATCH_MATCH);
1161    
1162      /* Change option settings */      /* Change option settings */
1163    
# Line 797  for (;;) Line 1177  for (;;)
1177      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1178      do      do
1179        {        {
1180        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1181        if (rrc == MATCH_MATCH) break;          RM4);
1182        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_MATCH)
1183            {
1184            mstart = md->start_match_ptr;   /* In case \K reset it */
1185            break;
1186            }
1187          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1188        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1189        }        }
1190      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1191      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1192    
1193      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1194    
# Line 817  for (;;) Line 1202  for (;;)
1202      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1203      continue;      continue;
1204    
1205      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1206        PRUNE, or COMMIT means we must assume failure without checking subsequent
1207        branches. */
1208    
1209      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1210      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1211      do      do
1212        {        {
1213        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1214        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);          RM5);
1215        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_MATCH) MRRETURN(MATCH_NOMATCH);
1216          if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1217            {
1218            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1219            break;
1220            }
1221          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1222        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1223        }        }
1224      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 848  for (;;) Line 1241  for (;;)
1241        while (i-- > 0)        while (i-- > 0)
1242          {          {
1243          eptr--;          eptr--;
1244          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1245          BACKCHAR(eptr)          BACKCHAR(eptr);
1246          }          }
1247        }        }
1248      else      else
# Line 859  for (;;) Line 1252  for (;;)
1252    
1253        {        {
1254        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1255        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1256        }        }
1257    
1258      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1259    
1260        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1261      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1262      break;      break;
1263    
# Line 880  for (;;) Line 1274  for (;;)
1274        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1275        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1276        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1277        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1278        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1279        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1280        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1281        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1282        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1283        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1284        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1285        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1286        }        }
1287      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 942  for (;;) Line 1336  for (;;)
1336    
1337        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1338              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1339        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1340    
1341        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1342        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 952  for (;;) Line 1345  for (;;)
1345        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1346        do        do
1347          {          {
1348          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1349            md, ims, eptrb, flags);            md, ims, eptrb, flags, RM6);
1350          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1351            {            {
1352            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1353            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1354            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1355              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1356            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1357            }            }
1358          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1359            {            {
1360            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1361              if (new_recursive.offset_save != stacksave)
1362                (pcre_free)(new_recursive.offset_save);
1363            RRETURN(rrc);            RRETURN(rrc);
1364            }            }
1365    
# Line 979  for (;;) Line 1374  for (;;)
1374        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1375        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1376          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1377        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1378        }        }
1379      /* Control never reaches here */      /* Control never reaches here */
1380    
# Line 988  for (;;) Line 1383  for (;;)
1383      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1384      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1385      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1386      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1387        the start-of-match value in case it was changed by \K. */
1388    
1389      case OP_ONCE:      case OP_ONCE:
1390      prev = ecode;      prev = ecode;
# Line 996  for (;;) Line 1392  for (;;)
1392    
1393      do      do
1394        {        {
1395        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1396          eptrb, 0);        if (rrc == MATCH_MATCH)
1397        if (rrc == MATCH_MATCH) break;          {
1398        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1399            break;
1400            }
1401          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1402        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1403        }        }
1404      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1042  for (;;) Line 1441  for (;;)
1441    
1442      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1443        {        {
1444        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1445        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1446        ecode = prev;        ecode = prev;
1447        flags = match_tail_recursed;        flags = 0;
1448        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1449        }        }
1450      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1451        {        {
1452        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1453        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1454        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1455        flags = match_tail_recursed;        flags = 0;
1456        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1457        }        }
1458      /* Control never gets here */      /* Control never gets here */
# Line 1065  for (;;) Line 1464  for (;;)
1464      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1465      break;      break;
1466    
1467      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1468      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1469      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1470      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1471      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1472    
1473      case OP_BRAZERO:      case OP_BRAZERO:
1474        {        {
1475        next = ecode+1;        next = ecode+1;
1476        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1477        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1478        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1479        ecode = next + 1 + LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
# Line 1085  for (;;) Line 1484  for (;;)
1484        {        {
1485        next = ecode+1;        next = ecode+1;
1486        do next += GET(next, 1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1487        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1488        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1489        ecode++;        ecode++;
1490        }        }
1491      break;      break;
1492    
1493        case OP_SKIPZERO:
1494          {
1495          next = ecode+1;
1496          do next += GET(next,1); while (*next == OP_ALT);
1497          ecode = next + 1 + LINK_SIZE;
1498          }
1499        break;
1500    
1501      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1502    
1503      case OP_KET:      case OP_KET:
# Line 1109  for (;;) Line 1516  for (;;)
1516        }        }
1517      else saved_eptr = NULL;      else saved_eptr = NULL;
1518    
1519      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or an atomic group, stop
1520      MATCH_MATCH, but record the current high water mark for use by positive      matching and return MATCH_MATCH, but record the current high water mark for
1521      assertions. Do this also for the "once" (atomic) groups. */      use by positive assertions. We also need to record the match start in case
1522        it was changed by \K. */
1523    
1524      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1525          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1119  for (;;) Line 1527  for (;;)
1527        {        {
1528        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1529        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1530        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1531          MRRETURN(MATCH_MATCH);
1532        }        }
1533    
1534      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1133  for (;;) Line 1542  for (;;)
1542        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1543        offset = number << 1;        offset = number << 1;
1544    
1545  #ifdef DEBUG  #ifdef PCRE_DEBUG
1546        printf("end bracket %d", number);        printf("end bracket %d", number);
1547        printf("\n");        printf("\n");
1548  #endif  #endif
# Line 1155  for (;;) Line 1564  for (;;)
1564          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1565          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1566          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
         md->start_match = rec->save_start;  
1567          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1568            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1569            offset_top = rec->save_offset_top;
1570          ecode = rec->after_call;          ecode = rec->after_call;
1571          ims = original_ims;          ims = original_ims;
1572          break;          break;
# Line 1184  for (;;) Line 1593  for (;;)
1593    
1594      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1595      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1596      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1597        unlimited repeat of a group that can match an empty string. */
1598    
1599      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1600    
1601      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1602        {        {
1603        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1604        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1605          if (flags != 0)    /* Could match an empty string */
1606            {
1607            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1608            RRETURN(rrc);
1609            }
1610        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1611        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1612        }        }
1613      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1614        {        {
1615        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1616        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1617        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1618        flags = match_tail_recursed;        flags = 0;
1619        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1620        }        }
1621      /* Control never gets here */      /* Control never gets here */
# Line 1209  for (;;) Line 1623  for (;;)
1623      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1624    
1625      case OP_CIRC:      case OP_CIRC:
1626      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1627      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1628        {        {
1629        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1630            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1631          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1632        ecode++;        ecode++;
1633        break;        break;
1634        }        }
# Line 1223  for (;;) Line 1637  for (;;)
1637      /* Start of subject assertion */      /* Start of subject assertion */
1638    
1639      case OP_SOD:      case OP_SOD:
1640      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1641      ecode++;      ecode++;
1642      break;      break;
1643    
1644      /* Start of match assertion */      /* Start of match assertion */
1645    
1646      case OP_SOM:      case OP_SOM:
1647      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1648        ecode++;
1649        break;
1650    
1651        /* Reset the start of match point */
1652    
1653        case OP_SET_SOM:
1654        mstart = eptr;
1655      ecode++;      ecode++;
1656      break;      break;
1657    
# Line 1241  for (;;) Line 1662  for (;;)
1662      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1663        {        {
1664        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1665          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1666        else        else
1667          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1668        ecode++;        ecode++;
1669        break;        break;
1670        }        }
1671      else      else
1672        {        {
1673        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1674        if (!md->endonly)        if (!md->endonly)
1675          {          {
1676          if (eptr != md->end_subject &&          if (eptr != md->end_subject &&
1677              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1678            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1679          ecode++;          ecode++;
1680          break;          break;
1681          }          }
# Line 1264  for (;;) Line 1685  for (;;)
1685      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1686    
1687      case OP_EOD:      case OP_EOD:
1688      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1689      ecode++;      ecode++;
1690      break;      break;
1691    
# Line 1273  for (;;) Line 1694  for (;;)
1694      case OP_EODN:      case OP_EODN:
1695      if (eptr != md->end_subject &&      if (eptr != md->end_subject &&
1696          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1697        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1698      ecode++;      ecode++;
1699      break;      break;
1700    
# Line 1285  for (;;) Line 1706  for (;;)
1706    
1707        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1708        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1709        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1710          partial matching. */
1711    
1712  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1713        if (utf8)        if (utf8)
1714          {          {
1715          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1716            {            {
1717            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1718            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1719              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1720            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1721            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1722            }            }
1723          if (eptr >= md->end_subject) cur_is_word = FALSE; else          if (eptr >= md->end_subject)
1724              {
1725              SCHECK_PARTIAL();
1726              cur_is_word = FALSE;
1727              }
1728            else
1729            {            {
1730            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1731            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1306  for (;;) Line 1734  for (;;)
1734        else        else
1735  #endif  #endif
1736    
1737        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode */
1738    
1739          {          {
1740          prev_is_word = (eptr != md->start_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1741            ((md->ctypes[eptr[-1]] & ctype_word) != 0);            {
1742          cur_is_word = (eptr < md->end_subject) &&            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1743            ((md->ctypes[*eptr] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1744              }
1745            if (eptr >= md->end_subject)
1746              {
1747              SCHECK_PARTIAL();
1748              cur_is_word = FALSE;
1749              }
1750            else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1751          }          }
1752    
1753        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1754    
1755        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1756             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1757          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1758        }        }
1759      break;      break;
1760    
1761      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1762    
1763      case OP_ANY:      case OP_ANY:
1764      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1765        /* Fall through */
1766    
1767        case OP_ALLANY:
1768        if (eptr++ >= md->end_subject)
1769        {        {
1770        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);        SCHECK_PARTIAL();
1771          MRRETURN(MATCH_NOMATCH);
1772        }        }
1773      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1774      ecode++;      ecode++;
1775      break;      break;
1776    
# Line 1340  for (;;) Line 1778  for (;;)
1778      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1779    
1780      case OP_ANYBYTE:      case OP_ANYBYTE:
1781      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1782          {
1783          SCHECK_PARTIAL();
1784          MRRETURN(MATCH_NOMATCH);
1785          }
1786      ecode++;      ecode++;
1787      break;      break;
1788    
1789      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1790      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1791          {
1792          SCHECK_PARTIAL();
1793          MRRETURN(MATCH_NOMATCH);
1794          }
1795      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1796      if (      if (
1797  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1353  for (;;) Line 1799  for (;;)
1799  #endif  #endif
1800         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1801         )         )
1802        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1803      ecode++;      ecode++;
1804      break;      break;
1805    
1806      case OP_DIGIT:      case OP_DIGIT:
1807      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1808          {
1809          SCHECK_PARTIAL();
1810          MRRETURN(MATCH_NOMATCH);
1811          }
1812      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1813      if (      if (
1814  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1366  for (;;) Line 1816  for (;;)
1816  #endif  #endif
1817         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1818         )         )
1819        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1820      ecode++;      ecode++;
1821      break;      break;
1822    
1823      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1824      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1825          {
1826          SCHECK_PARTIAL();
1827          MRRETURN(MATCH_NOMATCH);
1828          }
1829      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1830      if (      if (
1831  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1379  for (;;) Line 1833  for (;;)
1833  #endif  #endif
1834         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1835         )         )
1836        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1837      ecode++;      ecode++;
1838      break;      break;
1839    
1840      case OP_WHITESPACE:      case OP_WHITESPACE:
1841      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1842          {
1843          SCHECK_PARTIAL();
1844          MRRETURN(MATCH_NOMATCH);
1845          }
1846      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1847      if (      if (
1848  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1392  for (;;) Line 1850  for (;;)
1850  #endif  #endif
1851         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1852         )         )
1853        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1854      ecode++;      ecode++;
1855      break;      break;
1856    
1857      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1858      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1859          {
1860          SCHECK_PARTIAL();
1861          MRRETURN(MATCH_NOMATCH);
1862          }
1863      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1864      if (      if (
1865  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1405  for (;;) Line 1867  for (;;)
1867  #endif  #endif
1868         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1869         )         )
1870        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1871      ecode++;      ecode++;
1872      break;      break;
1873    
1874      case OP_WORDCHAR:      case OP_WORDCHAR:
1875      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1876          {
1877          SCHECK_PARTIAL();
1878          MRRETURN(MATCH_NOMATCH);
1879          }
1880      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1881      if (      if (
1882  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1418  for (;;) Line 1884  for (;;)
1884  #endif  #endif
1885         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1886         )         )
1887        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1888      ecode++;      ecode++;
1889      break;      break;
1890    
1891      case OP_ANYNL:      case OP_ANYNL:
1892      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1893          {
1894          SCHECK_PARTIAL();
1895          MRRETURN(MATCH_NOMATCH);
1896          }
1897      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1898      switch(c)      switch(c)
1899        {        {
1900        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
1901        case 0x000d:        case 0x000d:
1902        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1903        break;        break;
1904    
1905        case 0x000a:        case 0x000a:
1906          break;
1907    
1908        case 0x000b:        case 0x000b:
1909        case 0x000c:        case 0x000c:
1910        case 0x0085:        case 0x0085:
1911        case 0x2028:        case 0x2028:
1912        case 0x2029:        case 0x2029:
1913          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1914        break;        break;
1915        }        }
1916      ecode++;      ecode++;
1917      break;      break;
1918    
1919  #ifdef SUPPORT_UCP      case OP_NOT_HSPACE:
1920      /* Check the next character by Unicode property. We will get here only      if (eptr >= md->end_subject)
1921      if the support is in the binary; otherwise a compile-time error occurs. */        {
1922          SCHECK_PARTIAL();
1923      case OP_PROP:        MRRETURN(MATCH_NOMATCH);
1924      case OP_NOTPROP:        }
     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);  
1925      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1926        switch(c)
1927        {        {
1928        int chartype, script;        default: break;
1929        int category = _pcre_ucp_findprop(c, &chartype, &script);        case 0x09:      /* HT */
1930          case 0x20:      /* SPACE */
1931        switch(ecode[1])        case 0xa0:      /* NBSP */
1932          {        case 0x1680:    /* OGHAM SPACE MARK */
1933          case PT_ANY:        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1934          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);        case 0x2000:    /* EN QUAD */
1935          break;        case 0x2001:    /* EM QUAD */
1936          case 0x2002:    /* EN SPACE */
1937          case PT_LAMP:        case 0x2003:    /* EM SPACE */
1938          if ((chartype == ucp_Lu ||        case 0x2004:    /* THREE-PER-EM SPACE */
1939               chartype == ucp_Ll ||        case 0x2005:    /* FOUR-PER-EM SPACE */
1940               chartype == ucp_Lt) == (op == OP_NOTPROP))        case 0x2006:    /* SIX-PER-EM SPACE */
1941            RRETURN(MATCH_NOMATCH);        case 0x2007:    /* FIGURE SPACE */
1942           break;        case 0x2008:    /* PUNCTUATION SPACE */
1943          case 0x2009:    /* THIN SPACE */
1944          case PT_GC:        case 0x200A:    /* HAIR SPACE */
1945          if ((ecode[2] != category) == (op == OP_PROP))        case 0x202f:    /* NARROW NO-BREAK SPACE */
1946            RRETURN(MATCH_NOMATCH);        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1947          break;        case 0x3000:    /* IDEOGRAPHIC SPACE */
1948          MRRETURN(MATCH_NOMATCH);
         case PT_PC:  
         if ((ecode[2] != chartype) == (op == OP_PROP))  
           RRETURN(MATCH_NOMATCH);  
         break;  
   
         case PT_SC:  
         if ((ecode[2] != script) == (op == OP_PROP))  
           RRETURN(MATCH_NOMATCH);  
         break;  
   
         default:  
         RRETURN(PCRE_ERROR_INTERNAL);  
         }  
   
       ecode += 3;  
1949        }        }
1950        ecode++;
1951      break;      break;
1952    
1953      /* Match an extended Unicode sequence. We will get here only if the support      case OP_HSPACE:
1954      is in the binary; otherwise a compile-time error occurs. */      if (eptr >= md->end_subject)
   
     case OP_EXTUNI:  
     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);  
     GETCHARINCTEST(c, eptr);  
1955        {        {
1956        int chartype, script;        SCHECK_PARTIAL();
1957        int category = _pcre_ucp_findprop(c, &chartype, &script);        MRRETURN(MATCH_NOMATCH);
1958        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        }
1959        GETCHARINCTEST(c, eptr);
1960        switch(c)
1961          {
1962          default: MRRETURN(MATCH_NOMATCH);
1963          case 0x09:      /* HT */
1964          case 0x20:      /* SPACE */
1965          case 0xa0:      /* NBSP */
1966          case 0x1680:    /* OGHAM SPACE MARK */
1967          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1968          case 0x2000:    /* EN QUAD */
1969          case 0x2001:    /* EM QUAD */
1970          case 0x2002:    /* EN SPACE */
1971          case 0x2003:    /* EM SPACE */
1972          case 0x2004:    /* THREE-PER-EM SPACE */
1973          case 0x2005:    /* FOUR-PER-EM SPACE */
1974          case 0x2006:    /* SIX-PER-EM SPACE */
1975          case 0x2007:    /* FIGURE SPACE */
1976          case 0x2008:    /* PUNCTUATION SPACE */
1977          case 0x2009:    /* THIN SPACE */
1978          case 0x200A:    /* HAIR SPACE */
1979          case 0x202f:    /* NARROW NO-BREAK SPACE */
1980          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1981          case 0x3000:    /* IDEOGRAPHIC SPACE */
1982          break;
1983          }
1984        ecode++;
1985        break;
1986    
1987        case OP_NOT_VSPACE:
1988        if (eptr >= md->end_subject)
1989          {
1990          SCHECK_PARTIAL();
1991          MRRETURN(MATCH_NOMATCH);
1992          }
1993        GETCHARINCTEST(c, eptr);
1994        switch(c)
1995          {
1996          default: break;
1997          case 0x0a:      /* LF */
1998          case 0x0b:      /* VT */
1999          case 0x0c:      /* FF */
2000          case 0x0d:      /* CR */
2001          case 0x85:      /* NEL */
2002          case 0x2028:    /* LINE SEPARATOR */
2003          case 0x2029:    /* PARAGRAPH SEPARATOR */
2004          MRRETURN(MATCH_NOMATCH);
2005          }
2006        ecode++;
2007        break;
2008    
2009        case OP_VSPACE:
2010        if (eptr >= md->end_subject)
2011          {
2012          SCHECK_PARTIAL();
2013          MRRETURN(MATCH_NOMATCH);
2014          }
2015        GETCHARINCTEST(c, eptr);
2016        switch(c)
2017          {
2018          default: MRRETURN(MATCH_NOMATCH);
2019          case 0x0a:      /* LF */
2020          case 0x0b:      /* VT */
2021          case 0x0c:      /* FF */
2022          case 0x0d:      /* CR */
2023          case 0x85:      /* NEL */
2024          case 0x2028:    /* LINE SEPARATOR */
2025          case 0x2029:    /* PARAGRAPH SEPARATOR */
2026          break;
2027          }
2028        ecode++;
2029        break;
2030    
2031    #ifdef SUPPORT_UCP
2032        /* Check the next character by Unicode property. We will get here only
2033        if the support is in the binary; otherwise a compile-time error occurs. */
2034    
2035        case OP_PROP:
2036        case OP_NOTPROP:
2037        if (eptr >= md->end_subject)
2038          {
2039          SCHECK_PARTIAL();
2040          MRRETURN(MATCH_NOMATCH);
2041          }
2042        GETCHARINCTEST(c, eptr);
2043          {
2044          const ucd_record *prop = GET_UCD(c);
2045    
2046          switch(ecode[1])
2047            {
2048            case PT_ANY:
2049            if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2050            break;
2051    
2052            case PT_LAMP:
2053            if ((prop->chartype == ucp_Lu ||
2054                 prop->chartype == ucp_Ll ||
2055                 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2056              MRRETURN(MATCH_NOMATCH);
2057             break;
2058    
2059            case PT_GC:
2060            if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2061              MRRETURN(MATCH_NOMATCH);
2062            break;
2063    
2064            case PT_PC:
2065            if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2066              MRRETURN(MATCH_NOMATCH);
2067            break;
2068    
2069            case PT_SC:
2070            if ((ecode[2] != prop->script) == (op == OP_PROP))
2071              MRRETURN(MATCH_NOMATCH);
2072            break;
2073    
2074            default:
2075            RRETURN(PCRE_ERROR_INTERNAL);
2076            }
2077    
2078          ecode += 3;
2079          }
2080        break;
2081    
2082        /* Match an extended Unicode sequence. We will get here only if the support
2083        is in the binary; otherwise a compile-time error occurs. */
2084    
2085        case OP_EXTUNI:
2086        if (eptr >= md->end_subject)
2087          {
2088          SCHECK_PARTIAL();
2089          MRRETURN(MATCH_NOMATCH);
2090          }
2091        GETCHARINCTEST(c, eptr);
2092          {
2093          int category = UCD_CATEGORY(c);
2094          if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2095        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2096          {          {
2097          int len = 1;          int len = 1;
# Line 1507  for (;;) Line 2099  for (;;)
2099            {            {
2100            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2101            }            }
2102          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2103          if (category != ucp_M) break;          if (category != ucp_M) break;
2104          eptr += len;          eptr += len;
2105          }          }
# Line 1528  for (;;) Line 2120  for (;;)
2120      case OP_REF:      case OP_REF:
2121        {        {
2122        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2123        ecode += 3;                                 /* Advance past item */        ecode += 3;
2124    
2125          /* If the reference is unset, there are two possibilities:
2126    
2127          (a) In the default, Perl-compatible state, set the length to be longer
2128          than the amount of subject left; this ensures that every attempt at a
2129          match fails. We can't just fail here, because of the possibility of
2130          quantifiers with zero minima.
2131    
2132          (b) If the JavaScript compatibility flag is set, set the length to zero
2133          so that the back reference matches an empty string.
2134    
2135        /* If the reference is unset, set the length to be longer than the amount        Otherwise, set the length to the length of what was matched by the
2136        of subject left; this ensures that every attempt at a match fails. We        referenced subpattern. */
2137        can't just fail here, because of the possibility of quantifiers with zero  
2138        minima. */        if (offset >= offset_top || md->offset_vector[offset] < 0)
2139            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2140        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        else
2141          md->end_subject - eptr + 1 :          length = md->offset_vector[offset+1] - md->offset_vector[offset];
         md->offset_vector[offset+1] - md->offset_vector[offset];  
2142    
2143        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2144    
# Line 1566  for (;;) Line 2167  for (;;)
2167          break;          break;
2168    
2169          default:               /* No repeat follows */          default:               /* No repeat follows */
2170          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2171              {
2172              CHECK_PARTIAL();
2173              MRRETURN(MATCH_NOMATCH);
2174              }
2175          eptr += length;          eptr += length;
2176          continue;              /* With the main loop */          continue;              /* With the main loop */
2177          }          }
# Line 1582  for (;;) Line 2187  for (;;)
2187    
2188        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2189          {          {
2190          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2191              {
2192              CHECK_PARTIAL();
2193              MRRETURN(MATCH_NOMATCH);
2194              }
2195          eptr += length;          eptr += length;
2196          }          }
2197    
# Line 1597  for (;;) Line 2206  for (;;)
2206          {          {
2207          for (fi = min;; fi++)          for (fi = min;; fi++)
2208            {            {
2209            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2210            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2211            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2212              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2213                {
2214                CHECK_PARTIAL();
2215                MRRETURN(MATCH_NOMATCH);
2216                }
2217            eptr += length;            eptr += length;
2218            }            }
2219          /* Control never gets here */          /* Control never gets here */
# Line 1613  for (;;) Line 2226  for (;;)
2226          pp = eptr;          pp = eptr;
2227          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2228            {            {
2229            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2230                {
2231                CHECK_PARTIAL();
2232                break;
2233                }
2234            eptr += length;            eptr += length;
2235            }            }
2236          while (eptr >= pp)          while (eptr >= pp)
2237            {            {
2238            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2239            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2240            eptr -= length;            eptr -= length;
2241            }            }
2242          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2243          }          }
2244        }        }
2245      /* Control never gets here */      /* Control never gets here */
2246    
   
   
2247      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2248      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2249      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1683  for (;;) Line 2298  for (;;)
2298          {          {
2299          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2300            {            {
2301            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2302                {
2303                SCHECK_PARTIAL();
2304                MRRETURN(MATCH_NOMATCH);
2305                }
2306            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2307            if (c > 255)            if (c > 255)
2308              {              {
2309              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2310              }              }
2311            else            else
2312              {              {
2313              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2314              }              }
2315            }            }
2316          }          }
# Line 1701  for (;;) Line 2320  for (;;)
2320          {          {
2321          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2322            {            {
2323            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2324                {
2325                SCHECK_PARTIAL();
2326                MRRETURN(MATCH_NOMATCH);
2327                }
2328            c = *eptr++;            c = *eptr++;
2329            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2330            }            }
2331          }          }
2332    
# Line 1723  for (;;) Line 2346  for (;;)
2346            {            {
2347            for (fi = min;; fi++)            for (fi = min;; fi++)
2348              {              {
2349              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2350              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2351              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2352                if (eptr >= md->end_subject)
2353                  {
2354                  SCHECK_PARTIAL();
2355                  MRRETURN(MATCH_NOMATCH);
2356                  }
2357              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2358              if (c > 255)              if (c > 255)
2359                {                {
2360                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2361                }                }
2362              else              else
2363                {                {
2364                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2365                }                }
2366              }              }
2367            }            }
# Line 1743  for (;;) Line 2371  for (;;)
2371            {            {
2372            for (fi = min;; fi++)            for (fi = min;; fi++)
2373              {              {
2374              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2375              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2376              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2377                if (eptr >= md->end_subject)
2378                  {
2379                  SCHECK_PARTIAL();
2380                  MRRETURN(MATCH_NOMATCH);
2381                  }
2382              c = *eptr++;              c = *eptr++;
2383              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2384              }              }
2385            }            }
2386          /* Control never gets here */          /* Control never gets here */
# Line 1766  for (;;) Line 2399  for (;;)
2399            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2400              {              {
2401              int len = 1;              int len = 1;
2402              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2403                  {
2404                  SCHECK_PARTIAL();
2405                  break;
2406                  }
2407              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2408              if (c > 255)              if (c > 255)
2409                {                {
# Line 1780  for (;;) Line 2417  for (;;)
2417              }              }
2418            for (;;)            for (;;)
2419              {              {
2420              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2421              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2422              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2423              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1792  for (;;) Line 2429  for (;;)
2429            {            {
2430            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2431              {              {
2432              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2433                  {
2434                  SCHECK_PARTIAL();
2435                  break;
2436                  }
2437              c = *eptr;              c = *eptr;
2438              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2439              eptr++;              eptr++;
2440              }              }
2441            while (eptr >= pp)            while (eptr >= pp)
2442              {              {
2443              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2444              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2445              eptr--;              eptr--;
2446              }              }
2447            }            }
2448    
2449          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2450          }          }
2451        }        }
2452      /* Control never gets here */      /* Control never gets here */
2453    
2454    
2455      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2456      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2457        mode, because Unicode properties are supported in non-UTF-8 mode. */
2458    
2459  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2460      case OP_XCLASS:      case OP_XCLASS:
# Line 1853  for (;;) Line 2495  for (;;)
2495    
2496        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2497          {          {
2498          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2499          GETCHARINC(c, eptr);            {
2500          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2501              MRRETURN(MATCH_NOMATCH);
2502              }
2503            GETCHARINCTEST(c, eptr);
2504            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2505          }          }
2506    
2507        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1870  for (;;) Line 2516  for (;;)
2516          {          {
2517          for (fi = min;; fi++)          for (fi = min;; fi++)
2518            {            {
2519            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2520            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2521            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2522            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2523            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2524                SCHECK_PARTIAL();
2525                MRRETURN(MATCH_NOMATCH);
2526                }
2527              GETCHARINCTEST(c, eptr);
2528              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2529            }            }
2530          /* Control never gets here */          /* Control never gets here */
2531          }          }
# Line 1887  for (;;) Line 2538  for (;;)
2538          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2539            {            {
2540            int len = 1;            int len = 1;
2541            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2542            GETCHARLEN(c, eptr, len);              {
2543                SCHECK_PARTIAL();
2544                break;
2545                }
2546              GETCHARLENTEST(c, eptr, len);
2547            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2548            eptr += len;            eptr += len;
2549            }            }
2550          for(;;)          for(;;)
2551            {            {
2552            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2553            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2554            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2555            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2556            }            }
2557          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2558          }          }
2559    
2560        /* Control never gets here */        /* Control never gets here */
# Line 1915  for (;;) Line 2570  for (;;)
2570        length = 1;        length = 1;
2571        ecode++;        ecode++;
2572        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2573        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2574        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2575            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2576            MRRETURN(MATCH_NOMATCH);
2577            }
2578          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2579        }        }
2580      else      else
2581  #endif  #endif
2582    
2583      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2584        {        {
2585        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2586        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2587            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2588            MRRETURN(MATCH_NOMATCH);
2589            }
2590          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2591        ecode += 2;        ecode += 2;
2592        }        }
2593      break;      break;
# Line 1939  for (;;) Line 2602  for (;;)
2602        ecode++;        ecode++;
2603        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2604    
2605        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2606            {
2607            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2608            MRRETURN(MATCH_NOMATCH);
2609            }
2610    
2611        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2612        can use the fast lookup table. */        can use the fast lookup table. */
2613    
2614        if (fc < 128)        if (fc < 128)
2615          {          {
2616          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2617          }          }
2618    
2619        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
# Line 1963  for (;;) Line 2630  for (;;)
2630          if (fc != dc)          if (fc != dc)
2631            {            {
2632  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2633            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2634  #endif  #endif
2635              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2636            }            }
2637          }          }
2638        }        }
# Line 1974  for (;;) Line 2641  for (;;)
2641    
2642      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2643        {        {
2644        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2645        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2646            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2647            MRRETURN(MATCH_NOMATCH);
2648            }
2649          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2650        ecode += 2;        ecode += 2;
2651        }        }
2652      break;      break;
# Line 2028  for (;;) Line 2699  for (;;)
2699      case OP_MINQUERY:      case OP_MINQUERY:
2700      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2701      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2702    
2703      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2704      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2705      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2706    
2707      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2708    
2709      REPEATCHAR:      REPEATCHAR:
2710  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2043  for (;;) Line 2713  for (;;)
2713        length = 1;        length = 1;
2714        charptr = ecode;        charptr = ecode;
2715        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2716        ecode += length;        ecode += length;
2717    
2718        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2054  for (;;) Line 2723  for (;;)
2723  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2724          unsigned int othercase;          unsigned int othercase;
2725          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2726              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2727            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2728          else oclength = 0;          else oclength = 0;
2729  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2730    
2731          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2732            {            {
2733            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2734  #ifdef SUPPORT_UCP              memcmp(eptr, charptr, length) == 0) eptr += length;
2735            /* Need braces because of following else */  #ifdef SUPPORT_UCP
2736            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength > 0 &&
2737                       eptr <= md->end_subject - oclength &&
2738                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2739    #endif  /* SUPPORT_UCP */
2740            else            else
2741              {              {
2742              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2743              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2744              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2745            }            }
2746    
2747          if (min == max) continue;          if (min == max) continue;
# Line 2081  for (;;) Line 2750  for (;;)
2750            {            {
2751            for (fi = min;; fi++)            for (fi = min;; fi++)
2752              {              {
2753              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2754              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2755              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2756              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2757  #ifdef SUPPORT_UCP                memcmp(eptr, charptr, length) == 0) eptr += length;
2758              /* Need braces because of following else */  #ifdef SUPPORT_UCP
2759              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength > 0 &&
2760                         eptr <= md->end_subject - oclength &&
2761                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2762    #endif  /* SUPPORT_UCP */
2763              else              else
2764                {                {
2765                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2766                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2767                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2768              }              }
2769            /* Control never gets here */            /* Control never gets here */
2770            }            }
# Line 2105  for (;;) Line 2774  for (;;)
2774            pp = eptr;            pp = eptr;
2775            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2776              {              {
2777              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2778              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2779  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2780              else if (oclength == 0) break;              else if (oclength > 0 &&
2781                         eptr <= md->end_subject - oclength &&
2782                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2783    #endif  /* SUPPORT_UCP */
2784              else              else
2785                {                {
2786                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2787                eptr += oclength;                break;
2788                }                }
 #else   /* without SUPPORT_UCP */  
             else break;  
 #endif  /* SUPPORT_UCP */  
2789              }              }
2790    
2791            if (possessive) continue;            if (possessive) continue;
2792            while (eptr >= pp)  
2793             {            for(;;)
2794             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              {
2795             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2796                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2797                if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2798  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2799             eptr--;              eptr--;
2800             BACKCHAR(eptr);              BACKCHAR(eptr);
2801  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2802             eptr -= length;              eptr -= length;
2803  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2804             }              }
           RRETURN(MATCH_NOMATCH);  
2805            }            }
2806          /* Control never gets here */          /* Control never gets here */
2807          }          }
# Line 2144  for (;;) Line 2814  for (;;)
2814  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2815    
2816      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2817        {  
2818        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2819    
2820      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2821      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2165  for (;;) Line 2833  for (;;)
2833        {        {
2834        fc = md->lcc[fc];        fc = md->lcc[fc];
2835        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2836          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2837            if (eptr >= md->end_subject)
2838              {
2839              SCHECK_PARTIAL();
2840              MRRETURN(MATCH_NOMATCH);
2841              }
2842            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2843            }
2844        if (min == max) continue;        if (min == max) continue;
2845        if (minimize)        if (minimize)
2846          {          {
2847          for (fi = min;; fi++)          for (fi = min;; fi++)
2848            {            {
2849            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2850            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2851            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2852                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2853              RRETURN(MATCH_NOMATCH);              {
2854                SCHECK_PARTIAL();
2855                MRRETURN(MATCH_NOMATCH);
2856                }
2857              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2858            }            }
2859          /* Control never gets here */          /* Control never gets here */
2860          }          }
# Line 2184  for (;;) Line 2863  for (;;)
2863          pp = eptr;          pp = eptr;
2864          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2865            {            {
2866            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2867                {
2868                SCHECK_PARTIAL();
2869                break;
2870                }
2871              if (fc != md->lcc[*eptr]) break;
2872            eptr++;            eptr++;
2873            }            }
2874    
2875          if (possessive) continue;          if (possessive) continue;
2876    
2877          while (eptr >= pp)          while (eptr >= pp)
2878            {            {
2879            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2880            eptr--;            eptr--;
2881            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2882            }            }
2883          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2884          }          }
2885        /* Control never gets here */        /* Control never gets here */
2886        }        }
# Line 2203  for (;;) Line 2889  for (;;)
2889    
2890      else      else
2891        {        {
2892        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2893            {
2894            if (eptr >= md->end_subject)
2895              {
2896              SCHECK_PARTIAL();
2897              MRRETURN(MATCH_NOMATCH);
2898              }
2899            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2900            }
2901    
2902        if (min == max) continue;        if (min == max) continue;
2903    
2904        if (minimize)        if (minimize)
2905          {          {
2906          for (fi = min;; fi++)          for (fi = min;; fi++)
2907            {            {
2908            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2909            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2910            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2911              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2912                {
2913                SCHECK_PARTIAL();
2914                MRRETURN(MATCH_NOMATCH);
2915                }
2916              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2917            }            }
2918          /* Control never gets here */          /* Control never gets here */
2919          }          }
# Line 2221  for (;;) Line 2922  for (;;)
2922          pp = eptr;          pp = eptr;
2923          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2924            {            {
2925            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
2926                {
2927                SCHECK_PARTIAL();
2928                break;
2929                }
2930              if (fc != *eptr) break;
2931            eptr++;            eptr++;
2932            }            }
2933          if (possessive) continue;          if (possessive) continue;
2934    
2935          while (eptr >= pp)          while (eptr >= pp)
2936            {            {
2937            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2938            eptr--;            eptr--;
2939            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2940            }            }
2941          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2942          }          }
2943        }        }
2944      /* Control never gets here */      /* Control never gets here */
# Line 2240  for (;;) Line 2947  for (;;)
2947      checking can be multibyte. */      checking can be multibyte. */
2948    
2949      case OP_NOT:      case OP_NOT:
2950      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2951          {
2952          SCHECK_PARTIAL();
2953          MRRETURN(MATCH_NOMATCH);
2954          }
2955      ecode++;      ecode++;
2956      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2957      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2249  for (;;) Line 2960  for (;;)
2960        if (c < 256)        if (c < 256)
2961  #endif  #endif
2962        c = md->lcc[c];        c = md->lcc[c];
2963        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
2964        }        }
2965      else      else
2966        {        {
2967        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
2968        }        }
2969      break;      break;
2970    
# Line 2317  for (;;) Line 3028  for (;;)
3028      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3029      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3030    
3031      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3032    
3033      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3034      fc = *ecode++;      fc = *ecode++;
3035    
3036      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2347  for (;;) Line 3055  for (;;)
3055          register unsigned int d;          register unsigned int d;
3056          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3057            {            {
3058              if (eptr >= md->end_subject)
3059                {
3060                SCHECK_PARTIAL();
3061                MRRETURN(MATCH_NOMATCH);
3062                }
3063            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3064            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3065            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3066            }            }
3067          }          }
3068        else        else
# Line 2358  for (;;) Line 3071  for (;;)
3071        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3072          {          {
3073          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3074            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3075              if (eptr >= md->end_subject)
3076                {
3077                SCHECK_PARTIAL();
3078                MRRETURN(MATCH_NOMATCH);
3079                }
3080              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3081              }
3082          }          }
3083    
3084        if (min == max) continue;        if (min == max) continue;
# Line 2372  for (;;) Line 3092  for (;;)
3092            register unsigned int d;            register unsigned int d;
3093            for (fi = min;; fi++)            for (fi = min;; fi++)
3094              {              {
3095              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3096              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3097                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3098                if (eptr >= md->end_subject)
3099                  {
3100                  SCHECK_PARTIAL();
3101                  MRRETURN(MATCH_NOMATCH);
3102                  }
3103              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3104              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3105              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3106              }              }
3107            }            }
3108          else          else
# Line 2386  for (;;) Line 3111  for (;;)
3111            {            {
3112            for (fi = min;; fi++)            for (fi = min;; fi++)
3113              {              {
3114              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3115              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3116              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3117                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3118                  {
3119                  SCHECK_PARTIAL();
3120                  MRRETURN(MATCH_NOMATCH);
3121                  }
3122                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3123              }              }
3124            }            }
3125          /* Control never gets here */          /* Control never gets here */
# Line 2409  for (;;) Line 3139  for (;;)
3139            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3140              {              {
3141              int len = 1;              int len = 1;
3142              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3143                  {
3144                  SCHECK_PARTIAL();
3145                  break;
3146                  }
3147              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3148              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3149              if (fc == d) break;              if (fc == d) break;
# Line 2418  for (;;) Line 3152  for (;;)
3152          if (possessive) continue;          if (possessive) continue;
3153          for(;;)          for(;;)
3154              {              {
3155              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3156              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3157              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3158              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2430  for (;;) Line 3164  for (;;)
3164            {            {
3165            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3166              {              {
3167              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3168                  {
3169                  SCHECK_PARTIAL();
3170                  break;
3171                  }
3172                if (fc == md->lcc[*eptr]) break;
3173              eptr++;              eptr++;
3174              }              }
3175            if (possessive) continue;            if (possessive) continue;
3176            while (eptr >= pp)            while (eptr >= pp)
3177              {              {
3178              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3179              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3180              eptr--;              eptr--;
3181              }              }
3182            }            }
3183    
3184          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3185          }          }
3186        /* Control never gets here */        /* Control never gets here */
3187        }        }
# Line 2458  for (;;) Line 3197  for (;;)
3197          register unsigned int d;          register unsigned int d;
3198          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3199            {            {
3200              if (eptr >= md->end_subject)
3201                {
3202                SCHECK_PARTIAL();
3203                MRRETURN(MATCH_NOMATCH);
3204                }
3205            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3206            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3207            }            }
3208          }          }
3209        else        else
# Line 2467  for (;;) Line 3211  for (;;)
3211        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3212          {          {
3213          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3214            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3215              if (eptr >= md->end_subject)
3216                {
3217                SCHECK_PARTIAL();
3218                MRRETURN(MATCH_NOMATCH);
3219                }
3220              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3221              }
3222          }          }
3223    
3224        if (min == max) continue;        if (min == max) continue;
# Line 2481  for (;;) Line 3232  for (;;)
3232            register unsigned int d;            register unsigned int d;
3233            for (fi = min;; fi++)            for (fi = min;; fi++)
3234              {              {
3235              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3236              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3237                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3238                if (eptr >= md->end_subject)
3239                  {
3240                  SCHECK_PARTIAL();
3241                  MRRETURN(MATCH_NOMATCH);
3242                  }
3243              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3244              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3245              }              }
3246            }            }
3247          else          else
# Line 2494  for (;;) Line 3250  for (;;)
3250            {            {
3251            for (fi = min;; fi++)            for (fi = min;; fi++)
3252              {              {
3253              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3254              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3255              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3256                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3257                  {
3258                  SCHECK_PARTIAL();
3259                  MRRETURN(MATCH_NOMATCH);
3260                  }
3261                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3262              }              }
3263            }            }
3264          /* Control never gets here */          /* Control never gets here */
# Line 2517  for (;;) Line 3278  for (;;)
3278            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3279              {              {
3280              int len = 1;              int len = 1;
3281              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3282                  {
3283                  SCHECK_PARTIAL();
3284                  break;
3285                  }
3286              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3287              if (fc == d) break;              if (fc == d) break;
3288              eptr += len;              eptr += len;
# Line 2525  for (;;) Line 3290  for (;;)
3290            if (possessive) continue;            if (possessive) continue;
3291            for(;;)            for(;;)
3292              {              {
3293              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3294              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3295              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3296              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2537  for (;;) Line 3302  for (;;)
3302            {            {
3303            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3304              {              {
3305              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3306                  {
3307                  SCHECK_PARTIAL();
3308                  break;
3309                  }
3310                if (fc == *eptr) break;
3311              eptr++;              eptr++;
3312              }              }
3313            if (possessive) continue;            if (possessive) continue;
3314            while (eptr >= pp)            while (eptr >= pp)
3315              {              {
3316              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3317              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3318              eptr--;              eptr--;
3319              }              }
3320            }            }
3321    
3322          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3323          }          }
3324        }        }
3325      /* Control never gets here */      /* Control never gets here */
# Line 2631  for (;;) Line 3401  for (;;)
3401    
3402      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3403      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3404      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3405      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3406      and single-bytes. */      and single-bytes. */
3407    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3408      if (min > 0)      if (min > 0)
3409        {        {
3410  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2646  for (;;) Line 3413  for (;;)
3413          switch(prop_type)          switch(prop_type)
3414            {            {
3415            case PT_ANY:            case PT_ANY:
3416            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3417            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3418              {              {
3419              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3420              GETCHARINC(c, eptr);                {
3421                  SCHECK_PARTIAL();
3422                  MRRETURN(MATCH_NOMATCH);
3423                  }
3424                GETCHARINCTEST(c, eptr);
3425              }              }
3426            break;            break;
3427    
3428            case PT_LAMP:            case PT_LAMP:
3429            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3430              {              {
3431              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3432              GETCHARINC(c, eptr);                {
3433              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3434                  MRRETURN(MATCH_NOMATCH);
3435                  }
3436                GETCHARINCTEST(c, eptr);
3437                prop_chartype = UCD_CHARTYPE(c);
3438              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3439                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3440                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3441                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3442              }              }
3443            break;            break;
3444    
3445            case PT_GC:            case PT_GC:
3446            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3447              {              {
3448              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3449              GETCHARINC(c, eptr);                {
3450              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3451                  MRRETURN(MATCH_NOMATCH);
3452                  }
3453                GETCHARINCTEST(c, eptr);
3454                prop_category = UCD_CATEGORY(c);
3455              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3456                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3457              }              }
3458            break;            break;
3459    
3460            case PT_PC:            case PT_PC:
3461            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3462              {              {
3463              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3464              GETCHARINC(c, eptr);                {
3465              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3466                  MRRETURN(MATCH_NOMATCH);
3467                  }
3468                GETCHARINCTEST(c, eptr);
3469                prop_chartype = UCD_CHARTYPE(c);
3470              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3471                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3472              }              }
3473            break;            break;
3474    
3475            case PT_SC:            case PT_SC:
3476            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3477              {              {
3478              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3479              GETCHARINC(c, eptr);                {
3480              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3481                  MRRETURN(MATCH_NOMATCH);
3482                  }
3483                GETCHARINCTEST(c, eptr);
3484                prop_script = UCD_SCRIPT(c);
3485              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3486                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3487              }              }
3488            break;            break;
3489    
# Line 2712  for (;;) Line 3499  for (;;)
3499          {          {
3500          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3501            {            {
3502              if (eptr >= md->end_subject)
3503                {
3504                SCHECK_PARTIAL();
3505                MRRETURN(MATCH_NOMATCH);
3506                }
3507            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3508            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3509            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3510            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3511              {              {
3512              int len = 1;              int len = 1;
3513              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3514                {                else { GETCHARLEN(c, eptr, len); }
3515                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3516              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3517              eptr += len;              eptr += len;
3518              }              }
# Line 2740  for (;;) Line 3530  for (;;)
3530          case OP_ANY:          case OP_ANY:
3531          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3532            {            {
3533            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3534                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
3535              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3536                MRRETURN(MATCH_NOMATCH);
3537                }
3538              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3539              eptr++;
3540              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3541              }
3542            break;
3543    
3544            case OP_ALLANY:
3545            for (i = 1; i <= min; i++)
3546              {
3547              if (eptr >= md->end_subject)
3548                {
3549                SCHECK_PARTIAL();
3550                MRRETURN(MATCH_NOMATCH);
3551                }
3552            eptr++;            eptr++;
3553            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3554            }            }
3555          break;          break;
3556    
3557          case OP_ANYBYTE:          case OP_ANYBYTE:
3558            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3559          eptr += min;          eptr += min;
3560          break;          break;
3561    
3562          case OP_ANYNL:          case OP_ANYNL:
3563          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3564            {            {
3565            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3566                {
3567                SCHECK_PARTIAL();
3568                MRRETURN(MATCH_NOMATCH);
3569                }
3570            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3571            switch(c)            switch(c)
3572              {              {
3573              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3574              case 0x000d:              case 0x000d:
3575              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3576              break;              break;
3577    
3578              case 0x000a:              case 0x000a:
3579                break;
3580    
3581              case 0x000b:              case 0x000b:
3582              case 0x000c:              case 0x000c:
3583              case 0x0085:              case 0x0085:
3584              case 0x2028:              case 0x2028:
3585              case 0x2029:              case 0x2029:
3586                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3587              break;              break;
3588              }              }
3589            }            }
3590          break;          break;
3591    
3592          case OP_NOT_DIGIT:          case OP_NOT_HSPACE:
3593          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3594            {            {
3595            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3596                {
3597                SCHECK_PARTIAL();
3598                MRRETURN(MATCH_NOMATCH);
3599                }
3600            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3601            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            switch(c)
3602              RRETURN(MATCH_NOMATCH);              {
3603                default: break;
3604                case 0x09:      /* HT */
3605                case 0x20:      /* SPACE */
3606                case 0xa0:      /* NBSP */
3607                case 0x1680:    /* OGHAM SPACE MARK */
3608                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3609                case 0x2000:    /* EN QUAD */
3610                case 0x2001:    /* EM QUAD */
3611                case 0x2002:    /* EN SPACE */
3612                case 0x2003:    /* EM SPACE */
3613                case 0x2004:    /* THREE-PER-EM SPACE */
3614                case 0x2005:    /* FOUR-PER-EM SPACE */
3615                case 0x2006:    /* SIX-PER-EM SPACE */
3616                case 0x2007:    /* FIGURE SPACE */
3617                case 0x2008:    /* PUNCTUATION SPACE */
3618                case 0x2009:    /* THIN SPACE */
3619                case 0x200A:    /* HAIR SPACE */
3620                case 0x202f:    /* NARROW NO-BREAK SPACE */
3621                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3622                case 0x3000:    /* IDEOGRAPHIC SPACE */
3623                MRRETURN(MATCH_NOMATCH);
3624                }
3625            }            }
3626          break;          break;
3627    
3628          case OP_DIGIT:          case OP_HSPACE:
3629          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3630            {            {
3631            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3632               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3633              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3634            /* No need to skip more bytes - we know it's a 1-byte character */              MRRETURN(MATCH_NOMATCH);
3635            }              }
3636          break;            GETCHARINC(c, eptr);
3637              switch(c)
3638          case OP_NOT_WHITESPACE:              {
3639                default: MRRETURN(MATCH_NOMATCH);
3640                case 0x09:      /* HT */
3641                case 0x20:      /* SPACE */
3642                case 0xa0:      /* NBSP */
3643                case 0x1680:    /* OGHAM SPACE MARK */
3644                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3645                case 0x2000:    /* EN QUAD */
3646                case 0x2001:    /* EM QUAD */
3647                case 0x2002:    /* EN SPACE */
3648                case 0x2003:    /* EM SPACE */
3649                case 0x2004:    /* THREE-PER-EM SPACE */
3650                case 0x2005:    /* FOUR-PER-EM SPACE */
3651                case 0x2006:    /* SIX-PER-EM SPACE */
3652                case 0x2007:    /* FIGURE SPACE */
3653                case 0x2008:    /* PUNCTUATION SPACE */
3654                case 0x2009:    /* THIN SPACE */
3655                case 0x200A:    /* HAIR SPACE */
3656                case 0x202f:    /* NARROW NO-BREAK SPACE */
3657                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3658                case 0x3000:    /* IDEOGRAPHIC SPACE */
3659                break;
3660                }
3661              }
3662            break;
3663    
3664            case OP_NOT_VSPACE:
3665          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3666            {            {
3667            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3668               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3669              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3670            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3671                }
3672              GETCHARINC(c, eptr);
3673              switch(c)
3674                {
3675                default: break;
3676                case 0x0a:      /* LF */
3677                case 0x0b:      /* VT */
3678                case 0x0c:      /* FF */
3679                case 0x0d:      /* CR */
3680                case 0x85:      /* NEL */
3681                case 0x2028:    /* LINE SEPARATOR */
3682                case 0x2029:    /* PARAGRAPH SEPARATOR */
3683                MRRETURN(MATCH_NOMATCH);
3684                }
3685              }
3686            break;
3687    
3688            case OP_VSPACE:
3689            for (i = 1; i <= min; i++)
3690              {
3691              if (eptr >= md->end_subject)
3692                {
3693                SCHECK_PARTIAL();
3694                MRRETURN(MATCH_NOMATCH);
3695                }
3696              GETCHARINC(c, eptr);
3697              switch(c)
3698                {
3699                default: MRRETURN(MATCH_NOMATCH);
3700                case 0x0a:      /* LF */
3701                case 0x0b:      /* VT */
3702                case 0x0c:      /* FF */
3703                case 0x0d:      /* CR */
3704                case 0x85:      /* NEL */
3705                case 0x2028:    /* LINE SEPARATOR */
3706                case 0x2029:    /* PARAGRAPH SEPARATOR */
3707                break;
3708                }
3709              }
3710            break;
3711    
3712            case OP_NOT_DIGIT:
3713            for (i = 1; i <= min; i++)
3714              {
3715              if (eptr >= md->end_subject)
3716                {
3717                SCHECK_PARTIAL();
3718                MRRETURN(MATCH_NOMATCH);
3719                }
3720              GETCHARINC(c, eptr);
3721              if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3722                MRRETURN(MATCH_NOMATCH);
3723              }
3724            break;
3725    
3726            case OP_DIGIT:
3727            for (i = 1; i <= min; i++)
3728              {
3729              if (eptr >= md->end_subject)
3730                {
3731                SCHECK_PARTIAL();
3732                MRRETURN(MATCH_NOMATCH);
3733                }
3734              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3735                MRRETURN(MATCH_NOMATCH);
3736              /* No need to skip more bytes - we know it's a 1-byte character */
3737              }
3738            break;
3739    
3740            case OP_NOT_WHITESPACE:
3741            for (i = 1; i <= min; i++)
3742              {
3743              if (eptr >= md->end_subject)
3744                {
3745                SCHECK_PARTIAL();
3746                MRRETURN(MATCH_NOMATCH);
3747                }
3748              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3749                MRRETURN(MATCH_NOMATCH);
3750              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3751            }            }
3752          break;          break;
3753    
3754          case OP_WHITESPACE:          case OP_WHITESPACE:
3755          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3756            {            {
3757            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3758               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3759              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3760                MRRETURN(MATCH_NOMATCH);
3761                }
3762              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3763                MRRETURN(MATCH_NOMATCH);
3764            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3765            }            }
3766          break;          break;
# Line 2817  for (;;) Line 3768  for (;;)
3768          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3769          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3770            {            {
3771            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3772               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))              {
3773              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3774            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3775                }
3776              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3777                MRRETURN(MATCH_NOMATCH);
3778              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3779            }            }
3780          break;          break;
3781    
3782          case OP_WORDCHAR:          case OP_WORDCHAR:
3783          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3784            {            {
3785            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3786               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3787              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3788                MRRETURN(MATCH_NOMATCH);
3789                }
3790              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3791                MRRETURN(MATCH_NOMATCH);
3792            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3793            }            }
3794          break;          break;
# Line 2842  for (;;) Line 3801  for (;;)
3801  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3802    
3803        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3804        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
3805    
3806        switch(ctype)        switch(ctype)
3807          {          {
3808          case OP_ANY:          case OP_ANY:
3809          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3810            {            {
3811            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3812              {              {
3813              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3814              eptr++;              MRRETURN(MATCH_NOMATCH);
3815              }              }
3816              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3817              eptr++;
3818            }            }
         else eptr += min;  
3819          break;          break;
3820    
3821          case OP_ANYBYTE:          case OP_ALLANY:
3822            if (eptr > md->end_subject - min)
3823              {
3824              SCHECK_PARTIAL();
3825              MRRETURN(MATCH_NOMATCH);
3826              }
3827          eptr += min;          eptr += min;
3828          break;          break;
3829    
3830          /* Because of the CRLF case, we can't assume the minimum number of          case OP_ANYBYTE:
3831          bytes are present in this case. */          if (eptr > md->end_subject - min)
3832              {
3833              SCHECK_PARTIAL();
3834              MRRETURN(MATCH_NOMATCH);
3835              }
3836            eptr += min;
3837            break;
3838    
3839          case OP_ANYNL:          case OP_ANYNL:
3840          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3841            {            {
3842            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3843                {
3844                SCHECK_PARTIAL();
3845                MRRETURN(MATCH_NOMATCH);
3846                }
3847            switch(*eptr++)            switch(*eptr++)
3848              {              {
3849              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3850              case 0x000d:              case 0x000d:
3851              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3852              break;              break;
3853              case 0x000a:              case 0x000a:
3854                break;
3855    
3856              case 0x000b:              case 0x000b:
3857              case 0x000c:              case 0x000c:
3858              case 0x0085:              case 0x0085:
3859                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3860                break;
3861                }
3862              }
3863            break;
3864    
3865            case OP_NOT_HSPACE:
3866            for (i = 1; i <= min; i++)
3867              {
3868              if (eptr >= md->end_subject)
3869                {
3870                SCHECK_PARTIAL();
3871                MRRETURN(MATCH_NOMATCH);
3872                }
3873              switch(*eptr++)
3874                {
3875                default: break;
3876                case 0x09:      /* HT */
3877                case 0x20:      /* SPACE */
3878                case 0xa0:      /* NBSP */
3879                MRRETURN(MATCH_NOMATCH);
3880                }
3881              }
3882            break;
3883    
3884            case OP_HSPACE:
3885            for (i = 1; i <= min; i++)
3886              {
3887              if (eptr >= md->end_subject)
3888                {
3889                SCHECK_PARTIAL();
3890                MRRETURN(MATCH_NOMATCH);
3891                }
3892              switch(*eptr++)
3893                {
3894                default: MRRETURN(MATCH_NOMATCH);
3895                case 0x09:      /* HT */
3896                case 0x20:      /* SPACE */
3897                case 0xa0:      /* NBSP */
3898                break;
3899                }
3900              }
3901            break;
3902    
3903            case OP_NOT_VSPACE:
3904            for (i = 1; i <= min; i++)
3905              {
3906              if (eptr >= md->end_subject)
3907                {
3908                SCHECK_PARTIAL();
3909                MRRETURN(MATCH_NOMATCH);
3910                }
3911              switch(*eptr++)
3912                {
3913                default: break;
3914                case 0x0a:      /* LF */
3915                case 0x0b:      /* VT */
3916                case 0x0c:      /* FF */
3917                case 0x0d:      /* CR */
3918                case 0x85:      /* NEL */
3919                MRRETURN(MATCH_NOMATCH);
3920                }
3921              }
3922            break;
3923    
3924            case OP_VSPACE:
3925            for (i = 1; i <= min; i++)
3926              {
3927              if (eptr >= md->end_subject)
3928                {
3929                SCHECK_PARTIAL();
3930                MRRETURN(MATCH_NOMATCH);
3931                }
3932              switch(*eptr++)
3933                {
3934                default: MRRETURN(MATCH_NOMATCH);
3935                case 0x0a:      /* LF */
3936                case 0x0b:      /* VT */
3937                case 0x0c:      /* FF */
3938                case 0x0d:      /* CR */
3939                case 0x85:      /* NEL */
3940              break;              break;
3941              }              }
3942            }            }
# Line 2887  for (;;) Line 3944  for (;;)
3944    
3945          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3946          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3947            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            {
3948              if (eptr >= md->end_subject)
3949                {
3950                SCHECK_PARTIAL();
3951                MRRETURN(MATCH_NOMATCH);
3952                }
3953              if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
3954              }
3955          break;          break;
3956    
3957          case OP_DIGIT:          case OP_DIGIT:
3958          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3959            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            {
3960              if (eptr >= md->end_subject)
3961                {
3962                SCHECK_PARTIAL();
3963                MRRETURN(MATCH_NOMATCH);
3964                }
3965              if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
3966              }
3967          break;          break;
3968    
3969          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3970          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3971            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            {
3972              if (eptr >= md->end_subject)
3973                {
3974                SCHECK_PARTIAL();
3975                MRRETURN(MATCH_NOMATCH);
3976                }
3977              if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
3978              }
3979          break;          break;
3980    
3981          case OP_WHITESPACE:          case OP_WHITESPACE:
3982          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3983            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            {
3984              if (eptr >= md->end_subject)
3985                {
3986                SCHECK_PARTIAL();
3987                MRRETURN(MATCH_NOMATCH);
3988                }
3989              if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
3990              }
3991          break;          break;
3992    
3993          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3994          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3995              {
3996              if (eptr >= md->end_subject)
3997                {
3998                SCHECK_PARTIAL();
3999                MRRETURN(MATCH_NOMATCH);
4000                }
4001            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
4002              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4003              }
4004          break;          break;
4005    
4006          case OP_WORDCHAR:          case OP_WORDCHAR:
4007