/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 231 by ph10, Tue Sep 11 11:15:33 2007 UTC revision 602 by ph10, Wed May 25 08:29:03 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 43  pattern matching using an NFA algorithm, Line 43  pattern matching using an NFA algorithm,
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
46  #include <config.h>  #include "config.h"
47  #endif  #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
# Line 71  defined PCRE_ERROR_xxx codes, which are Line 71  defined PCRE_ERROR_xxx codes, which are
71  /* Special internal returns from the match() function. Make them sufficiently  /* Special internal returns from the match() function. Make them sufficiently
72  negative to avoid the external error codes. */  negative to avoid the external error codes. */
73    
74  #define MATCH_COMMIT       (-999)  #define MATCH_ACCEPT       (-999)
75  #define MATCH_PRUNE        (-998)  #define MATCH_COMMIT       (-998)
76  #define MATCH_SKIP         (-997)  #define MATCH_PRUNE        (-997)
77  #define MATCH_THEN         (-996)  #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
# Line 89  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 122  while (length-- > 0) Line 132  while (length-- > 0)
132  *          Match a back-reference                *  *          Match a back-reference                *
133  *************************************************/  *************************************************/
134    
135  /* If a back reference hasn't been set, the length that is passed is greater  /* Normally, if a back reference hasn't been set, the length that is passed is
136  than the number of characters left in the string, so the match fails.  negative, so the match always fails. However, in JavaScript compatibility mode,
137    the length passed is zero. Note that in caseless UTF-8 mode, the number of
138    subject bytes matched may be different to the number of reference bytes.
139    
140  Arguments:  Arguments:
141    offset      index into the offset vector    offset      index into the offset vector
142    eptr        points into the subject    eptr        pointer into the subject
143    length      length to be matched    length      length of reference to be matched (number of bytes)
144    md          points to match data block    md          points to match data block
145    ims         the ims flags    caseless    TRUE if caseless
146    
147  Returns:      TRUE if matched  Returns:      < 0 if not matched, otherwise the number of subject bytes matched
148  */  */
149    
150  static BOOL  static int
151  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
152    unsigned long int ims)    BOOL caseless)
153  {  {
154  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR eptr_start = eptr;
155    register USPTR p = md->start_subject + md->offset_vector[offset];
156    
157  #ifdef DEBUG  #ifdef PCRE_DEBUG
158  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
159    printf("matching subject <null>");    printf("matching subject <null>");
160  else  else
# Line 154  pchars(p, length, FALSE, md); Line 167  pchars(p, length, FALSE, md);
167  printf("\n");  printf("\n");
168  #endif  #endif
169    
170  /* Always fail if not enough characters left */  /* Always fail if reference not set (and not JavaScript compatible). */
171    
172  if (length > md->end_subject - eptr) return FALSE;  if (length < 0) return -1;
173    
174  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175    properly if Unicode properties are supported. Otherwise, we can check only
176    ASCII characters. */
177    
178  if ((ims & PCRE_CASELESS) != 0)  if (caseless)
179    {    {
180    while (length-- > 0)  #ifdef SUPPORT_UTF8
181      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;  #ifdef SUPPORT_UCP
182      if (md->utf8)
183        {
184        /* Match characters up to the end of the reference. NOTE: the number of
185        bytes matched may differ, because there are some characters whose upper and
186        lower case versions code as different numbers of bytes. For example, U+023A
187        (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188        a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189        the latter. It is important, therefore, to check the length along the
190        reference, not along the subject (earlier code did this wrong). */
191    
192        USPTR endptr = p + length;
193        while (p < endptr)
194          {
195          int c, d;
196          if (eptr >= md->end_subject) return -1;
197          GETCHARINC(c, eptr);
198          GETCHARINC(d, p);
199          if (c != d && c != UCD_OTHERCASE(d)) return -1;
200          }
201        }
202      else
203    #endif
204    #endif
205    
206      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
207      is no UCP support. */
208        {
209        if (eptr + length > md->end_subject) return -1;
210        while (length-- > 0)
211          { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
212        }
213    }    }
214    
215    /* In the caseful case, we can just compare the bytes, whether or not we
216    are in UTF-8 mode. */
217    
218  else  else
219    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    {
220      if (eptr + length > md->end_subject) return -1;
221      while (length-- > 0) if (*p++ != *eptr++) return -1;
222      }
223    
224  return TRUE;  return eptr - eptr_start;
225  }  }
226    
227    
# Line 219  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 272  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
272         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
273         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
274         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
275         RM51,  RM52, RM53, RM54 };         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
276           RM61,  RM62 };
277    
278  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
279  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
280  actuall used in this definition. */  actually used in this definition. */
281    
282  #ifndef NO_RECURSE  #ifndef NO_RECURSE
283  #define REGISTER register  #define REGISTER register
284    
285  #ifdef DEBUG  #ifdef PCRE_DEBUG
286  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rw) \
287    { \    { \
288    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
289    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rdepth+1); \
290    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
291    }    }
292  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 241  actuall used in this definition. */ Line 295  actuall used in this definition. */
295    return ra; \    return ra; \
296    }    }
297  #else  #else
298  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rw) \
299    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rdepth+1)
300  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
301  #endif  #endif
302    
# Line 255  argument of match(), which never changes Line 309  argument of match(), which never changes
309    
310  #define REGISTER  #define REGISTER
311    
312  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rf,rw)\
313    {\    {\
314    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
315      if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316    frame->Xwhere = rw; \    frame->Xwhere = rw; \
317    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
318    newframe->Xecode = rb;\    newframe->Xecode = rb;\
319    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
320      newframe->Xmarkptr = markptr;\
321    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
322    newframe->Xims = re;\    newframe->Xeptrb = re;\
323    newframe->Xeptrb = rf;\    newframe->Xflags = rf;\
   newframe->Xflags = rg;\  
324    newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
325    newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
326    frame = newframe;\    frame = newframe;\
# Line 277  argument of match(), which never changes Line 332  argument of match(), which never changes
332    
333  #define RRETURN(ra)\  #define RRETURN(ra)\
334    {\    {\
335    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
336    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
337    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
338    if (frame != NULL)\    if (frame != NULL)\
339      {\      {\
340      rrc = ra;\      rrc = ra;\
# Line 296  typedef struct heapframe { Line 351  typedef struct heapframe {
351    
352    /* Function arguments that may change */    /* Function arguments that may change */
353    
354    const uschar *Xeptr;    USPTR Xeptr;
355    const uschar *Xecode;    const uschar *Xecode;
356    const uschar *Xmstart;    USPTR Xmstart;
357      USPTR Xmarkptr;
358    int Xoffset_top;    int Xoffset_top;
   long int Xims;  
359    eptrblock *Xeptrb;    eptrblock *Xeptrb;
360    int Xflags;    int Xflags;
361    unsigned int Xrdepth;    unsigned int Xrdepth;
362    
363    /* Function local variables */    /* Function local variables */
364    
365    const uschar *Xcallpat;    USPTR Xcallpat;
366    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
367    const uschar *Xdata;    USPTR Xcharptr;
368    const uschar *Xnext;  #endif
369    const uschar *Xpp;    USPTR Xdata;
370    const uschar *Xprev;    USPTR Xnext;
371    const uschar *Xsaved_eptr;    USPTR Xpp;
372      USPTR Xprev;
373      USPTR Xsaved_eptr;
374    
375    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
376    
# Line 321  typedef struct heapframe { Line 378  typedef struct heapframe {
378    BOOL Xcondition;    BOOL Xcondition;
379    BOOL Xprev_is_word;    BOOL Xprev_is_word;
380    
   unsigned long int Xoriginal_ims;  
   
381  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
382    int Xprop_type;    int Xprop_type;
383    int Xprop_value;    int Xprop_value;
# Line 334  typedef struct heapframe { Line 389  typedef struct heapframe {
389    uschar Xocchars[8];    uschar Xocchars[8];
390  #endif  #endif
391    
392      int Xcodelink;
393    int Xctype;    int Xctype;
394    unsigned int Xfc;    unsigned int Xfc;
395    int Xfi;    int Xfi;
# Line 369  typedef struct heapframe { Line 425  typedef struct heapframe {
425    
426  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
427  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
428  same response.  same response. */
429    
430    /* These macros pack up tests that are used for partial matching, and which
431    appear several times in the code. We set the "hit end" flag if the pointer is
432    at the end of the subject and also past the start of the subject (i.e.
433    something has been matched). For hard partial matching, we then return
434    immediately. The second one is used when we already know we are past the end of
435    the subject. */
436    
437    #define CHECK_PARTIAL()\
438      if (md->partial != 0 && eptr >= md->end_subject && \
439          eptr > md->start_used_ptr) \
440        { \
441        md->hitend = TRUE; \
442        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
443        }
444    
445    #define SCHECK_PARTIAL()\
446      if (md->partial != 0 && eptr > md->start_used_ptr) \
447        { \
448        md->hitend = TRUE; \
449        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
450        }
451    
452    
453  Performance note: It might be tempting to extract commonly used fields from the  /* Performance note: It might be tempting to extract commonly used fields from
454  md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf8, end_subject) into individual variables to improve
455  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
456  made performance worse.  made performance worse.
457    
# Line 381  Arguments: Line 460  Arguments:
460     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
461     mstart      pointer to the current match start position (can be modified     mstart      pointer to the current match start position (can be modified
462                   by encountering \K)                   by encountering \K)
463       markptr     pointer to the most recent MARK name, or NULL
464     offset_top  current top pointer     offset_top  current top pointer
465     md          pointer to "static" info for the match     md          pointer to "static" info for the match
    ims         current /i, /m, and /s options  
466     eptrb       pointer to chain of blocks containing eptr at start of     eptrb       pointer to chain of blocks containing eptr at start of
467                   brackets - for testing for empty matches                   brackets - for testing for empty matches
468     flags       can contain     flags       can contain
# Line 394  Arguments: Line 473  Arguments:
473    
474  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
475                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
476                   a negative MATCH_xxx value for PRUNE, SKIP, etc
477                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
478                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
479  */  */
480    
481  static int  static int
482  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
483    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,
484    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
485  {  {
486  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
# Line 413  register unsigned int c;   /* Character Line 493  register unsigned int c;   /* Character
493  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
494    
495  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
496    BOOL caseless;
497    int condcode;
498    
499  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
500  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 420  heap storage. Set up the top-level frame Line 502  heap storage. Set up the top-level frame
502  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
503    
504  #ifdef NO_RECURSE  #ifdef NO_RECURSE
505  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
506    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
507  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
508    
509  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 428  frame->Xprevframe = NULL;            /* Line 511  frame->Xprevframe = NULL;            /*
511  frame->Xeptr = eptr;  frame->Xeptr = eptr;
512  frame->Xecode = ecode;  frame->Xecode = ecode;
513  frame->Xmstart = mstart;  frame->Xmstart = mstart;
514    frame->Xmarkptr = markptr;
515  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
 frame->Xims = ims;  
516  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
517  frame->Xflags = flags;  frame->Xflags = flags;
518  frame->Xrdepth = rdepth;  frame->Xrdepth = rdepth;
# Line 443  HEAP_RECURSE: Line 526  HEAP_RECURSE:
526  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
527  #define ecode              frame->Xecode  #define ecode              frame->Xecode
528  #define mstart             frame->Xmstart  #define mstart             frame->Xmstart
529    #define markptr            frame->Xmarkptr
530  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
 #define ims                frame->Xims  
531  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
532  #define flags              frame->Xflags  #define flags              frame->Xflags
533  #define rdepth             frame->Xrdepth  #define rdepth             frame->Xrdepth
# Line 455  HEAP_RECURSE: Line 538  HEAP_RECURSE:
538  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
539  #endif  #endif
540  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
541    #define codelink           frame->Xcodelink
542  #define data               frame->Xdata  #define data               frame->Xdata
543  #define next               frame->Xnext  #define next               frame->Xnext
544  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 467  HEAP_RECURSE: Line 551  HEAP_RECURSE:
551  #define condition          frame->Xcondition  #define condition          frame->Xcondition
552  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
553    
 #define original_ims       frame->Xoriginal_ims  
   
554  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
555  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
556  #define prop_value         frame->Xprop_value  #define prop_value         frame->Xprop_value
# Line 522  BOOL cur_is_word;                  /* a Line 604  BOOL cur_is_word;                  /* a
604  BOOL condition;  BOOL condition;
605  BOOL prev_is_word;  BOOL prev_is_word;
606    
 unsigned long int original_ims;  
   
607  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
608  int prop_type;  int prop_type;
609  int prop_value;  int prop_value;
# Line 535  int oclength; Line 615  int oclength;
615  uschar occhars[8];  uschar occhars[8];
616  #endif  #endif
617    
618    int codelink;
619  int ctype;  int ctype;
620  int length;  int length;
621  int max;  int max;
# Line 568  TAIL_RECURSE: Line 649  TAIL_RECURSE:
649  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
650  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
651  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
652  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
653  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
654  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
655  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
# Line 585  haven't exceeded the recursive call limi Line 666  haven't exceeded the recursive call limi
666  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
667  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
668    
 original_ims = ims;    /* Save for resetting on ')' */  
   
669  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
670  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
671  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
# Line 610  for (;;) Line 689  for (;;)
689    minimize = possessive = FALSE;    minimize = possessive = FALSE;
690    op = *ecode;    op = *ecode;
691    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > mstart)  
     md->hitend = TRUE;  
   
692    switch(op)    switch(op)
693      {      {
694        case OP_MARK:
695        markptr = ecode + 2;
696        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
697          eptrb, flags, RM55);
698    
699        /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
700        argument, and we must check whether that argument matches this MARK's
701        argument. It is passed back in md->start_match_ptr (an overloading of that
702        variable). If it does match, we reset that variable to the current subject
703        position and return MATCH_SKIP. Otherwise, pass back the return code
704        unaltered. */
705    
706        if (rrc == MATCH_SKIP_ARG &&
707            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
708          {
709          md->start_match_ptr = eptr;
710          RRETURN(MATCH_SKIP);
711          }
712    
713        if (md->mark == NULL) md->mark = markptr;
714        RRETURN(rrc);
715    
716      case OP_FAIL:      case OP_FAIL:
717      RRETURN(MATCH_NOMATCH);      MRRETURN(MATCH_NOMATCH);
718    
719        /* COMMIT overrides PRUNE, SKIP, and THEN */
720    
721        case OP_COMMIT:
722        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
723          eptrb, flags, RM52);
724        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
725            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
726            rrc != MATCH_THEN)
727          RRETURN(rrc);
728        MRRETURN(MATCH_COMMIT);
729    
730        /* PRUNE overrides THEN */
731    
732      case OP_PRUNE:      case OP_PRUNE:
733      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
734        ims, eptrb, flags, RM51);        eptrb, flags, RM51);
735      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
736        MRRETURN(MATCH_PRUNE);
737    
738        case OP_PRUNE_ARG:
739        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
740          eptrb, flags, RM56);
741        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
742        md->mark = ecode + 2;
743      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
744    
745      case OP_COMMIT:      /* SKIP overrides PRUNE and THEN */
     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,  
       ims, eptrb, flags, RM52);  
     if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
     RRETURN(MATCH_COMMIT);  
746    
747      case OP_SKIP:      case OP_SKIP:
748      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
749        ims, eptrb, flags, RM53);        eptrb, flags, RM53);
750      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
751          RRETURN(rrc);
752      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
753      RRETURN(MATCH_SKIP);      MRRETURN(MATCH_SKIP);
754    
755        case OP_SKIP_ARG:
756        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
757          eptrb, flags, RM57);
758        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
759          RRETURN(rrc);
760    
761        /* Pass back the current skip name by overloading md->start_match_ptr and
762        returning the special MATCH_SKIP_ARG return code. This will either be
763        caught by a matching MARK, or get to the top, where it is treated the same
764        as PRUNE. */
765    
766        md->start_match_ptr = ecode + 2;
767        RRETURN(MATCH_SKIP_ARG);
768    
769        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
770        the alt that is at the start of the current branch. This makes it possible
771        to skip back past alternatives that precede the THEN within the current
772        branch. */
773    
774      case OP_THEN:      case OP_THEN:
775      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
776        ims, eptrb, flags, RM54);        eptrb, flags, RM54);
777        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
778        md->start_match_ptr = ecode - GET(ecode, 1);
779        MRRETURN(MATCH_THEN);
780    
781        case OP_THEN_ARG:
782        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
783          offset_top, md, eptrb, flags, RM58);
784      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
785        md->start_match_ptr = ecode - GET(ecode, 1);
786        md->mark = ecode + LINK_SIZE + 2;
787      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
788    
789      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
# Line 667  for (;;) Line 805  for (;;)
805      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
806      offset = number << 1;      offset = number << 1;
807    
808  #ifdef DEBUG  #ifdef PCRE_DEBUG
809      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
810      printf("subject=");      printf("subject=");
811      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
# Line 682  for (;;) Line 820  for (;;)
820        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
821    
822        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
823        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
824            (int)(eptr - md->start_subject);
825    
826        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
827        do        do
828          {          {
829          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
830            ims, eptrb, flags, RM1);            eptrb, flags, RM1);
831          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
832                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
833              RRETURN(rrc);
834          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
835          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
836          }          }
# Line 701  for (;;) Line 842  for (;;)
842        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
843        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
844    
845          if (rrc != MATCH_THEN) md->mark = markptr;
846        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
847        }        }
848    
# Line 738  for (;;) Line 880  for (;;)
880    
881          /* Possibly empty group; can't use tail recursion. */          /* Possibly empty group; can't use tail recursion. */
882    
883          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
884            eptrb, flags, RM48);            flags, RM48);
885            if (rrc == MATCH_NOMATCH) md->mark = markptr;
886          RRETURN(rrc);          RRETURN(rrc);
887          }          }
888    
889        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
890        otherwise return. */        otherwise return. */
891    
892        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
893          eptrb, flags, RM2);          flags, RM2);
894        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
895              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
896            RRETURN(rrc);
897        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
898        }        }
899      /* Control never reaches here. */      /* Control never reaches here. */
# Line 761  for (;;) Line 906  for (;;)
906    
907      case OP_COND:      case OP_COND:
908      case OP_SCOND:      case OP_SCOND:
909      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
910    
911        /* Because of the way auto-callout works during compile, a callout item is
912        inserted between OP_COND and an assertion condition. */
913    
914        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
915        {        {
916        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        if (pcre_callout != NULL)
917        condition = md->recursive != NULL &&          {
918          (offset == RREF_ANY || offset == md->recursive->group_num);          pcre_callout_block cb;
919        ecode += condition? 3 : GET(ecode, 1);          cb.version          = 1;   /* Version 1 of the callout block */
920            cb.callout_number   = ecode[LINK_SIZE+2];
921            cb.offset_vector    = md->offset_vector;
922            cb.subject          = (PCRE_SPTR)md->start_subject;
923            cb.subject_length   = (int)(md->end_subject - md->start_subject);
924            cb.start_match      = (int)(mstart - md->start_subject);
925            cb.current_position = (int)(eptr - md->start_subject);
926            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
927            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
928            cb.capture_top      = offset_top/2;
929            cb.capture_last     = md->capture_last;
930            cb.callout_data     = md->callout_data;
931            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
932            if (rrc < 0) RRETURN(rrc);
933            }
934          ecode += _pcre_OP_lengths[OP_CALLOUT];
935          }
936    
937        condcode = ecode[LINK_SIZE+1];
938    
939        /* Now see what the actual condition is */
940    
941        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
942          {
943          if (md->recursive == NULL)                /* Not recursing => FALSE */
944            {
945            condition = FALSE;
946            ecode += GET(ecode, 1);
947            }
948          else
949            {
950            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
951            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
952    
953            /* If the test is for recursion into a specific subpattern, and it is
954            false, but the test was set up by name, scan the table to see if the
955            name refers to any other numbers, and test them. The condition is true
956            if any one is set. */
957    
958            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
959              {
960              uschar *slotA = md->name_table;
961              for (i = 0; i < md->name_count; i++)
962                {
963                if (GET2(slotA, 0) == recno) break;
964                slotA += md->name_entry_size;
965                }
966    
967              /* Found a name for the number - there can be only one; duplicate
968              names for different numbers are allowed, but not vice versa. First
969              scan down for duplicates. */
970    
971              if (i < md->name_count)
972                {
973                uschar *slotB = slotA;
974                while (slotB > md->name_table)
975                  {
976                  slotB -= md->name_entry_size;
977                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
978                    {
979                    condition = GET2(slotB, 0) == md->recursive->group_num;
980                    if (condition) break;
981                    }
982                  else break;
983                  }
984    
985                /* Scan up for duplicates */
986    
987                if (!condition)
988                  {
989                  slotB = slotA;
990                  for (i++; i < md->name_count; i++)
991                    {
992                    slotB += md->name_entry_size;
993                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
994                      {
995                      condition = GET2(slotB, 0) == md->recursive->group_num;
996                      if (condition) break;
997                      }
998                    else break;
999                    }
1000                  }
1001                }
1002              }
1003    
1004            /* Choose branch according to the condition */
1005    
1006            ecode += condition? 3 : GET(ecode, 1);
1007            }
1008        }        }
1009    
1010      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1011        {        {
1012        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1013        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
1014    
1015          /* If the numbered capture is unset, but the reference was by name,
1016          scan the table to see if the name refers to any other numbers, and test
1017          them. The condition is true if any one is set. This is tediously similar
1018          to the code above, but not close enough to try to amalgamate. */
1019    
1020          if (!condition && condcode == OP_NCREF)
1021            {
1022            int refno = offset >> 1;
1023            uschar *slotA = md->name_table;
1024    
1025            for (i = 0; i < md->name_count; i++)
1026              {
1027              if (GET2(slotA, 0) == refno) break;
1028              slotA += md->name_entry_size;
1029              }
1030    
1031            /* Found a name for the number - there can be only one; duplicate names
1032            for different numbers are allowed, but not vice versa. First scan down
1033            for duplicates. */
1034    
1035            if (i < md->name_count)
1036              {
1037              uschar *slotB = slotA;
1038              while (slotB > md->name_table)
1039                {
1040                slotB -= md->name_entry_size;
1041                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1042                  {
1043                  offset = GET2(slotB, 0) << 1;
1044                  condition = offset < offset_top &&
1045                    md->offset_vector[offset] >= 0;
1046                  if (condition) break;
1047                  }
1048                else break;
1049                }
1050    
1051              /* Scan up for duplicates */
1052    
1053              if (!condition)
1054                {
1055                slotB = slotA;
1056                for (i++; i < md->name_count; i++)
1057                  {
1058                  slotB += md->name_entry_size;
1059                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1060                    {
1061                    offset = GET2(slotB, 0) << 1;
1062                    condition = offset < offset_top &&
1063                      md->offset_vector[offset] >= 0;
1064                    if (condition) break;
1065                    }
1066                  else break;
1067                  }
1068                }
1069              }
1070            }
1071    
1072          /* Choose branch according to the condition */
1073    
1074        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
1075        }        }
1076    
1077      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
1078        {        {
1079        condition = FALSE;        condition = FALSE;
1080        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 788  for (;;) Line 1086  for (;;)
1086    
1087      else      else
1088        {        {
1089        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL,
1090            match_condassert, RM3);            match_condassert, RM3);
1091        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1092          {          {
# Line 796  for (;;) Line 1094  for (;;)
1094          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1095          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1096          }          }
1097        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)        else if (rrc != MATCH_NOMATCH &&
1098                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1099          {          {
1100          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1101          }          }
1102        else        else
1103          {          {
1104          condition = FALSE;          condition = FALSE;
1105          ecode += GET(ecode, 1);          ecode += codelink;
1106          }          }
1107        }        }
1108    
# Line 817  for (;;) Line 1116  for (;;)
1116        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1117        if (op == OP_SCOND)        /* Possibly empty group */        if (op == OP_SCOND)        /* Possibly empty group */
1118          {          {
1119          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);          RMATCH(eptr, ecode, offset_top, md, eptrb, match_cbegroup, RM49);
1120          RRETURN(rrc);          RRETURN(rrc);
1121          }          }
1122        else                       /* Group must match something */        else                       /* Group must match something */
# Line 826  for (;;) Line 1125  for (;;)
1125          goto TAIL_RECURSE;          goto TAIL_RECURSE;
1126          }          }
1127        }        }
1128      else                         /* Condition false & no 2nd alternative */      else                         /* Condition false & no alternative */
1129        {        {
1130        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1131        }        }
1132      break;      break;
1133    
1134    
1135        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1136        to close any currently open capturing brackets. */
1137    
1138        case OP_CLOSE:
1139        number = GET2(ecode, 1);
1140        offset = number << 1;
1141    
1142    #ifdef PCRE_DEBUG
1143          printf("end bracket %d at *ACCEPT", number);
1144          printf("\n");
1145    #endif
1146    
1147        md->capture_last = number;
1148        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1149          {
1150          md->offset_vector[offset] =
1151            md->offset_vector[md->offset_end - number];
1152          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1153          if (offset_top <= offset) offset_top = offset + 2;
1154          }
1155        ecode += 3;
1156        break;
1157    
1158    
1159      /* End of the pattern, either real or forced. If we are in a top-level      /* End of the pattern, either real or forced. If we are in a top-level
1160      recursion, we should restore the offsets appropriately and continue from      recursion, we should restore the offsets appropriately and continue from
1161      after the call. */      after the call. */
# Line 846  for (;;) Line 1169  for (;;)
1169        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1170        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1171          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1172        mstart = rec->save_start;        offset_top = rec->save_offset_top;
       ims = original_ims;  
1173        ecode = rec->after_call;        ecode = rec->after_call;
1174        break;        break;
1175        }        }
1176    
1177      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1178      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1179        the subject. In both cases, backtracking will then try other alternatives,
1180        if any. */
1181    
1182        if (eptr == mstart &&
1183            (md->notempty ||
1184              (md->notempty_atstart &&
1185                mstart == md->start_subject + md->start_offset)))
1186          MRRETURN(MATCH_NOMATCH);
1187    
1188        /* Otherwise, we have a match. */
1189    
     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);  
1190      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1191      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1192      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
     RRETURN(MATCH_MATCH);  
1193    
1194      /* Change option settings */      /* For some reason, the macros don't work properly if an expression is
1195        given as the argument to MRRETURN when the heap is in use. */
1196    
1197      case OP_OPT:      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1198      ims = ecode[1];      MRRETURN(rrc);
     ecode += 2;  
     DPRINTF(("ims set to %02lx\n", ims));  
     break;  
1199    
1200      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
1201      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
# Line 879  for (;;) Line 1207  for (;;)
1207      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1208      do      do
1209        {        {
1210        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, 0,
1211          RM4);          RM4);
1212        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1213        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          {
1214            mstart = md->start_match_ptr;   /* In case \K reset it */
1215            break;
1216            }
1217          if (rrc != MATCH_NOMATCH &&
1218              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1219            RRETURN(rrc);
1220        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1221        }        }
1222      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1223      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1224    
1225      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1226    
# Line 900  for (;;) Line 1234  for (;;)
1234      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1235      continue;      continue;
1236    
1237      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1238        PRUNE, or COMMIT means we must assume failure without checking subsequent
1239        branches. */
1240    
1241      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1242      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1243      do      do
1244        {        {
1245        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, 0,
1246          RM5);          RM5);
1247        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1248        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1249            {
1250            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1251            break;
1252            }
1253          if (rrc != MATCH_NOMATCH &&
1254              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1255            RRETURN(rrc);
1256        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1257        }        }
1258      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 932  for (;;) Line 1275  for (;;)
1275        while (i-- > 0)        while (i-- > 0)
1276          {          {
1277          eptr--;          eptr--;
1278          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1279          BACKCHAR(eptr);          BACKCHAR(eptr);
1280          }          }
1281        }        }
# Line 943  for (;;) Line 1286  for (;;)
1286    
1287        {        {
1288        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1289        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1290        }        }
1291    
1292      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1293    
1294        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1295      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1296      break;      break;
1297    
# Line 963  for (;;) Line 1307  for (;;)
1307        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1308        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1309        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1310        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1311        cb.start_match      = mstart - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1312        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1313        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1314        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1315        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1316        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1317        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1318        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1319        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1320        }        }
1321      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 1026  for (;;) Line 1370  for (;;)
1370    
1371        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1372              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1373        new_recursive.save_start = mstart;        new_recursive.save_offset_top = offset_top;
       mstart = eptr;  
1374    
1375        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1376        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 1037  for (;;) Line 1380  for (;;)
1380        do        do
1381          {          {
1382          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1383            md, ims, eptrb, flags, RM6);            md, eptrb, flags, RM6);
1384          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1385            {            {
1386            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1387            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1388            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1389              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1390            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1391            }            }
1392          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH &&
1393                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1394            {            {
1395            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1396              if (new_recursive.offset_save != stacksave)
1397                (pcre_free)(new_recursive.offset_save);
1398            RRETURN(rrc);            RRETURN(rrc);
1399            }            }
1400    
# Line 1063  for (;;) Line 1409  for (;;)
1409        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1410        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1411          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1412        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1413        }        }
1414      /* Control never reaches here */      /* Control never reaches here */
1415    
# Line 1072  for (;;) Line 1418  for (;;)
1418      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1419      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1420      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1421      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1422        the start-of-match value in case it was changed by \K. */
1423    
1424      case OP_ONCE:      case OP_ONCE:
1425      prev = ecode;      prev = ecode;
# Line 1080  for (;;) Line 1427  for (;;)
1427    
1428      do      do
1429        {        {
1430        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, 0, RM7);
1431        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1432        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          {
1433            mstart = md->start_match_ptr;
1434            break;
1435            }
1436          if (rrc != MATCH_NOMATCH &&
1437              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1438            RRETURN(rrc);
1439        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1440        }        }
1441      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1113  for (;;) Line 1466  for (;;)
1466    
1467      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1468      preceding bracket, in the appropriate order. The second "call" of match()      preceding bracket, in the appropriate order. The second "call" of match()
1469      uses tail recursion, to avoid using another stack frame. We need to reset      uses tail recursion, to avoid using another stack frame. */
     any options that changed within the bracket before re-running it, so  
     check the next opcode. */  
   
     if (ecode[1+LINK_SIZE] == OP_OPT)  
       {  
       ims = (ims & ~PCRE_IMS) | ecode[4];  
       DPRINTF(("ims set to %02lx at group repeat\n", ims));  
       }  
1470    
1471      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1472        {        {
1473        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, 0, RM8);
1474        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1475        ecode = prev;        ecode = prev;
1476        flags = 0;        flags = 0;
# Line 1133  for (;;) Line 1478  for (;;)
1478        }        }
1479      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1480        {        {
1481        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);        RMATCH(eptr, prev, offset_top, md, eptrb, match_cbegroup, RM9);
1482        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1483        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1484        flags = 0;        flags = 0;
# Line 1148  for (;;) Line 1493  for (;;)
1493      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1494      break;      break;
1495    
1496      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1497      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1498      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1499      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1500      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1501    
1502      case OP_BRAZERO:      case OP_BRAZERO:
1503        {        {
1504        next = ecode+1;        next = ecode+1;
1505        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);        RMATCH(eptr, next, offset_top, md, eptrb, 0, RM10);
1506        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1507        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1508        ecode = next + 1 + LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
# Line 1168  for (;;) Line 1513  for (;;)
1513        {        {
1514        next = ecode+1;        next = ecode+1;
1515        do next += GET(next, 1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1516        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, 0, RM11);
1517        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1518        ecode++;        ecode++;
1519        }        }
1520      break;      break;
1521    
1522        case OP_SKIPZERO:
1523          {
1524          next = ecode+1;
1525          do next += GET(next,1); while (*next == OP_ALT);
1526          ecode = next + 1 + LINK_SIZE;
1527          }
1528        break;
1529    
1530      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1531    
1532      case OP_KET:      case OP_KET:
# Line 1192  for (;;) Line 1545  for (;;)
1545        }        }
1546      else saved_eptr = NULL;      else saved_eptr = NULL;
1547    
1548      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or an atomic group, stop
1549      MATCH_MATCH, but record the current high water mark for use by positive      matching and return MATCH_MATCH, but record the current high water mark for
1550      assertions. Do this also for the "once" (atomic) groups. */      use by positive assertions. We also need to record the match start in case
1551        it was changed by \K. */
1552    
1553      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1554          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1202  for (;;) Line 1556  for (;;)
1556        {        {
1557        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1558        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1559        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1560          MRRETURN(MATCH_MATCH);
1561        }        }
1562    
1563      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1216  for (;;) Line 1571  for (;;)
1571        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1572        offset = number << 1;        offset = number << 1;
1573    
1574  #ifdef DEBUG  #ifdef PCRE_DEBUG
1575        printf("end bracket %d", number);        printf("end bracket %d", number);
1576        printf("\n");        printf("\n");
1577  #endif  #endif
# Line 1226  for (;;) Line 1581  for (;;)
1581          {          {
1582          md->offset_vector[offset] =          md->offset_vector[offset] =
1583            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1584          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1585          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1586          }          }
1587    
# Line 1238  for (;;) Line 1593  for (;;)
1593          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1594          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1595          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
         mstart = rec->save_start;  
1596          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1597            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1598            offset_top = rec->save_offset_top;
1599          ecode = rec->after_call;          ecode = rec->after_call;
         ims = original_ims;  
1600          break;          break;
1601          }          }
1602        }        }
1603    
     /* For both capturing and non-capturing groups, reset the value of the ims  
     flags, in case they got changed during the group. */  
   
     ims = original_ims;  
     DPRINTF(("ims reset to %02lx\n", ims));  
   
1604      /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1605      happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1606      This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
# Line 1274  for (;;) Line 1622  for (;;)
1622    
1623      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1624        {        {
1625        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, 0, RM12);
1626        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1627        if (flags != 0)    /* Could match an empty string */        if (flags != 0)    /* Could match an empty string */
1628          {          {
1629          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);          RMATCH(eptr, prev, offset_top, md, eptrb, flags, RM50);
1630          RRETURN(rrc);          RRETURN(rrc);
1631          }          }
1632        ecode = prev;        ecode = prev;
# Line 1286  for (;;) Line 1634  for (;;)
1634        }        }
1635      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1636        {        {
1637        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);        RMATCH(eptr, prev, offset_top, md, eptrb, flags, RM13);
1638        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1639        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1640        flags = 0;        flags = 0;
# Line 1294  for (;;) Line 1642  for (;;)
1642        }        }
1643      /* Control never gets here */      /* Control never gets here */
1644    
1645      /* Start of subject unless notbol, or after internal newline if multiline */      /* Not multiline mode: start of subject assertion, unless notbol. */
1646    
1647      case OP_CIRC:      case OP_CIRC:
1648      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1649      if ((ims & PCRE_MULTILINE) != 0)  
       {  
       if (eptr != md->start_subject &&  
           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))  
         RRETURN(MATCH_NOMATCH);  
       ecode++;  
       break;  
       }  
     /* ... else fall through */  
   
1650      /* Start of subject assertion */      /* Start of subject assertion */
1651    
1652      case OP_SOD:      case OP_SOD:
1653      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1654        ecode++;
1655        break;
1656    
1657        /* Multiline mode: start of subject unless notbol, or after any newline. */
1658    
1659        case OP_CIRCM:
1660        if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1661        if (eptr != md->start_subject &&
1662            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1663          MRRETURN(MATCH_NOMATCH);
1664      ecode++;      ecode++;
1665      break;      break;
1666    
1667      /* Start of match assertion */      /* Start of match assertion */
1668    
1669      case OP_SOM:      case OP_SOM:
1670      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1671      ecode++;      ecode++;
1672      break;      break;
1673    
# Line 1329  for (;;) Line 1678  for (;;)
1678      ecode++;      ecode++;
1679      break;      break;
1680    
1681      /* Assert before internal newline if multiline, or before a terminating      /* Multiline mode: assert before any newline, or before end of subject
1682      newline unless endonly is set, else end of subject unless noteol is set. */      unless noteol is set. */
1683    
1684      case OP_DOLL:      case OP_DOLLM:
1685      if ((ims & PCRE_MULTILINE) != 0)      if (eptr < md->end_subject)
1686        {        { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
       if (eptr < md->end_subject)  
         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }  
       else  
         { if (md->noteol) RRETURN(MATCH_NOMATCH); }  
       ecode++;  
       break;  
       }  
1687      else      else
1688        {        {
1689        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1690        if (!md->endonly)        SCHECK_PARTIAL();
         {  
         if (eptr != md->end_subject &&  
             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))  
           RRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1691        }        }
1692        ecode++;
1693        break;
1694    
1695        /* Not multiline mode: assert before a terminating newline or before end of
1696        subject unless noteol is set. */
1697    
1698        case OP_DOLL:
1699        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1700        if (!md->endonly) goto ASSERT_NL_OR_EOS;
1701    
1702      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1703    
1704      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1705    
1706      case OP_EOD:      case OP_EOD:
1707      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1708        SCHECK_PARTIAL();
1709      ecode++;      ecode++;
1710      break;      break;
1711    
1712      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1713    
1714      case OP_EODN:      case OP_EODN:
1715      if (eptr != md->end_subject &&      ASSERT_NL_OR_EOS:
1716        if (eptr < md->end_subject &&
1717          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1718        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1719    
1720        /* Either at end of string or \n before end. */
1721    
1722        SCHECK_PARTIAL();
1723      ecode++;      ecode++;
1724      break;      break;
1725    
# Line 1380  for (;;) Line 1731  for (;;)
1731    
1732        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1733        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1734        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1735          partial matching. */
1736    
1737  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1738        if (utf8)        if (utf8)
1739          {          {
1740            /* Get status of previous character */
1741    
1742          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1743            {            {
1744            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1745            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1746              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1747            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1748    #ifdef SUPPORT_UCP
1749              if (md->use_ucp)
1750                {
1751                if (c == '_') prev_is_word = TRUE; else
1752                  {
1753                  int cat = UCD_CATEGORY(c);
1754                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1755                  }
1756                }
1757              else
1758    #endif
1759            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1760            }            }
1761          if (eptr >= md->end_subject) cur_is_word = FALSE; else  
1762            /* Get status of next character */
1763    
1764            if (eptr >= md->end_subject)
1765              {
1766              SCHECK_PARTIAL();
1767              cur_is_word = FALSE;
1768              }
1769            else
1770            {            {
1771            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1772    #ifdef SUPPORT_UCP
1773              if (md->use_ucp)
1774                {
1775                if (c == '_') cur_is_word = TRUE; else
1776                  {
1777                  int cat = UCD_CATEGORY(c);
1778                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1779                  }
1780                }
1781              else
1782    #endif
1783            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1784            }            }
1785          }          }
1786        else        else
1787  #endif  #endif
1788    
1789        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1790          consistency with the behaviour of \w we do use it in this case. */
1791    
1792          {          {
1793          prev_is_word = (eptr != md->start_subject) &&          /* Get status of previous character */
1794            ((md->ctypes[eptr[-1]] & ctype_word) != 0);  
1795          cur_is_word = (eptr < md->end_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1796            ((md->ctypes[*eptr] & ctype_word) != 0);            {
1797              if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1798    #ifdef SUPPORT_UCP
1799              if (md->use_ucp)
1800                {
1801                c = eptr[-1];
1802                if (c == '_') prev_is_word = TRUE; else
1803                  {
1804                  int cat = UCD_CATEGORY(c);
1805                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1806                  }
1807                }
1808              else
1809    #endif
1810              prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1811              }
1812    
1813            /* Get status of next character */
1814    
1815            if (eptr >= md->end_subject)
1816              {
1817              SCHECK_PARTIAL();
1818              cur_is_word = FALSE;
1819              }
1820            else
1821    #ifdef SUPPORT_UCP
1822            if (md->use_ucp)
1823              {
1824              c = *eptr;
1825              if (c == '_') cur_is_word = TRUE; else
1826                {
1827                int cat = UCD_CATEGORY(c);
1828                cur_is_word = (cat == ucp_L || cat == ucp_N);
1829                }
1830              }
1831            else
1832    #endif
1833            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1834          }          }
1835    
1836        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1837    
1838        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1839             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1840          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1841        }        }
1842      break;      break;
1843    
1844      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1845    
1846      case OP_ANY:      case OP_ANY:
1847      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1848        /* Fall through */
1849    
1850        case OP_ALLANY:
1851        if (eptr++ >= md->end_subject)
1852        {        {
1853        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);        SCHECK_PARTIAL();
1854          MRRETURN(MATCH_NOMATCH);
1855        }        }
1856      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1857      ecode++;      ecode++;
1858      break;      break;
1859    
# Line 1435  for (;;) Line 1861  for (;;)
1861      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1862    
1863      case OP_ANYBYTE:      case OP_ANYBYTE:
1864      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1865          {
1866          SCHECK_PARTIAL();
1867          MRRETURN(MATCH_NOMATCH);
1868          }
1869      ecode++;      ecode++;
1870      break;      break;
1871    
1872      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1873      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1874          {
1875          SCHECK_PARTIAL();
1876          MRRETURN(MATCH_NOMATCH);
1877          }
1878      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1879      if (      if (
1880  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1448  for (;;) Line 1882  for (;;)
1882  #endif  #endif
1883         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1884         )         )
1885        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1886      ecode++;      ecode++;
1887      break;      break;
1888    
1889      case OP_DIGIT:      case OP_DIGIT:
1890      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1891          {
1892          SCHECK_PARTIAL();
1893          MRRETURN(MATCH_NOMATCH);
1894          }
1895      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1896      if (      if (
1897  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1461  for (;;) Line 1899  for (;;)
1899  #endif  #endif
1900         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1901         )         )
1902        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1903      ecode++;      ecode++;
1904      break;      break;
1905    
1906      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1907      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1908          {
1909          SCHECK_PARTIAL();
1910          MRRETURN(MATCH_NOMATCH);
1911          }
1912      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1913      if (      if (
1914  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1474  for (;;) Line 1916  for (;;)
1916  #endif  #endif
1917         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1918         )         )
1919        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1920      ecode++;      ecode++;
1921      break;      break;
1922    
1923      case OP_WHITESPACE:      case OP_WHITESPACE:
1924      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1925          {
1926          SCHECK_PARTIAL();
1927          MRRETURN(MATCH_NOMATCH);
1928          }
1929      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1930      if (      if (
1931  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1487  for (;;) Line 1933  for (;;)
1933  #endif  #endif
1934         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1935         )         )
1936        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1937      ecode++;      ecode++;
1938      break;      break;
1939    
1940      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1941      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1942          {
1943          SCHECK_PARTIAL();
1944          MRRETURN(MATCH_NOMATCH);
1945          }
1946      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1947      if (      if (
1948  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1500  for (;;) Line 1950  for (;;)
1950  #endif  #endif
1951         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1952         )         )
1953        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1954      ecode++;      ecode++;
1955      break;      break;
1956    
1957      case OP_WORDCHAR:      case OP_WORDCHAR:
1958      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1959          {
1960          SCHECK_PARTIAL();
1961          MRRETURN(MATCH_NOMATCH);
1962          }
1963      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1964      if (      if (
1965  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1513  for (;;) Line 1967  for (;;)
1967  #endif  #endif
1968         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1969         )         )
1970        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1971      ecode++;      ecode++;
1972      break;      break;
1973    
1974      case OP_ANYNL:      case OP_ANYNL:
1975      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1976          {
1977          SCHECK_PARTIAL();
1978          MRRETURN(MATCH_NOMATCH);
1979          }
1980      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1981      switch(c)      switch(c)
1982        {        {
1983        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
1984    
1985        case 0x000d:        case 0x000d:
1986        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1987        break;        break;
# Line 1535  for (;;) Line 1994  for (;;)
1994        case 0x0085:        case 0x0085:
1995        case 0x2028:        case 0x2028:
1996        case 0x2029:        case 0x2029:
1997        if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);        if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1998        break;        break;
1999        }        }
2000      ecode++;      ecode++;
2001      break;      break;
2002    
2003      case OP_NOT_HSPACE:      case OP_NOT_HSPACE:
2004      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2005          {
2006          SCHECK_PARTIAL();
2007          MRRETURN(MATCH_NOMATCH);
2008          }
2009      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2010      switch(c)      switch(c)
2011        {        {
# Line 1566  for (;;) Line 2029  for (;;)
2029        case 0x202f:    /* NARROW NO-BREAK SPACE */        case 0x202f:    /* NARROW NO-BREAK SPACE */
2030        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2031        case 0x3000:    /* IDEOGRAPHIC SPACE */        case 0x3000:    /* IDEOGRAPHIC SPACE */
2032        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2033        }        }
2034      ecode++;      ecode++;
2035      break;      break;
2036    
2037      case OP_HSPACE:      case OP_HSPACE:
2038      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2039          {
2040          SCHECK_PARTIAL();
2041          MRRETURN(MATCH_NOMATCH);
2042          }
2043      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2044      switch(c)      switch(c)
2045        {        {
2046        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
2047        case 0x09:      /* HT */        case 0x09:      /* HT */
2048        case 0x20:      /* SPACE */        case 0x20:      /* SPACE */
2049        case 0xa0:      /* NBSP */        case 0xa0:      /* NBSP */
# Line 1602  for (;;) Line 2069  for (;;)
2069      break;      break;
2070    
2071      case OP_NOT_VSPACE:      case OP_NOT_VSPACE:
2072      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2073          {
2074          SCHECK_PARTIAL();
2075          MRRETURN(MATCH_NOMATCH);
2076          }
2077      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2078      switch(c)      switch(c)
2079        {        {
# Line 1614  for (;;) Line 2085  for (;;)
2085        case 0x85:      /* NEL */        case 0x85:      /* NEL */
2086        case 0x2028:    /* LINE SEPARATOR */        case 0x2028:    /* LINE SEPARATOR */
2087        case 0x2029:    /* PARAGRAPH SEPARATOR */        case 0x2029:    /* PARAGRAPH SEPARATOR */
2088        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2089        }        }
2090      ecode++;      ecode++;
2091      break;      break;
2092    
2093      case OP_VSPACE:      case OP_VSPACE:
2094      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2095          {
2096          SCHECK_PARTIAL();
2097          MRRETURN(MATCH_NOMATCH);
2098          }
2099      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2100      switch(c)      switch(c)
2101        {        {
2102        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
2103        case 0x0a:      /* LF */        case 0x0a:      /* LF */
2104        case 0x0b:      /* VT */        case 0x0b:      /* VT */
2105        case 0x0c:      /* FF */        case 0x0c:      /* FF */
# Line 1643  for (;;) Line 2118  for (;;)
2118    
2119      case OP_PROP:      case OP_PROP:
2120      case OP_NOTPROP:      case OP_NOTPROP:
2121      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2122          {
2123          SCHECK_PARTIAL();
2124          MRRETURN(MATCH_NOMATCH);
2125          }
2126      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2127        {        {
2128        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2129    
2130        switch(ecode[1])        switch(ecode[1])
2131          {          {
2132          case PT_ANY:          case PT_ANY:
2133          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2134          break;          break;
2135    
2136          case PT_LAMP:          case PT_LAMP:
2137          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2138               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2139               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2140            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2141           break;          break;
2142    
2143          case PT_GC:          case PT_GC:
2144          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2145            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2146          break;          break;
2147    
2148          case PT_PC:          case PT_PC:
2149          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2150            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2151          break;          break;
2152    
2153          case PT_SC:          case PT_SC:
2154          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2155            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2156            break;
2157    
2158            /* These are specials */
2159    
2160            case PT_ALNUM:
2161            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2162                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2163              MRRETURN(MATCH_NOMATCH);
2164            break;
2165    
2166            case PT_SPACE:    /* Perl space */
2167            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2168                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2169                   == (op == OP_NOTPROP))
2170              MRRETURN(MATCH_NOMATCH);
2171            break;
2172    
2173            case PT_PXSPACE:  /* POSIX space */
2174            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2175                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2176                 c == CHAR_FF || c == CHAR_CR)
2177                   == (op == OP_NOTPROP))
2178              MRRETURN(MATCH_NOMATCH);
2179            break;
2180    
2181            case PT_WORD:
2182            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2183                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2184                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2185              MRRETURN(MATCH_NOMATCH);
2186          break;          break;
2187    
2188            /* This should never occur */
2189    
2190          default:          default:
2191          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
2192          }          }
# Line 1689  for (;;) Line 2199  for (;;)
2199      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2200    
2201      case OP_EXTUNI:      case OP_EXTUNI:
2202      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2203          {
2204          SCHECK_PARTIAL();
2205          MRRETURN(MATCH_NOMATCH);
2206          }
2207      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2208        {        {
2209        int chartype, script;        int category = UCD_CATEGORY(c);
2210        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2211        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2212          {          {
2213          int len = 1;          int len = 1;
# Line 1702  for (;;) Line 2215  for (;;)
2215            {            {
2216            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2217            }            }
2218          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2219          if (category != ucp_M) break;          if (category != ucp_M) break;
2220          eptr += len;          eptr += len;
2221          }          }
# Line 1721  for (;;) Line 2234  for (;;)
2234      loops). */      loops). */
2235    
2236      case OP_REF:      case OP_REF:
2237        {      case OP_REFI:
2238        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      caseless = op == OP_REFI;
2239        ecode += 3;                                 /* Advance past item */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2240        ecode += 3;
2241    
2242        /* If the reference is unset, set the length to be longer than the amount      /* If the reference is unset, there are two possibilities:
       of subject left; this ensures that every attempt at a match fails. We  
       can't just fail here, because of the possibility of quantifiers with zero  
       minima. */  
   
       length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
         md->end_subject - eptr + 1 :  
         md->offset_vector[offset+1] - md->offset_vector[offset];  
2243    
2244        /* Set up for repetition, or handle the non-repeated case */      (a) In the default, Perl-compatible state, set the length negative;
2245        this ensures that every attempt at a match fails. We can't just fail
2246        here, because of the possibility of quantifiers with zero minima.
2247    
2248        switch (*ecode)      (b) If the JavaScript compatibility flag is set, set the length to zero
2249          {      so that the back reference matches an empty string.
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         c = *ecode++ - OP_CRSTAR;  
         minimize = (c & 1) != 0;  
         min = rep_min[c];                 /* Pick up values from tables; */  
         max = rep_max[c];                 /* zero for max => infinity */  
         if (max == 0) max = INT_MAX;  
         break;  
2250    
2251          case OP_CRRANGE:      Otherwise, set the length to the length of what was matched by the
2252          case OP_CRMINRANGE:      referenced subpattern. */
         minimize = (*ecode == OP_CRMINRANGE);  
         min = GET2(ecode, 1);  
         max = GET2(ecode, 3);  
         if (max == 0) max = INT_MAX;  
         ecode += 5;  
         break;  
2253    
2254          default:               /* No repeat follows */      if (offset >= offset_top || md->offset_vector[offset] < 0)
2255          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);        length = (md->jscript_compat)? 0 : -1;
2256          eptr += length;      else
2257          continue;              /* With the main loop */        length = md->offset_vector[offset+1] - md->offset_vector[offset];
2258    
2259        /* Set up for repetition, or handle the non-repeated case */
2260    
2261        switch (*ecode)
2262          {
2263          case OP_CRSTAR:
2264          case OP_CRMINSTAR:
2265          case OP_CRPLUS:
2266          case OP_CRMINPLUS:
2267          case OP_CRQUERY:
2268          case OP_CRMINQUERY:
2269          c = *ecode++ - OP_CRSTAR;
2270          minimize = (c & 1) != 0;
2271          min = rep_min[c];                 /* Pick up values from tables; */
2272          max = rep_max[c];                 /* zero for max => infinity */
2273          if (max == 0) max = INT_MAX;
2274          break;
2275    
2276          case OP_CRRANGE:
2277          case OP_CRMINRANGE:
2278          minimize = (*ecode == OP_CRMINRANGE);
2279          min = GET2(ecode, 1);
2280          max = GET2(ecode, 3);
2281          if (max == 0) max = INT_MAX;
2282          ecode += 5;
2283          break;
2284    
2285          default:               /* No repeat follows */
2286          if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2287            {
2288            CHECK_PARTIAL();
2289            MRRETURN(MATCH_NOMATCH);
2290          }          }
2291          eptr += length;
2292          continue;              /* With the main loop */
2293          }
2294    
2295        /* If the length of the reference is zero, just continue with the      /* Handle repeated back references. If the length of the reference is
2296        main loop. */      zero, just continue with the main loop. */
2297    
2298        if (length == 0) continue;      if (length == 0) continue;
2299    
2300        /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2301        the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
2302        address of eptr, so that eptr can be a register variable. */      address of eptr, so that eptr can be a register variable. */
2303    
2304        for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2305          {
2306          int slength;
2307          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2308          {          {
2309          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          CHECK_PARTIAL();
2310          eptr += length;          MRRETURN(MATCH_NOMATCH);
2311          }          }
2312          eptr += slength;
2313          }
2314    
2315        /* If min = max, continue at the same level without recursion.      /* If min = max, continue at the same level without recursion.
2316        They are not both allowed to be zero. */      They are not both allowed to be zero. */
2317    
2318        if (min == max) continue;      if (min == max) continue;
2319    
2320        /* If minimizing, keep trying and advancing the pointer */      /* If minimizing, keep trying and advancing the pointer */
2321    
2322        if (minimize)      if (minimize)
2323          {
2324          for (fi = min;; fi++)
2325          {          {
2326          for (fi = min;; fi++)          int slength;
2327            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM14);
2328            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2329            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2330            if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2331            {            {
2332            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            CHECK_PARTIAL();
2333            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            MRRETURN(MATCH_NOMATCH);
           if (fi >= max || !match_ref(offset, eptr, length, md, ims))  
             RRETURN(MATCH_NOMATCH);  
           eptr += length;  
2334            }            }
2335          /* Control never gets here */          eptr += slength;
2336          }          }
2337          /* Control never gets here */
2338          }
2339    
2340        /* If maximizing, find the longest string and work backwards */      /* If maximizing, find the longest string and work backwards */
2341    
2342        else      else
2343          {
2344          pp = eptr;
2345          for (i = min; i < max; i++)
2346          {          {
2347          pp = eptr;          int slength;
2348          for (i = min; i < max; i++)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
           {  
           if (!match_ref(offset, eptr, length, md, ims)) break;  
           eptr += length;  
           }  
         while (eptr >= pp)  
2349            {            {
2350            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);            CHECK_PARTIAL();
2351            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            break;
           eptr -= length;  
2352            }            }
2353          RRETURN(MATCH_NOMATCH);          eptr += slength;
2354            }
2355          while (eptr >= pp)
2356            {
2357            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM15);
2358            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2359            eptr -= length;
2360          }          }
2361          MRRETURN(MATCH_NOMATCH);
2362        }        }
2363      /* Control never gets here */      /* Control never gets here */
2364    
   
   
2365      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2366      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2367      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1878  for (;;) Line 2416  for (;;)
2416          {          {
2417          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2418            {            {
2419            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2420                {
2421                SCHECK_PARTIAL();
2422                MRRETURN(MATCH_NOMATCH);
2423                }
2424            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2425            if (c > 255)            if (c > 255)
2426              {              {
2427              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2428              }              }
2429            else            else
2430              {              {
2431              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2432              }              }
2433            }            }
2434          }          }
# Line 1896  for (;;) Line 2438  for (;;)
2438          {          {
2439          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2440            {            {
2441            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2442                {
2443                SCHECK_PARTIAL();
2444                MRRETURN(MATCH_NOMATCH);
2445                }
2446            c = *eptr++;            c = *eptr++;
2447            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2448            }            }
2449          }          }
2450    
# Line 1918  for (;;) Line 2464  for (;;)
2464            {            {
2465            for (fi = min;; fi++)            for (fi = min;; fi++)
2466              {              {
2467              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM16);
2468              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2469              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2470                if (eptr >= md->end_subject)
2471                  {
2472                  SCHECK_PARTIAL();
2473                  MRRETURN(MATCH_NOMATCH);
2474                  }
2475              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2476              if (c > 255)              if (c > 255)
2477                {                {
2478                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2479                }                }
2480              else              else
2481                {                {
2482                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2483                }                }
2484              }              }
2485            }            }
# Line 1938  for (;;) Line 2489  for (;;)
2489            {            {
2490            for (fi = min;; fi++)            for (fi = min;; fi++)
2491              {              {
2492              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM17);
2493              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2494              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2495                if (eptr >= md->end_subject)
2496                  {
2497                  SCHECK_PARTIAL();
2498                  MRRETURN(MATCH_NOMATCH);
2499                  }
2500              c = *eptr++;              c = *eptr++;
2501              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2502              }              }
2503            }            }
2504          /* Control never gets here */          /* Control never gets here */
# Line 1961  for (;;) Line 2517  for (;;)
2517            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2518              {              {
2519              int len = 1;              int len = 1;
2520              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2521                  {
2522                  SCHECK_PARTIAL();
2523                  break;
2524                  }
2525              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2526              if (c > 255)              if (c > 255)
2527                {                {
# Line 1975  for (;;) Line 2535  for (;;)
2535              }              }
2536            for (;;)            for (;;)
2537              {              {
2538              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM18);
2539              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2540              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2541              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1987  for (;;) Line 2547  for (;;)
2547            {            {
2548            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2549              {              {
2550              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2551                  {
2552                  SCHECK_PARTIAL();
2553                  break;
2554                  }
2555              c = *eptr;              c = *eptr;
2556              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2557              eptr++;              eptr++;
2558              }              }
2559            while (eptr >= pp)            while (eptr >= pp)
2560              {              {
2561              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM19);
2562              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2563              eptr--;              eptr--;
2564              }              }
2565            }            }
2566    
2567          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2568          }          }
2569        }        }
2570      /* Control never gets here */      /* Control never gets here */
2571    
2572    
2573      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2574      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2575        mode, because Unicode properties are supported in non-UTF-8 mode. */
2576    
2577  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2578      case OP_XCLASS:      case OP_XCLASS:
# Line 2048  for (;;) Line 2613  for (;;)
2613    
2614        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2615          {          {
2616          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2617          GETCHARINC(c, eptr);            {
2618          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2619              MRRETURN(MATCH_NOMATCH);
2620              }
2621            GETCHARINCTEST(c, eptr);
2622            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2623          }          }
2624    
2625        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2065  for (;;) Line 2634  for (;;)
2634          {          {
2635          for (fi = min;; fi++)          for (fi = min;; fi++)
2636            {            {
2637            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM20);
2638            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2639            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2640            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2641            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2642                SCHECK_PARTIAL();
2643                MRRETURN(MATCH_NOMATCH);
2644                }
2645              GETCHARINCTEST(c, eptr);
2646              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2647            }            }
2648          /* Control never gets here */          /* Control never gets here */
2649          }          }
# Line 2082  for (;;) Line 2656  for (;;)
2656          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2657            {            {
2658            int len = 1;            int len = 1;
2659            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2660            GETCHARLEN(c, eptr, len);              {
2661                SCHECK_PARTIAL();
2662                break;
2663                }
2664              GETCHARLENTEST(c, eptr, len);
2665            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2666            eptr += len;            eptr += len;
2667            }            }
2668          for(;;)          for(;;)
2669            {            {
2670            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM21);
2671            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2672            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2673            if (utf8) BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
2674            }            }
2675          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2676          }          }
2677    
2678        /* Control never gets here */        /* Control never gets here */
# Line 2110  for (;;) Line 2688  for (;;)
2688        length = 1;        length = 1;
2689        ecode++;        ecode++;
2690        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2691        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2692        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2693            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2694            MRRETURN(MATCH_NOMATCH);
2695            }
2696          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2697        }        }
2698      else      else
2699  #endif  #endif
2700    
2701      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2702        {        {
2703        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2704        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2705            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2706            MRRETURN(MATCH_NOMATCH);
2707            }
2708          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2709        ecode += 2;        ecode += 2;
2710        }        }
2711      break;      break;
2712    
2713      /* Match a single character, caselessly */      /* Match a single character, caselessly */
2714    
2715      case OP_CHARNC:      case OP_CHARI:
2716  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2717      if (utf8)      if (utf8)
2718        {        {
# Line 2134  for (;;) Line 2720  for (;;)
2720        ecode++;        ecode++;
2721        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2722    
2723        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2724            {
2725            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2726            MRRETURN(MATCH_NOMATCH);
2727            }
2728    
2729        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2730        can use the fast lookup table. */        can use the fast lookup table. */
2731    
2732        if (fc < 128)        if (fc < 128)
2733          {          {
2734          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2735          }          }
2736    
2737        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
# Line 2158  for (;;) Line 2748  for (;;)
2748          if (fc != dc)          if (fc != dc)
2749            {            {
2750  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2751            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2752  #endif  #endif
2753              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2754            }            }
2755          }          }
2756        }        }
# Line 2169  for (;;) Line 2759  for (;;)
2759    
2760      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2761        {        {
2762        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2763        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2764            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2765            MRRETURN(MATCH_NOMATCH);
2766            }
2767          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2768        ecode += 2;        ecode += 2;
2769        }        }
2770      break;      break;
# Line 2178  for (;;) Line 2772  for (;;)
2772      /* Match a single character repeatedly. */      /* Match a single character repeatedly. */
2773    
2774      case OP_EXACT:      case OP_EXACT:
2775        case OP_EXACTI:
2776      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2777      ecode += 3;      ecode += 3;
2778      goto REPEATCHAR;      goto REPEATCHAR;
2779    
2780      case OP_POSUPTO:      case OP_POSUPTO:
2781        case OP_POSUPTOI:
2782      possessive = TRUE;      possessive = TRUE;
2783      /* Fall through */      /* Fall through */
2784    
2785      case OP_UPTO:      case OP_UPTO:
2786        case OP_UPTOI:
2787      case OP_MINUPTO:      case OP_MINUPTO:
2788        case OP_MINUPTOI:
2789      min = 0;      min = 0;
2790      max = GET2(ecode, 1);      max = GET2(ecode, 1);
2791      minimize = *ecode == OP_MINUPTO;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
2792      ecode += 3;      ecode += 3;
2793      goto REPEATCHAR;      goto REPEATCHAR;
2794    
2795      case OP_POSSTAR:      case OP_POSSTAR:
2796        case OP_POSSTARI:
2797      possessive = TRUE;      possessive = TRUE;
2798      min = 0;      min = 0;
2799      max = INT_MAX;      max = INT_MAX;
# Line 2202  for (;;) Line 2801  for (;;)
2801      goto REPEATCHAR;      goto REPEATCHAR;
2802    
2803      case OP_POSPLUS:      case OP_POSPLUS:
2804        case OP_POSPLUSI:
2805      possessive = TRUE;      possessive = TRUE;
2806      min = 1;      min = 1;
2807      max = INT_MAX;      max = INT_MAX;
# Line 2209  for (;;) Line 2809  for (;;)
2809      goto REPEATCHAR;      goto REPEATCHAR;
2810    
2811      case OP_POSQUERY:      case OP_POSQUERY:
2812        case OP_POSQUERYI:
2813      possessive = TRUE;      possessive = TRUE;
2814      min = 0;      min = 0;
2815      max = 1;      max = 1;
# Line 2216  for (;;) Line 2817  for (;;)
2817      goto REPEATCHAR;      goto REPEATCHAR;
2818    
2819      case OP_STAR:      case OP_STAR:
2820        case OP_STARI:
2821      case OP_MINSTAR:      case OP_MINSTAR:
2822        case OP_MINSTARI:
2823      case OP_PLUS:      case OP_PLUS:
2824        case OP_PLUSI:
2825      case OP_MINPLUS:      case OP_MINPLUS:
2826        case OP_MINPLUSI:
2827      case OP_QUERY:      case OP_QUERY:
2828        case OP_QUERYI:
2829      case OP_MINQUERY:      case OP_MINQUERY:
2830      c = *ecode++ - OP_STAR;      case OP_MINQUERYI:
2831        c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
2832      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2833      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2834      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2835      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2836    
2837      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2838    
2839      REPEATCHAR:      REPEATCHAR:
2840  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2238  for (;;) Line 2843  for (;;)
2843        length = 1;        length = 1;
2844        charptr = ecode;        charptr = ecode;
2845        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2846        ecode += length;        ecode += length;
2847    
2848        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2248  for (;;) Line 2852  for (;;)
2852          {          {
2853  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2854          unsigned int othercase;          unsigned int othercase;
2855          if ((ims & PCRE_CASELESS) != 0 &&          if (op >= OP_STARI &&     /* Caseless */
2856              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2857            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2858          else oclength = 0;          else oclength = 0;
2859  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2860    
2861          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2862            {            {
2863            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2864                memcmp(eptr, charptr, length) == 0) eptr += length;
2865  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2866            /* Need braces because of following else */            else if (oclength > 0 &&
2867            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                     eptr <= md->end_subject - oclength &&
2868                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2869    #endif  /* SUPPORT_UCP */
2870            else            else
2871              {              {
2872              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2873              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2874              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2875            }            }
2876    
2877          if (min == max) continue;          if (min == max) continue;
# Line 2276  for (;;) Line 2880  for (;;)
2880            {            {
2881            for (fi = min;; fi++)            for (fi = min;; fi++)
2882              {              {
2883              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM22);
2884              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2885              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2886              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2887                  memcmp(eptr, charptr, length) == 0) eptr += length;
2888  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2889              /* Need braces because of following else */              else if (oclength > 0 &&
2890              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                       eptr <= md->end_subject - oclength &&
2891                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2892    #endif  /* SUPPORT_UCP */
2893              else              else
2894                {                {
2895                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2896                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2897                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2898              }              }
2899            /* Control never gets here */            /* Control never gets here */
2900            }            }
# Line 2300  for (;;) Line 2904  for (;;)
2904            pp = eptr;            pp = eptr;
2905            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2906              {              {
2907              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2908              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2909  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2910              else if (oclength == 0) break;              else if (oclength > 0 &&
2911                         eptr <= md->end_subject - oclength &&
2912                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2913    #endif  /* SUPPORT_UCP */
2914              else              else
2915                {                {
2916                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2917                eptr += oclength;                break;
2918                }                }
 #else   /* without SUPPORT_UCP */  
             else break;  
 #endif  /* SUPPORT_UCP */  
2919              }              }
2920    
2921            if (possessive) continue;            if (possessive) continue;
2922    
2923            for(;;)            for(;;)
2924             {              {
2925             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM23);
2926             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2927             if (eptr == pp) RRETURN(MATCH_NOMATCH);              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2928  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2929             eptr--;              eptr--;
2930             BACKCHAR(eptr);              BACKCHAR(eptr);
2931  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2932             eptr -= length;              eptr -= length;
2933  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2934             }              }
2935            }            }
2936          /* Control never gets here */          /* Control never gets here */
2937          }          }
# Line 2339  for (;;) Line 2944  for (;;)
2944  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2945    
2946      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2947        {  
2948        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2949    
2950      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2951      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2356  for (;;) Line 2959  for (;;)
2959      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2960        max, eptr));        max, eptr));
2961    
2962      if ((ims & PCRE_CASELESS) != 0)      if (op >= OP_STARI)  /* Caseless */
2963        {        {
2964        fc = md->lcc[fc];        fc = md->lcc[fc];
2965        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2966          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2967            if (eptr >= md->end_subject)
2968              {
2969              SCHECK_PARTIAL();
2970              MRRETURN(MATCH_NOMATCH);
2971              }
2972            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2973            }
2974        if (min == max) continue;        if (min == max) continue;
2975        if (minimize)        if (minimize)
2976          {          {
2977          for (fi = min;; fi++)          for (fi = min;; fi++)
2978            {            {
2979            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM24);
2980            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2981            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2982                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2983              RRETURN(MATCH_NOMATCH);              {
2984                SCHECK_PARTIAL();
2985                MRRETURN(MATCH_NOMATCH);
2986                }
2987              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2988            }            }
2989          /* Control never gets here */          /* Control never gets here */
2990          }          }
# Line 2379  for (;;) Line 2993  for (;;)
2993          pp = eptr;          pp = eptr;
2994          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2995            {            {
2996            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2997                {
2998                SCHECK_PARTIAL();
2999                break;
3000                }
3001              if (fc != md->lcc[*eptr]) break;
3002            eptr++;            eptr++;
3003            }            }
3004    
3005          if (possessive) continue;          if (possessive) continue;
3006    
3007          while (eptr >= pp)          while (eptr >= pp)
3008            {            {
3009            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM25);
3010            eptr--;            eptr--;
3011            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3012            }            }
3013          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3014          }          }
3015        /* Control never gets here */        /* Control never gets here */
3016        }        }
# Line 2398  for (;;) Line 3019  for (;;)
3019    
3020      else      else
3021        {        {
3022        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
3023            {
3024            if (eptr >= md->end_subject)
3025              {
3026              SCHECK_PARTIAL();
3027              MRRETURN(MATCH_NOMATCH);
3028              }
3029            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3030            }
3031    
3032        if (min == max) continue;        if (min == max) continue;
3033    
3034        if (minimize)        if (minimize)
3035          {          {
3036          for (fi = min;; fi++)          for (fi = min;; fi++)
3037            {            {
3038            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM26);
3039            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3040            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3041              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3042                {
3043                SCHECK_PARTIAL();
3044                MRRETURN(MATCH_NOMATCH);
3045                }
3046              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3047            }            }
3048          /* Control never gets here */          /* Control never gets here */
3049          }          }
# Line 2416  for (;;) Line 3052  for (;;)
3052          pp = eptr;          pp = eptr;
3053          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3054            {            {
3055            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
3056                {
3057                SCHECK_PARTIAL();
3058                break;
3059                }
3060              if (fc != *eptr) break;
3061            eptr++;            eptr++;
3062            }            }
3063          if (possessive) continue;          if (possessive) continue;
3064    
3065          while (eptr >= pp)          while (eptr >= pp)
3066            {            {
3067            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM27);
3068            eptr--;            eptr--;
3069            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3070            }            }
3071          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3072          }          }
3073        }        }
3074      /* Control never gets here */      /* Control never gets here */
# Line 2435  for (;;) Line 3077  for (;;)
3077      checking can be multibyte. */      checking can be multibyte. */
3078    
3079      case OP_NOT:      case OP_NOT:
3080      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      case OP_NOTI:
3081        if (eptr >= md->end_subject)
3082          {
3083          SCHECK_PARTIAL();
3084          MRRETURN(MATCH_NOMATCH);
3085          }
3086      ecode++;      ecode++;
3087      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3088      if ((ims & PCRE_CASELESS) != 0)      if (op == OP_NOTI)         /* The caseless case */
3089        {        {
3090  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3091        if (c < 256)        if (c < 256)
3092  #endif  #endif
3093        c = md->lcc[c];        c = md->lcc[c];
3094        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3095        }        }
3096      else      else    /* Caseful */
3097        {        {
3098        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3099        }        }
3100      break;      break;
3101    
# Line 2460  for (;;) Line 3107  for (;;)
3107      about... */      about... */
3108    
3109      case OP_NOTEXACT:      case OP_NOTEXACT:
3110        case OP_NOTEXACTI:
3111      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3112      ecode += 3;      ecode += 3;
3113      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3114    
3115      case OP_NOTUPTO:      case OP_NOTUPTO:
3116        case OP_NOTUPTOI:
3117      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
3118        case OP_NOTMINUPTOI:
3119      min = 0;      min = 0;
3120      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3121      minimize = *ecode == OP_NOTMINUPTO;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3122      ecode += 3;      ecode += 3;
3123      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3124    
3125      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
3126        case OP_NOTPOSSTARI:
3127      possessive = TRUE;      possessive = TRUE;
3128      min = 0;      min = 0;
3129      max = INT_MAX;      max = INT_MAX;
# Line 2480  for (;;) Line 3131  for (;;)
3131      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3132    
3133      case OP_NOTPOSPLUS:      case OP_NOTPOSPLUS:
3134        case OP_NOTPOSPLUSI:
3135      possessive = TRUE;      possessive = TRUE;
3136      min = 1;      min = 1;
3137      max = INT_MAX;      max = INT_MAX;
# Line 2487  for (;;) Line 3139  for (;;)
3139      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3140    
3141      case OP_NOTPOSQUERY:      case OP_NOTPOSQUERY:
3142        case OP_NOTPOSQUERYI:
3143      possessive = TRUE;      possessive = TRUE;
3144      min = 0;      min = 0;
3145      max = 1;      max = 1;
# Line 2494  for (;;) Line 3147  for (;;)
3147      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3148    
3149      case OP_NOTPOSUPTO:      case OP_NOTPOSUPTO:
3150        case OP_NOTPOSUPTOI:
3151      possessive = TRUE;      possessive = TRUE;
3152      min = 0;      min = 0;
3153      max = GET2(ecode, 1);      max = GET2(ecode, 1);
# Line 2501  for (;;) Line 3155  for (;;)
3155      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3156    
3157      case OP_NOTSTAR:      case OP_NOTSTAR:
3158        case OP_NOTSTARI:
3159      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3160        case OP_NOTMINSTARI:
3161      case OP_NOTPLUS:      case OP_NOTPLUS:
3162        case OP_NOTPLUSI:
3163      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
3164        case OP_NOTMINPLUSI:
3165      case OP_NOTQUERY:      case OP_NOTQUERY:
3166        case OP_NOTQUERYI:
3167      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
3168      c = *ecode++ - OP_NOTSTAR;      case OP_NOTMINQUERYI:
3169        c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3170      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
3171      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
3172      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3173      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3174    
3175      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3176    
3177      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3178      fc = *ecode++;      fc = *ecode++;
3179    
3180      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2531  for (;;) Line 3188  for (;;)
3188      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3189        max, eptr));        max, eptr));
3190    
3191      if ((ims & PCRE_CASELESS) != 0)      if (op >= OP_NOTSTARI)     /* Caseless */
3192        {        {
3193        fc = md->lcc[fc];        fc = md->lcc[fc];
3194    
# Line 2542  for (;;) Line 3199  for (;;)
3199          register unsigned int d;          register unsigned int d;
3200          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3201            {            {
3202              if (eptr >= md->end_subject)
3203                {
3204                SCHECK_PARTIAL();
3205                MRRETURN(MATCH_NOMATCH);
3206                }
3207            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3208            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3209            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3210            }            }
3211          }          }
3212        else        else
# Line 2553  for (;;) Line 3215  for (;;)
3215        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3216          {          {
3217          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3218            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3219              if (eptr >= md->end_subject)
3220                {
3221                SCHECK_PARTIAL();
3222                MRRETURN(MATCH_NOMATCH);
3223                }
3224              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3225              }
3226          }          }
3227    
3228        if (min == max) continue;        if (min == max) continue;
# Line 2567  for (;;) Line 3236  for (;;)
3236            register unsigned int d;            register unsigned int d;
3237            for (fi = min;; fi++)            for (fi = min;; fi++)
3238              {              {
3239              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM28);
3240              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3241                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3242                if (eptr >= md->end_subject)
3243                  {
3244                  SCHECK_PARTIAL();
3245                  MRRETURN(MATCH_NOMATCH);
3246                  }
3247              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3248              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3249              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3250              }              }
3251            }            }
3252          else          else
# Line 2581  for (;;) Line 3255  for (;;)
3255            {            {
3256            for (fi = min;; fi++)            for (fi = min;; fi++)
3257              {              {
3258              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM29);
3259              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3260              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3261                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3262                  {
3263                  SCHECK_PARTIAL();
3264                  MRRETURN(MATCH_NOMATCH);
3265                  }
3266                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3267              }              }
3268            }            }
3269          /* Control never gets here */          /* Control never gets here */
# Line 2604  for (;;) Line 3283  for (;;)
3283            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3284              {              {
3285              int len = 1;              int len = 1;
3286              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3287                  {
3288                  SCHECK_PARTIAL();
3289                  break;
3290                  }
3291              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3292              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3293              if (fc == d) break;              if (fc == d) break;
# Line 2613  for (;;) Line 3296  for (;;)
3296          if (possessive) continue;          if (possessive) continue;
3297          for(;;)          for(;;)
3298              {              {
3299              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM30);
3300              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3301              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3302              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2625  for (;;) Line 3308  for (;;)
3308            {            {
3309            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3310              {              {
3311              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3312                  {
3313                  SCHECK_PARTIAL();
3314                  break;
3315                  }
3316                if (fc == md->lcc[*eptr]) break;
3317              eptr++;              eptr++;
3318              }              }
3319            if (possessive) continue;            if (possessive) continue;
3320            while (eptr >= pp)            while (eptr >= pp)
3321              {              {
3322              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM31);
3323              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3324              eptr--;              eptr--;
3325              }              }
3326            }            }
3327    
3328          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3329          }          }
3330        /* Control never gets here */        /* Control never gets here */
3331        }        }
# Line 2653  for (;;) Line 3341  for (;;)
3341          register unsigned int d;          register unsigned int d;
3342          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3343            {            {
3344              if (eptr >= md->end_subject)
3345                {
3346                SCHECK_PARTIAL();
3347                MRRETURN(MATCH_NOMATCH);
3348                }
3349            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3350            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3351            }            }
3352          }          }
3353        else        else
# Line 2662  for (;;) Line 3355  for (;;)
3355        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3356          {          {
3357          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3358            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3359              if (eptr >= md->end_subject)
3360                {
3361                SCHECK_PARTIAL();
3362                MRRETURN(MATCH_NOMATCH);
3363                }
3364              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3365              }
3366          }          }
3367    
3368        if (min == max) continue;        if (min == max) continue;
# Line 2676  for (;;) Line 3376  for (;;)
3376            register unsigned int d;            register unsigned int d;
3377            for (fi = min;; fi++)            for (fi = min;; fi++)
3378              {              {
3379              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM32);
3380              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3381                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3382                if (eptr >= md->end_subject)
3383                  {
3384                  SCHECK_PARTIAL();
3385                  MRRETURN(MATCH_NOMATCH);
3386                  }
3387              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3388              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3389              }              }
3390            }            }
3391          else          else
# Line 2689  for (;;) Line 3394  for (;;)
3394            {            {
3395            for (fi = min;; fi++)            for (fi = min;; fi++)
3396              {              {
3397              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM33);
3398              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3399              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3400                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3401                  {
3402                  SCHECK_PARTIAL();
3403                  MRRETURN(MATCH_NOMATCH);
3404                  }
3405                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3406              }              }
3407            }            }
3408          /* Control never gets here */          /* Control never gets here */
# Line 2712  for (;;) Line 3422  for (;;)
3422            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3423              {              {
3424              int len = 1;              int len = 1;
3425              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3426                  {
3427                  SCHECK_PARTIAL();
3428                  break;
3429                  }
3430              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3431              if (fc == d) break;              if (fc == d) break;
3432              eptr += len;              eptr += len;
# Line 2720  for (;;) Line 3434  for (;;)
3434            if (possessive) continue;            if (possessive) continue;
3435            for(;;)            for(;;)
3436              {              {
3437              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM34);
3438              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3439              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3440              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2732  for (;;) Line 3446  for (;;)
3446            {            {
3447            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3448              {              {
3449              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3450                  {
3451                  SCHECK_PARTIAL();
3452                  break;
3453                  }
3454                if (fc == *eptr) break;
3455              eptr++;              eptr++;
3456              }              }
3457            if (possessive) continue;            if (possessive) continue;
3458            while (eptr >= pp)            while (eptr >= pp)
3459              {              {
3460              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM35);
3461              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3462              eptr--;              eptr--;
3463              }              }
3464            }            }
3465    
3466          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3467          }          }
3468        }        }
3469      /* Control never gets here */      /* Control never gets here */
# Line 2826  for (;;) Line 3545  for (;;)
3545    
3546      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3547      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3548      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3549      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3550      and single-bytes. */      and single-bytes. */
3551    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3552      if (min > 0)      if (min > 0)
3553        {        {
3554  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2841  for (;;) Line 3557  for (;;)
3557          switch(prop_type)          switch(prop_type)
3558            {            {
3559            case PT_ANY:            case PT_ANY:
3560            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3561            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3562              {              {
3563              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3564                  {
3565                  SCHECK_PARTIAL();
3566                  MRRETURN(MATCH_NOMATCH);
3567                  }
3568              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3569              }              }
3570            break;            break;
# Line 2852  for (;;) Line 3572  for (;;)
3572            case PT_LAMP:            case PT_LAMP:
3573            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3574              {              {
3575              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3576                  {
3577                  SCHECK_PARTIAL();
3578                  MRRETURN(MATCH_NOMATCH);
3579                  }
3580              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3581              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3582              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3583                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3584                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3585                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3586              }              }
3587            break;            break;
3588    
3589            case PT_GC:            case PT_GC:
3590            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3591              {              {
3592              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3593                  {
3594                  SCHECK_PARTIAL();
3595                  MRRETURN(MATCH_NOMATCH);
3596                  }
3597              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3598              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3599              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3600                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3601              }              }
3602            break;            break;
3603    
3604            case PT_PC:            case PT_PC:
3605            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3606              {              {
3607              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3608                  {
3609                  SCHECK_PARTIAL();
3610                  MRRETURN(MATCH_NOMATCH);
3611                  }
3612              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3613              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3614              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3615                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3616              }              }
3617            break;            break;
3618    
3619            case PT_SC:            case PT_SC:
3620            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3621              {              {
3622              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3623                  {
3624                  SCHECK_PARTIAL();
3625                  MRRETURN(MATCH_NOMATCH);
3626                  }
3627              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3628              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3629              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3630                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3631                }
3632              break;
3633    
3634              case PT_ALNUM:
3635              for (i = 1; i <= min; i++)
3636                {
3637                if (eptr >= md->end_subject)
3638                  {
3639                  SCHECK_PARTIAL();
3640                  MRRETURN(MATCH_NOMATCH);
3641                  }
3642                GETCHARINCTEST(c, eptr);
3643                prop_category = UCD_CATEGORY(c);
3644                if ((prop_category == ucp_L || prop_category == ucp_N)
3645                       == prop_fail_result)
3646                  MRRETURN(MATCH_NOMATCH);
3647                }
3648              break;
3649    
3650              case PT_SPACE:    /* Perl space */
3651              for (i = 1; i <= min; i++)
3652                {
3653                if (eptr >= md->end_subject)
3654                  {
3655                  SCHECK_PARTIAL();
3656                  MRRETURN(MATCH_NOMATCH);
3657                  }
3658                GETCHARINCTEST(c, eptr);
3659                prop_category = UCD_CATEGORY(c);
3660                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3661                     c == CHAR_FF || c == CHAR_CR)
3662                       == prop_fail_result)
3663                  MRRETURN(MATCH_NOMATCH);
3664                }
3665              break;
3666    
3667              case PT_PXSPACE:  /* POSIX space */
3668              for (i = 1; i <= min; i++)
3669                {
3670                if (eptr >= md->end_subject)
3671                  {
3672                  SCHECK_PARTIAL();
3673                  MRRETURN(MATCH_NOMATCH);
3674                  }
3675                GETCHARINCTEST(c, eptr);
3676                prop_category = UCD_CATEGORY(c);
3677                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3678                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3679                       == prop_fail_result)
3680                  MRRETURN(MATCH_NOMATCH);
3681                }
3682              break;
3683    
3684              case PT_WORD:
3685              for (i = 1; i <= min; i++)
3686                {
3687                if (eptr >= md->end_subject)
3688                  {
3689                  SCHECK_PARTIAL();
3690                  MRRETURN(MATCH_NOMATCH);
3691                  }
3692                GETCHARINCTEST(c, eptr);
3693                prop_category = UCD_CATEGORY(c);
3694                if ((prop_category == ucp_L || prop_category == ucp_N ||
3695                     c == CHAR_UNDERSCORE)
3696                       == prop_fail_result)
3697                  MRRETURN(MATCH_NOMATCH);
3698              }              }
3699            break;            break;
3700    
3701              /* This should not occur */
3702    
3703            default:            default:
3704            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
3705            }            }
# Line 2907  for (;;) Line 3712  for (;;)
3712          {          {
3713          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3714            {            {
3715              if (eptr >= md->end_subject)
3716                {
3717                SCHECK_PARTIAL();
3718                MRRETURN(MATCH_NOMATCH);
3719                }
3720            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3721            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3722            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3723            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3724              {              {
3725              int len = 1;              int len = 1;
3726              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3727                {                else { GETCHARLEN(c, eptr, len); }
3728                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3729              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3730              eptr += len;              eptr += len;
3731              }              }
# Line 2935  for (;;) Line 3743  for (;;)
3743          case OP_ANY:          case OP_ANY:
3744          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3745            {            {
3746            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3747                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
3748              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3749                MRRETURN(MATCH_NOMATCH);
3750                }
3751              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3752              eptr++;
3753              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3754              }
3755            break;
3756    
3757            case OP_ALLANY:
3758            for (i = 1; i <= min; i++)
3759              {
3760              if (eptr >= md->end_subject)
3761                {
3762                SCHECK_PARTIAL();
3763                MRRETURN(MATCH_NOMATCH);
3764                }
3765            eptr++;            eptr++;
3766            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3767            }            }
3768          break;          break;
3769    
3770          case OP_ANYBYTE:          case OP_ANYBYTE:
3771            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3772          eptr += min;          eptr += min;
3773          break;          break;
3774    
3775          case OP_ANYNL:          case OP_ANYNL:
3776          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3777            {            {
3778            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3779                {
3780                SCHECK_PARTIAL();
3781                MRRETURN(MATCH_NOMATCH);
3782                }
3783            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3784            switch(c)            switch(c)
3785              {              {
3786              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3787    
3788              case 0x000d:              case 0x000d:
3789              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3790              break;              break;
# Line 2967  for (;;) Line 3797  for (;;)
3797              case 0x0085:              case 0x0085:
3798              case 0x2028:              case 0x2028:
3799              case 0x2029:              case 0x2029:
3800              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);              if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3801              break;              break;
3802              }              }
3803            }            }
# Line 2976  for (;;) Line 3806  for (;;)
3806          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
3807          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3808            {            {
3809            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3810                {
3811                SCHECK_PARTIAL();
3812                MRRETURN(MATCH_NOMATCH);
3813                }
3814            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3815            switch(c)            switch(c)
3816              {              {
# Line 3000  for (;;) Line 3834  for (;;)
3834              case 0x202f:    /* NARROW NO-BREAK SPACE */              case 0x202f:    /* NARROW NO-BREAK SPACE */
3835              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3836              case 0x3000:    /* IDEOGRAPHIC SPACE */              case 0x3000:    /* IDEOGRAPHIC SPACE */
3837              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3838              }              }
3839            }            }
3840          break;          break;
# Line 3008  for (;;) Line 3842  for (;;)
3842          case OP_HSPACE:          case OP_HSPACE:
3843          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3844            {            {
3845            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3846                {
3847                SCHECK_PARTIAL();
3848                MRRETURN(MATCH_NOMATCH);
3849                }
3850            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3851            switch(c)            switch(c)
3852              {              {
3853              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3854              case 0x09:      /* HT */              case 0x09:      /* HT */
3855              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
3856              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
# Line 3040  for (;;) Line 3878  for (;;)
3878          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
3879          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3880            {            {
3881            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3882                {
3883                SCHECK_PARTIAL();
3884                MRRETURN(MATCH_NOMATCH);
3885                }
3886            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3887            switch(c)            switch(c)
3888              {              {
# Line 3052  for (;;) Line 3894  for (;;)
3894              case 0x85:      /* NEL */              case 0x85:      /* NEL */
3895              case 0x2028:    /* LINE SEPARATOR */              case 0x2028:    /* LINE SEPARATOR */
3896              case 0x2029:    /* PARAGRAPH SEPARATOR */              case 0x2029:    /* PARAGRAPH SEPARATOR */
3897              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3898              }              }
3899            }            }
3900          break;          break;
# Line 3060  for (;;) Line 3902  for (;;)
3902          case OP_VSPACE:          case OP_VSPACE:
3903          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3904            {            {
3905            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3906                {
3907                SCHECK_PARTIAL();
3908                MRRETURN(MATCH_NOMATCH);
3909                }
3910            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3911            switch(c)            switch(c)
3912              {              {
3913              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3914              case 0x0a:      /* LF */              case 0x0a:      /* LF */
3915              case 0x0b:      /* VT */              case 0x0b:      /* VT */
3916              case 0x0c:      /* FF */              case 0x0c:      /* FF */
# Line 3080  for (;;) Line 3926  for (;;)
3926          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3927          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3928            {            {
3929            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3930                {
3931                SCHECK_PARTIAL();
3932                MRRETURN(MATCH_NOMATCH);
3933                }
3934            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3935            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3936              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3937            }            }
3938          break;          break;
3939    
3940          case OP_DIGIT:          case OP_DIGIT:
3941          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3942            {            {
3943            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3944               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3945              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3946                MRRETURN(MATCH_NOMATCH);
3947                }
3948              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3949                MRRETURN(MATCH_NOMATCH);
3950            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3951            }            }
3952          break;          break;
# Line 3100  for (;;) Line 3954  for (;;)
3954          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3955          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3956            {            {
3957            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3958               (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))              {
3959              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3960                MRRETURN(MATCH_NOMATCH);
3961                }
3962              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3963                MRRETURN(MATCH_NOMATCH);
3964            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3965            }            }
3966          break;          break;
# Line 3110  for (;;) Line 3968  for (;;)
3968          case OP_WHITESPACE:          case OP_WHITESPACE:
3969          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3970            {            {
3971            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3972               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3973              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3974                MRRETURN(MATCH_NOMATCH);
3975                }
3976              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3977                MRRETURN(MATCH_NOMATCH);
3978            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3979            }            }
3980          break;          break;
# Line 3120  for (;;) Line 3982  for (;;)
3982          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3983          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3984            {            {
3985            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3986               (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))              {
3987              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3988                MRRETURN(MATCH_NOMATCH);
3989                }
3990              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3991                MRRETURN(MATCH_NOMATCH);
3992            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3993            }            }
3994          break;          break;
# Line 3130  for (;;) Line 3996  for (;;)
3996          case OP_WORDCHAR:          case OP_WORDCHAR:
3997          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3998            {            {
3999            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
4000               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
4001              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
4002                MRRETURN(MATCH_NOMATCH);
4003                }
4004              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
4005                MRRETURN(MATCH_NOMATCH);
4006            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4007            }            }
4008          break;          break;
# Line 3145  for (;;) Line 4015  for (;;)
4015  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
4016    
4017        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
4018        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
4019    
4020        switch(ctype)        switch(ctype)
4021          {          {
4022          case OP_ANY:          case OP_ANY:
4023          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
4024            {            {
4025            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
4026              {              {
4027              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
4028              eptr++;              MRRETURN(MATCH_NOMATCH);
4029              }              }
4030              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
4031              eptr++;
4032            }            }
         else eptr += min;  
4033          break;          break;
4034    
4035          case OP_ANYBYTE:          case OP_ALLANY:
4036            if (eptr > md->end_subject - min)
4037              {
4038              SCHECK_PARTIAL();
4039              MRRETURN(MATCH_NOMATCH);
4040              }
4041          eptr += min;          eptr += min;
4042          break;          break;
4043    
4044          /* Because of the CRLF case, we can't assume the minimum number of          case OP_ANYBYTE:
4045          bytes are present in this case. */          if (eptr > md->end_subject - min)
4046              {
4047              SCHECK_PARTIAL();
4048              MRRETURN(MATCH_NOMATCH);
4049              }
4050            eptr += min;
4051            break;
4052    
4053          case OP_ANYNL:          case OP_ANYNL:
4054          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4055            {            {
4056            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
4057                {
4058                SCHECK_PARTIAL();
4059                MRRETURN(MATCH_NOMATCH);
4060                }
4061            switch(*eptr++)            switch(*eptr++)
4062              {              {
4063              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4064    
4065              case 0x000d:              case 0x000d:
4066              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4067              break;              break;
4068    
4069              case 0x000a:              case 0x000a:
4070              break;              break;
4071    
4072              case 0x000b:              case 0x000b:
4073              case 0x000c:              case 0x000c:
4074              case 0x0085:              case 0x0085:
4075              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);              if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4076              break;              break;
4077              }              }
4078            }            }
# Line 3194  for (;;) Line 4081  for (;;)
4081          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
4082          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4083            {            {
4084            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
4085                {
4086                SCHECK_PARTIAL();
4087                MRRETURN(MATCH_NOMATCH);
4088                }
4089            switch(*eptr++)            switch(*eptr++)
4090              {              {
4091              default: break;              default: break;
4092              case 0x09:      /* HT */              case 0x09:      /* HT */
4093              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4094              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4095              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4096              }              }
4097            }            }
4098          break;          break;
# Line 3209  for (;;) Line 4100  for (;;)
4100          case OP_HSPACE:          case OP_HSPACE:
4101          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4102            {            {
4103            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
4104       &n