/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 615 by ph10, Mon Jul 11 14:23:06 2011 UTC revision 1187 by zherczeg, Mon Oct 29 11:30:45 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
   
40  /* This module contains pcre_exec(), the externally visible function that does  /* This module contains pcre_exec(), the externally visible function that does
41  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
# Line 57  possible. There are also some static sup Line 56  possible. There are also some static sup
56  #undef min  #undef min
57  #undef max  #undef max
58    
59  /* Values for setting in md->match_function_type to indicate two special types  /* Values for setting in md->match_function_type to indicate two special types
60  of call to match(). We do it this way to save on using another stack variable,  of call to match(). We do it this way to save on using another stack variable,
61  as stack usage is to be discouraged. */  as stack usage is to be discouraged. */
62    
63  #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */  #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
# Line 76  negative to avoid the external error cod Line 75  negative to avoid the external error cod
75  #define MATCH_ACCEPT       (-999)  #define MATCH_ACCEPT       (-999)
76  #define MATCH_COMMIT       (-998)  #define MATCH_COMMIT       (-998)
77  #define MATCH_KETRPOS      (-997)  #define MATCH_KETRPOS      (-997)
78  #define MATCH_PRUNE        (-996)  #define MATCH_ONCE         (-996)
79  #define MATCH_SKIP         (-995)  #define MATCH_PRUNE        (-995)
80  #define MATCH_SKIP_ARG     (-994)  #define MATCH_SKIP         (-994)
81  #define MATCH_THEN         (-993)  #define MATCH_SKIP_ARG     (-993)
82    #define MATCH_THEN         (-992)
 /* This is a convenience macro for code that occurs many times. */  
   
 #define MRRETURN(ra) \  
   { \  
   md->mark = markptr; \  
   RRETURN(ra); \  
   }  
83    
84  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
85  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
# Line 100  because the offset vector is always a mu Line 92  because the offset vector is always a mu
92  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
93  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
94    
   
   
95  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
96  /*************************************************  /*************************************************
97  *        Debugging function to print chars       *  *        Debugging function to print chars       *
# Line 120  Returns:     nothing Line 110  Returns:     nothing
110  */  */
111    
112  static void  static void
113  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
114  {  {
115  unsigned int c;  pcre_uint32 c;
116    BOOL utf = md->utf;
117  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
118  while (length-- > 0)  while (length-- > 0)
119    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = RAWUCHARINCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
120  }  }
121  #endif  #endif
122    
# Line 137  while (length-- > 0) Line 128  while (length-- > 0)
128    
129  /* Normally, if a back reference hasn't been set, the length that is passed is  /* Normally, if a back reference hasn't been set, the length that is passed is
130  negative, so the match always fails. However, in JavaScript compatibility mode,  negative, so the match always fails. However, in JavaScript compatibility mode,
131  the length passed is zero. Note that in caseless UTF-8 mode, the number of  the length passed is zero. Note that in caseless UTF-8 mode, the number of
132  subject bytes matched may be different to the number of reference bytes.  subject bytes matched may be different to the number of reference bytes.
133    
134  Arguments:  Arguments:
# Line 147  Arguments: Line 138  Arguments:
138    md          points to match data block    md          points to match data block
139    caseless    TRUE if caseless    caseless    TRUE if caseless
140    
141  Returns:      < 0 if not matched, otherwise the number of subject bytes matched  Returns:      >= 0 the number of subject bytes matched
142                  -1 no match
143                  -2 partial match; always given if at end subject
144  */  */
145    
146  static int  static int
147  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
148    BOOL caseless)    BOOL caseless)
149  {  {
150  USPTR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
151  register USPTR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
152    #ifdef SUPPORT_UTF
153    BOOL utf = md->utf;
154    #endif
155    
156  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
157  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 170  pchars(p, length, FALSE, md); Line 166  pchars(p, length, FALSE, md);
166  printf("\n");  printf("\n");
167  #endif  #endif
168    
169  /* Always fail if reference not set (and not JavaScript compatible). */  /* Always fail if reference not set (and not JavaScript compatible - in that
170    case the length is passed as zero). */
171    
172  if (length < 0) return -1;  if (length < 0) return -1;
173    
# Line 180  ASCII characters. */ Line 177  ASCII characters. */
177    
178  if (caseless)  if (caseless)
179    {    {
180  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
181  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
182    if (md->utf8)    if (utf)
183      {      {
184      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
185      bytes matched may differ, because there are some characters whose upper and      data units matched may differ, because in UTF-8 there are some characters
186      lower case versions code as different numbers of bytes. For example, U+023A      whose upper and lower case versions code as different numbers of bytes.
187      (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);      For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
188      a sequence of 3 of the former uses 6 bytes, as does a sequence of two of      (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
189      the latter. It is important, therefore, to check the length along the      sequence of two of the latter. It is important, therefore, to check the
190      reference, not along the subject (earlier code did this wrong). */      length along the reference, not along the subject (earlier code did this
191        wrong). */
192      USPTR endptr = p + length;  
193        PCRE_PUCHAR endptr = p + length;
194      while (p < endptr)      while (p < endptr)
195        {        {
196        int c, d;        pcre_uint32 c, d;
197        if (eptr >= md->end_subject) return -1;        const ucd_record *ur;
198          if (eptr >= md->end_subject) return -2;   /* Partial match */
199        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
200        GETCHARINC(d, p);        GETCHARINC(d, p);
201        if (c != d && c != UCD_OTHERCASE(d)) return -1;        ur = GET_UCD(d);
202          if (c != d && c != d + ur->other_case)
203            {
204            const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
205            for (;;)
206              {
207              if (c < *pp) return -1;
208              if (c == *pp++) break;
209              }
210            }
211        }        }
212      }      }
213    else    else
# Line 209  if (caseless) Line 217  if (caseless)
217    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
218    is no UCP support. */    is no UCP support. */
219      {      {
     if (eptr + length > md->end_subject) return -1;  
220      while (length-- > 0)      while (length-- > 0)
221        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }        {
222      }        pcre_uchar cc, cp;
223          if (eptr >= md->end_subject) return -2;   /* Partial match */
224          cc = RAWUCHARTEST(eptr);
225          cp = RAWUCHARTEST(p);
226          if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
227          p++;
228          eptr++;
229          }
230        }
231    }    }
232    
233  /* In the caseful case, we can just compare the bytes, whether or not we  /* In the caseful case, we can just compare the bytes, whether or not we
234  are in UTF-8 mode. */  are in UTF-8 mode. */
235    
236  else  else
237    {    {
238    if (eptr + length > md->end_subject) return -1;    while (length-- > 0)
239    while (length-- > 0) if (*p++ != *eptr++) return -1;      {
240        if (eptr >= md->end_subject) return -2;   /* Partial match */
241        if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1;
242        }
243    }    }
244    
245  return eptr - eptr_start;  return (int)(eptr - eptr_start);
246  }  }
247    
248    
# Line 276  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 294  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
294         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
295         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
296         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
297         RM61,  RM62, RM63};         RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };
298    
299  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
300  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 289  actually used in this definition. */ Line 307  actually used in this definition. */
307  #define RMATCH(ra,rb,rc,rd,re,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
308    { \    { \
309    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
310    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
311    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
312    }    }
313  #define RRETURN(ra) \  #define RRETURN(ra) \
314    { \    { \
315    printf("match() returned %d from line %d ", ra, __LINE__); \    printf("match() returned %d from line %d\n", ra, __LINE__); \
316    return ra; \    return ra; \
317    }    }
318  #else  #else
319  #define RMATCH(ra,rb,rc,rd,re,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
320    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
321  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
322  #endif  #endif
323    
# Line 314  argument of match(), which never changes Line 332  argument of match(), which never changes
332    
333  #define RMATCH(ra,rb,rc,rd,re,rw)\  #define RMATCH(ra,rb,rc,rd,re,rw)\
334    {\    {\
335    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = frame->Xnextframe;\
336    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL)\
337    frame->Xwhere = rw; \      {\
338        newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
339        if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
340        newframe->Xnextframe = NULL;\
341        frame->Xnextframe = newframe;\
342        }\
343      frame->Xwhere = rw;\
344    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
345    newframe->Xecode = rb;\    newframe->Xecode = rb;\
346    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
   newframe->Xmarkptr = markptr;\  
347    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
348    newframe->Xeptrb = re;\    newframe->Xeptrb = re;\
349    newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
# Line 336  argument of match(), which never changes Line 359  argument of match(), which never changes
359    {\    {\
360    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
361    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
   (pcre_stack_free)(oldframe);\  
362    if (frame != NULL)\    if (frame != NULL)\
363      {\      {\
364      rrc = ra;\      rrc = ra;\
# Line 350  argument of match(), which never changes Line 372  argument of match(), which never changes
372    
373  typedef struct heapframe {  typedef struct heapframe {
374    struct heapframe *Xprevframe;    struct heapframe *Xprevframe;
375      struct heapframe *Xnextframe;
376    
377    /* Function arguments that may change */    /* Function arguments that may change */
378    
379    USPTR Xeptr;    PCRE_PUCHAR Xeptr;
380    const uschar *Xecode;    const pcre_uchar *Xecode;
381    USPTR Xmstart;    PCRE_PUCHAR Xmstart;
   USPTR Xmarkptr;  
382    int Xoffset_top;    int Xoffset_top;
383    eptrblock *Xeptrb;    eptrblock *Xeptrb;
384    unsigned int Xrdepth;    unsigned int Xrdepth;
385    
386    /* Function local variables */    /* Function local variables */
387    
388    USPTR Xcallpat;    PCRE_PUCHAR Xcallpat;
389  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
390    USPTR Xcharptr;    PCRE_PUCHAR Xcharptr;
391  #endif  #endif
392    USPTR Xdata;    PCRE_PUCHAR Xdata;
393    USPTR Xnext;    PCRE_PUCHAR Xnext;
394    USPTR Xpp;    PCRE_PUCHAR Xpp;
395    USPTR Xprev;    PCRE_PUCHAR Xprev;
396    USPTR Xsaved_eptr;    PCRE_PUCHAR Xsaved_eptr;
397    
398    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
399    
# Line 381  typedef struct heapframe { Line 403  typedef struct heapframe {
403    
404  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
405    int Xprop_type;    int Xprop_type;
406    int Xprop_value;    unsigned int Xprop_value;
407    int Xprop_fail_result;    int Xprop_fail_result;
   int Xprop_category;  
   int Xprop_chartype;  
   int Xprop_script;  
408    int Xoclength;    int Xoclength;
409    uschar Xocchars[8];    pcre_uchar Xocchars[6];
410  #endif  #endif
411    
412    int Xcodelink;    int Xcodelink;
# Line 429  returns a negative (error) response, the Line 448  returns a negative (error) response, the
448  same response. */  same response. */
449    
450  /* These macros pack up tests that are used for partial matching, and which  /* These macros pack up tests that are used for partial matching, and which
451  appears several times in the code. We set the "hit end" flag if the pointer is  appear several times in the code. We set the "hit end" flag if the pointer is
452  at the end of the subject and also past the start of the subject (i.e.  at the end of the subject and also past the start of the subject (i.e.
453  something has been matched). For hard partial matching, we then return  something has been matched). For hard partial matching, we then return
454  immediately. The second one is used when we already know we are past the end of  immediately. The second one is used when we already know we are past the end of
# Line 440  the subject. */ Line 459  the subject. */
459        eptr > md->start_used_ptr) \        eptr > md->start_used_ptr) \
460      { \      { \
461      md->hitend = TRUE; \      md->hitend = TRUE; \
462      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
463      }      }
464    
465  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
466    if (md->partial != 0 && eptr > md->start_used_ptr) \    if (md->partial != 0 && eptr > md->start_used_ptr) \
467      { \      { \
468      md->hitend = TRUE; \      md->hitend = TRUE; \
469      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
470      }      }
471    
472    
473  /* Performance note: It might be tempting to extract commonly used fields from  /* Performance note: It might be tempting to extract commonly used fields from
474  the md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf, end_subject) into individual variables to improve
475  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
476  made performance worse.  made performance worse.
477    
# Line 461  Arguments: Line 480  Arguments:
480     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
481     mstart      pointer to the current match start position (can be modified     mstart      pointer to the current match start position (can be modified
482                   by encountering \K)                   by encountering \K)
    markptr     pointer to the most recent MARK name, or NULL  
483     offset_top  current top pointer     offset_top  current top pointer
484     md          pointer to "static" info for the match     md          pointer to "static" info for the match
485     eptrb       pointer to chain of blocks containing eptr at start of     eptrb       pointer to chain of blocks containing eptr at start of
# Line 476  Returns:       MATCH_MATCH if matched Line 494  Returns:       MATCH_MATCH if matched
494  */  */
495    
496  static int  static int
497  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
498    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,    PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
499    unsigned int rdepth)    unsigned int rdepth)
500  {  {
501  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
# Line 486  so they can be ordinary variables in all Line 504  so they can be ordinary variables in all
504    
505  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
506  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
507  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
508  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
509    
510  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
511  BOOL caseless;  BOOL caseless;
512  int condcode;  int condcode;
513    
514  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
515  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
516  heap storage. Set up the top-level frame here; others are obtained from the  frame on the stack here; subsequent instantiations are obtained from the heap
517  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
518    the top-level on the stack rather than malloc-ing them all gives a performance
519    boost in many cases where there is not much "recursion". */
520    
521  #ifdef NO_RECURSE  #ifdef NO_RECURSE
522  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)md->match_frames_base;
 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  
 frame->Xprevframe = NULL;            /* Marks the top level */  
523    
524  /* Copy in the original argument variables */  /* Copy in the original argument variables */
525    
526  frame->Xeptr = eptr;  frame->Xeptr = eptr;
527  frame->Xecode = ecode;  frame->Xecode = ecode;
528  frame->Xmstart = mstart;  frame->Xmstart = mstart;
 frame->Xmarkptr = markptr;  
529  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
530  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
531  frame->Xrdepth = rdepth;  frame->Xrdepth = rdepth;
# Line 522  HEAP_RECURSE: Line 539  HEAP_RECURSE:
539  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
540  #define ecode              frame->Xecode  #define ecode              frame->Xecode
541  #define mstart             frame->Xmstart  #define mstart             frame->Xmstart
 #define markptr            frame->Xmarkptr  
542  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
543  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
544  #define rdepth             frame->Xrdepth  #define rdepth             frame->Xrdepth
545    
546  /* Ditto for the local variables */  /* Ditto for the local variables */
547    
548  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
549  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
550  #endif  #endif
551  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
# Line 550  HEAP_RECURSE: Line 566  HEAP_RECURSE:
566  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
567  #define prop_value         frame->Xprop_value  #define prop_value         frame->Xprop_value
568  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
 #define prop_category      frame->Xprop_category  
 #define prop_chartype      frame->Xprop_chartype  
 #define prop_script        frame->Xprop_script  
569  #define oclength           frame->Xoclength  #define oclength           frame->Xoclength
570  #define occhars            frame->Xocchars  #define occhars            frame->Xocchars
571  #endif  #endif
# Line 590  declarations can be cut out in a block. Line 603  declarations can be cut out in a block.
603  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
604  to RMATCH(). */  to RMATCH(). */
605    
606  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
607  const uschar *charptr;  const pcre_uchar *charptr;
608  #endif  #endif
609  const uschar *callpat;  const pcre_uchar *callpat;
610  const uschar *data;  const pcre_uchar *data;
611  const uschar *next;  const pcre_uchar *next;
612  USPTR         pp;  PCRE_PUCHAR       pp;
613  const uschar *prev;  const pcre_uchar *prev;
614  USPTR         saved_eptr;  PCRE_PUCHAR       saved_eptr;
615    
616  recursion_info new_recursive;  recursion_info new_recursive;
617    
618  BOOL cur_is_word;  BOOL cur_is_word;
619  BOOL condition;  BOOL condition;
620  BOOL prev_is_word;  BOOL prev_is_word;
621    
622  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
623  int prop_type;  int prop_type;
624  int prop_value;  unsigned int prop_value;
625  int prop_fail_result;  int prop_fail_result;
 int prop_category;  
 int prop_chartype;  
 int prop_script;  
626  int oclength;  int oclength;
627  uschar occhars[8];  pcre_uchar occhars[6];
628  #endif  #endif
629    
630  int codelink;  int codelink;
# Line 622  int ctype; Line 632  int ctype;
632  int length;  int length;
633  int max;  int max;
634  int min;  int min;
635  int number;  unsigned int number;
636  int offset;  int offset;
637  int op;  pcre_uchar op;
638  int save_capture_last;  int save_capture_last;
639  int save_offset1, save_offset2, save_offset3;  int save_offset1, save_offset2, save_offset3;
640  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
641    
642  eptrblock newptrb;  eptrblock newptrb;
643    
644    /* There is a special fudge for calling match() in a way that causes it to
645    measure the size of its basic stack frame when the stack is being used for
646    recursion. The second argument (ecode) being NULL triggers this behaviour. It
647    cannot normally ever be NULL. The return is the negated value of the frame
648    size. */
649    
650    if (ecode == NULL)
651      {
652      if (rdepth == 0)
653        return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
654      else
655        {
656        int len = (char *)&rdepth - (char *)eptr;
657        return (len > 0)? -len : len;
658        }
659      }
660  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
661    
662  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
663  of the local variables that are used only in localised parts of the code, but  of the local variables that are used only in localised parts of the code, but
664  still need to be preserved over recursive calls of match(). These macros define  still need to be preserved over recursive calls of match(). These macros define
665  the alternative names that are used. */  the alternative names that are used. */
666    
667  #define allow_zero    cur_is_word  #define allow_zero    cur_is_word
# Line 642  the alternative names that are used. */ Line 669  the alternative names that are used. */
669  #define code_offset   codelink  #define code_offset   codelink
670  #define condassert    condition  #define condassert    condition
671  #define matched_once  prev_is_word  #define matched_once  prev_is_word
672    #define foc           number
673    #define save_mark     data
674    
675  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
676  variables. */  variables. */
# Line 667  defined). However, RMATCH isn't like a f Line 696  defined). However, RMATCH isn't like a f
696  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
697  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
698    
699  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
700  utf8 = md->utf8;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
701  #else  #else
702  utf8 = FALSE;  utf = FALSE;
703  #endif  #endif
704    
705  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
# Line 680  if (md->match_call_count++ >= md->match_ Line 709  if (md->match_call_count++ >= md->match_
709  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
710    
711  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
712  string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is  string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
713  done this way to save having to use another function argument, which would take  done this way to save having to use another function argument, which would take
714  up space on the stack. See also MATCH_CONDASSERT below.  up space on the stack. See also MATCH_CONDASSERT below.
715    
716  When MATCH_CBEGROUP is set, add the current subject pointer to the chain of  When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
# Line 705  for (;;) Line 734  for (;;)
734    {    {
735    minimize = possessive = FALSE;    minimize = possessive = FALSE;
736    op = *ecode;    op = *ecode;
737    
738    switch(op)    switch(op)
739      {      {
740      case OP_MARK:      case OP_MARK:
741      markptr = ecode + 2;      md->nomatch_mark = ecode + 2;
742      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      md->mark = NULL;    /* In case previously set by assertion */
743        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
744        eptrb, RM55);        eptrb, RM55);
745        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
746             md->mark == NULL) md->mark = ecode + 2;
747    
748      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
749      argument, and we must check whether that argument matches this MARK's      argument, and we must check whether that argument matches this MARK's
# Line 720  for (;;) Line 752  for (;;)
752      position and return MATCH_SKIP. Otherwise, pass back the return code      position and return MATCH_SKIP. Otherwise, pass back the return code
753      unaltered. */      unaltered. */
754    
755      if (rrc == MATCH_SKIP_ARG &&      else if (rrc == MATCH_SKIP_ARG &&
756          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)          STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
757        {        {
758        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
759        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
760        }        }
   
     if (md->mark == NULL) md->mark = markptr;  
761      RRETURN(rrc);      RRETURN(rrc);
762    
763      case OP_FAIL:      case OP_FAIL:
764      MRRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
765    
766      /* COMMIT overrides PRUNE, SKIP, and THEN */      /* COMMIT overrides PRUNE, SKIP, and THEN */
767    
768      case OP_COMMIT:      case OP_COMMIT:
769      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
770        eptrb, RM52);        eptrb, RM52);
771      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
772          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
773          rrc != MATCH_THEN)          rrc != MATCH_THEN)
774        RRETURN(rrc);        RRETURN(rrc);
775      MRRETURN(MATCH_COMMIT);      RRETURN(MATCH_COMMIT);
776    
777      /* PRUNE overrides THEN */      /* PRUNE overrides THEN */
778    
779      case OP_PRUNE:      case OP_PRUNE:
780      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
781        eptrb, RM51);        eptrb, RM51);
782      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
783      MRRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
784    
785      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
786      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      md->nomatch_mark = ecode + 2;
787        md->mark = NULL;    /* In case previously set by assertion */
788        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
789        eptrb, RM56);        eptrb, RM56);
790        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
791             md->mark == NULL) md->mark = ecode + 2;
792      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
     md->mark = ecode + 2;  
793      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
794    
795      /* SKIP overrides PRUNE and THEN */      /* SKIP overrides PRUNE and THEN */
796    
797      case OP_SKIP:      case OP_SKIP:
798      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
799        eptrb, RM53);        eptrb, RM53);
800      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
801        RRETURN(rrc);        RRETURN(rrc);
802      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
803      MRRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
804    
805        /* Note that, for Perl compatibility, SKIP with an argument does NOT set
806        nomatch_mark. There is a flag that disables this opcode when re-matching a
807        pattern that ended with a SKIP for which there was not a matching MARK. */
808    
809      case OP_SKIP_ARG:      case OP_SKIP_ARG:
810      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      if (md->ignore_skip_arg)
811          {
812          ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
813          break;
814          }
815        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
816        eptrb, RM57);        eptrb, RM57);
817      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
818        RRETURN(rrc);        RRETURN(rrc);
819    
820      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
821      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
822      caught by a matching MARK, or get to the top, where it is treated the same      caught by a matching MARK, or get to the top, where it causes a rematch
823      as PRUNE. */      with the md->ignore_skip_arg flag set. */
824    
825      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
826      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
827    
828      /* For THEN (and THEN_ARG) we pass back the address of the bracket or      /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
829      the alt that is at the start of the current branch. This makes it possible      the branch in which it occurs can be determined. Overload the start of
830      to skip back past alternatives that precede the THEN within the current      match pointer to do this. */
     branch. */  
831    
832      case OP_THEN:      case OP_THEN:
833      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
834        eptrb, RM54);        eptrb, RM54);
835      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
836      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
837      MRRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
838    
839      case OP_THEN_ARG:      case OP_THEN_ARG:
840      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],      md->nomatch_mark = ecode + 2;
841        offset_top, md, eptrb, RM58);      md->mark = NULL;    /* In case previously set by assertion */
842        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
843          md, eptrb, RM58);
844        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
845             md->mark == NULL) md->mark = ecode + 2;
846      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
847      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
     md->mark = ecode + LINK_SIZE + 2;  
848      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
849    
850        /* Handle an atomic group that does not contain any capturing parentheses.
851        This can be handled like an assertion. Prior to 8.13, all atomic groups
852        were handled this way. In 8.13, the code was changed as below for ONCE, so
853        that backups pass through the group and thereby reset captured values.
854        However, this uses a lot more stack, so in 8.20, atomic groups that do not
855        contain any captures generate OP_ONCE_NC, which can be handled in the old,
856        less stack intensive way.
857    
858        Check the alternative branches in turn - the matching won't pass the KET
859        for this kind of subpattern. If any one branch matches, we carry on as at
860        the end of a normal bracket, leaving the subject pointer, but resetting
861        the start-of-match value in case it was changed by \K. */
862    
863        case OP_ONCE_NC:
864        prev = ecode;
865        saved_eptr = eptr;
866        save_mark = md->mark;
867        do
868          {
869          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
870          if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
871            {
872            mstart = md->start_match_ptr;
873            break;
874            }
875          if (rrc == MATCH_THEN)
876            {
877            next = ecode + GET(ecode,1);
878            if (md->start_match_ptr < next &&
879                (*ecode == OP_ALT || *next == OP_ALT))
880              rrc = MATCH_NOMATCH;
881            }
882    
883          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
884          ecode += GET(ecode,1);
885          md->mark = save_mark;
886          }
887        while (*ecode == OP_ALT);
888    
889        /* If hit the end of the group (which could be repeated), fail */
890    
891        if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
892    
893        /* Continue as from after the group, updating the offsets high water
894        mark, since extracts may have been taken. */
895    
896        do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
897    
898        offset_top = md->end_offset_top;
899        eptr = md->end_match_ptr;
900    
901        /* For a non-repeating ket, just continue at this level. This also
902        happens for a repeating ket if no characters were matched in the group.
903        This is the forcible breaking of infinite loops as implemented in Perl
904        5.005. */
905    
906        if (*ecode == OP_KET || eptr == saved_eptr)
907          {
908          ecode += 1+LINK_SIZE;
909          break;
910          }
911    
912        /* The repeating kets try the rest of the pattern or restart from the
913        preceding bracket, in the appropriate order. The second "call" of match()
914        uses tail recursion, to avoid using another stack frame. */
915    
916        if (*ecode == OP_KETRMIN)
917          {
918          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
919          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
920          ecode = prev;
921          goto TAIL_RECURSE;
922          }
923        else  /* OP_KETRMAX */
924          {
925          RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
926          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
927          ecode += 1 + LINK_SIZE;
928          goto TAIL_RECURSE;
929          }
930        /* Control never gets here */
931    
932      /* Handle a capturing bracket, other than those that are possessive with an      /* Handle a capturing bracket, other than those that are possessive with an
933      unlimited repeat. If there is space in the offset vector, save the current      unlimited repeat. If there is space in the offset vector, save the current
934      subject position in the working slot at the top of the vector. We mustn't      subject position in the working slot at the top of the vector. We mustn't
935      change the current values of the data slot, because they may be set from a      change the current values of the data slot, because they may be set from a
936      previous iteration of this group, and be referred to by a reference inside      previous iteration of this group, and be referred to by a reference inside
937      the group. If we fail to match, we need to restore this value and also the      the group. A failure to match might occur after the group has succeeded,
938      values of the final offsets, in case they were set by a previous iteration      if something later on doesn't match. For this reason, we need to restore
939      of the same bracket.      the working value and also the values of the final offsets, in case they
940        were set by a previous iteration of the same bracket.
941    
942      If there isn't enough space in the offset vector, treat this as if it were      If there isn't enough space in the offset vector, treat this as if it were
943      a non-capturing bracket. Don't worry about setting the flag for the error      a non-capturing bracket. Don't worry about setting the flag for the error
# Line 820  for (;;) Line 947  for (;;)
947      case OP_SCBRA:      case OP_SCBRA:
948      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
949      offset = number << 1;      offset = number << 1;
950    
951  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
952      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
953      printf("subject=");      printf("subject=");
# Line 834  for (;;) Line 961  for (;;)
961        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
962        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
963        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
964          save_mark = md->mark;
965    
966        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
967        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 841  for (;;) Line 969  for (;;)
969    
970        for (;;)        for (;;)
971          {          {
972          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
973          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
974            eptrb, RM1);            eptrb, RM1);
975          if (rrc != MATCH_NOMATCH &&          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
976              (rrc != MATCH_THEN || md->start_match_ptr != ecode))  
977            RRETURN(rrc);          /* If we backed up to a THEN, check whether it is within the current
978            branch by comparing the address of the THEN that is passed back with
979            the end of the branch. If it is within the current branch, and the
980            branch is one of two or more alternatives (it either starts or ends
981            with OP_ALT), we have reached the limit of THEN's action, so convert
982            the return code to NOMATCH, which will cause normal backtracking to
983            happen from now on. Otherwise, THEN is passed back to an outer
984            alternative. This implements Perl's treatment of parenthesized groups,
985            where a group not containing | does not affect the current alternative,
986            that is, (X) is NOT the same as (X|(*F)). */
987    
988            if (rrc == MATCH_THEN)
989              {
990              next = ecode + GET(ecode,1);
991              if (md->start_match_ptr < next &&
992                  (*ecode == OP_ALT || *next == OP_ALT))
993                rrc = MATCH_NOMATCH;
994              }
995    
996            /* Anything other than NOMATCH is passed back. */
997    
998            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
999          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
1000          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
1001          if (*ecode != OP_ALT) break;          md->mark = save_mark;
1002            if (*ecode != OP_ALT) break;
1003          }          }
1004    
1005        DPRINTF(("bracket %d failed\n", number));        DPRINTF(("bracket %d failed\n", number));
   
1006        md->offset_vector[offset] = save_offset1;        md->offset_vector[offset] = save_offset1;
1007        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
1008        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
1009    
1010        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;        /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
1011        RRETURN(MATCH_NOMATCH);  
1012          RRETURN(rrc);
1013        }        }
1014    
1015      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
# Line 873  for (;;) Line 1023  for (;;)
1023      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1024      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1025    
1026      /* Non-capturing bracket, except for possessive with unlimited repeat. Loop      /* Non-capturing or atomic group, except for possessive with unlimited
1027      for all the alternatives. When we get to the final alternative within the      repeat and ONCE group with no captures. Loop for all the alternatives.
     brackets, we used to return the result of a recursive call to match()  
     whatever happened so it was possible to reduce stack usage by turning this  
     into a tail recursion, except in the case of a possibly empty group.  
     However, now that there is the possibility of (*THEN) occurring in the final  
     alternative, this optimization is no longer possible. */  
1028    
1029        When we get to the final alternative within the brackets, we used to return
1030        the result of a recursive call to match() whatever happened so it was
1031        possible to reduce stack usage by turning this into a tail recursion,
1032        except in the case of a possibly empty group. However, now that there is
1033        the possibility of (*THEN) occurring in the final alternative, this
1034        optimization is no longer always possible.
1035    
1036        We can optimize if we know there are no (*THEN)s in the pattern; at present
1037        this is the best that can be done.
1038    
1039        MATCH_ONCE is returned when the end of an atomic group is successfully
1040        reached, but subsequent matching fails. It passes back up the tree (causing
1041        captured values to be reset) until the original atomic group level is
1042        reached. This is tested by comparing md->once_target with the start of the
1043        group. At this point, the return is converted into MATCH_NOMATCH so that
1044        previous backup points can be taken. */
1045    
1046        case OP_ONCE:
1047      case OP_BRA:      case OP_BRA:
1048      case OP_SBRA:      case OP_SBRA:
1049      DPRINTF(("start non-capturing bracket\n"));      DPRINTF(("start non-capturing bracket\n"));
1050    
1051      for (;;)      for (;;)
1052        {        {
1053        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA || op == OP_ONCE)
1054        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,          md->match_function_type = MATCH_CBEGROUP;
1055    
1056          /* If this is not a possibly empty group, and there are no (*THEN)s in
1057          the pattern, and this is the final alternative, optimize as described
1058          above. */
1059    
1060          else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1061            {
1062            ecode += PRIV(OP_lengths)[*ecode];
1063            goto TAIL_RECURSE;
1064            }
1065    
1066          /* In all other cases, we have to make another call to match(). */
1067    
1068          save_mark = md->mark;
1069          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1070          RM2);          RM2);
1071        if (rrc != MATCH_NOMATCH &&  
1072            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1073          THEN. */
1074    
1075          if (rrc == MATCH_THEN)
1076            {
1077            next = ecode + GET(ecode,1);
1078            if (md->start_match_ptr < next &&
1079                (*ecode == OP_ALT || *next == OP_ALT))
1080              rrc = MATCH_NOMATCH;
1081            }
1082    
1083          if (rrc != MATCH_NOMATCH)
1084            {
1085            if (rrc == MATCH_ONCE)
1086              {
1087              const pcre_uchar *scode = ecode;
1088              if (*scode != OP_ONCE)           /* If not at start, find it */
1089                {
1090                while (*scode == OP_ALT) scode += GET(scode, 1);
1091                scode -= GET(scode, 1);
1092                }
1093              if (md->once_target == scode) rrc = MATCH_NOMATCH;
1094              }
1095          RRETURN(rrc);          RRETURN(rrc);
1096            }
1097        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1098        if (*ecode != OP_ALT) break;        md->mark = save_mark;
1099          if (*ecode != OP_ALT) break;
1100        }        }
1101    
     if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;  
1102      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
1103    
1104      /* Handle possessive capturing brackets with an unlimited repeat. We come      /* Handle possessive capturing brackets with an unlimited repeat. We come
1105      here from BRAZERO with allow_zero set TRUE. The offset_vector values are      here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1106      handled similarly to the normal case above. However, the matching is      handled similarly to the normal case above. However, the matching is
1107      different. The end of these brackets will always be OP_KETRPOS, which      different. The end of these brackets will always be OP_KETRPOS, which
1108      returns MATCH_KETRPOS without going further in the pattern. By this means      returns MATCH_KETRPOS without going further in the pattern. By this means
1109      we can handle the group by iteration rather than recursion, thereby      we can handle the group by iteration rather than recursion, thereby
1110      reducing the amount of stack needed. */      reducing the amount of stack needed. */
1111    
1112      case OP_CBRAPOS:      case OP_CBRAPOS:
1113      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1114      allow_zero = FALSE;      allow_zero = FALSE;
1115    
1116      POSSESSIVE_CAPTURE:      POSSESSIVE_CAPTURE:
1117      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
1118      offset = number << 1;      offset = number << 1;
# Line 925  for (;;) Line 1127  for (;;)
1127      if (offset < md->offset_max)      if (offset < md->offset_max)
1128        {        {
1129        matched_once = FALSE;        matched_once = FALSE;
1130        code_offset = ecode - md->start_code;        code_offset = (int)(ecode - md->start_code);
1131    
1132        save_offset1 = md->offset_vector[offset];        save_offset1 = md->offset_vector[offset];
1133        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
# Line 933  for (;;) Line 1135  for (;;)
1135        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
1136    
1137        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1138    
1139        /* Each time round the loop, save the current subject position for use        /* Each time round the loop, save the current subject position for use
1140        when the group matches. For MATCH_MATCH, the group has matched, so we        when the group matches. For MATCH_MATCH, the group has matched, so we
1141        restart it with a new subject starting position, remembering that we had        restart it with a new subject starting position, remembering that we had
1142        at least one match. For MATCH_NOMATCH, carry on with the alternatives, as        at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1143        usual. If we haven't matched any alternatives in any iteration, check to        usual. If we haven't matched any alternatives in any iteration, check to
1144        see if a previous iteration matched. If so, the group has matched;        see if a previous iteration matched. If so, the group has matched;
1145        continue from afterwards. Otherwise it has failed; restore the previous        continue from afterwards. Otherwise it has failed; restore the previous
1146        capture values before returning NOMATCH. */        capture values before returning NOMATCH. */
1147    
1148        for (;;)        for (;;)
1149          {          {
1150          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1151            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1152          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1153          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1154            eptrb, RM63);            eptrb, RM63);
1155          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1156            {            {
1157            offset_top = md->end_offset_top;            offset_top = md->end_offset_top;
1158            eptr = md->end_match_ptr;            eptr = md->end_match_ptr;
1159            ecode = md->start_code + code_offset;            ecode = md->start_code + code_offset;
1160            save_capture_last = md->capture_last;            save_capture_last = md->capture_last;
1161            matched_once = TRUE;            matched_once = TRUE;
1162            continue;            continue;
1163            }            }
1164          if (rrc != MATCH_NOMATCH &&  
1165              (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* See comment in the code for capturing groups above about handling
1166            RRETURN(rrc);          THEN. */
1167    
1168            if (rrc == MATCH_THEN)
1169              {
1170              next = ecode + GET(ecode,1);
1171              if (md->start_match_ptr < next &&
1172                  (*ecode == OP_ALT || *next == OP_ALT))
1173                rrc = MATCH_NOMATCH;
1174              }
1175    
1176            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1177          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
1178          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
1179          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
1180          }          }
1181    
1182        if (!matched_once)        if (!matched_once)
1183          {          {
1184          md->offset_vector[offset] = save_offset1;          md->offset_vector[offset] = save_offset1;
1185          md->offset_vector[offset+1] = save_offset2;          md->offset_vector[offset+1] = save_offset2;
1186          md->offset_vector[md->offset_end - number] = save_offset3;          md->offset_vector[md->offset_end - number] = save_offset3;
1187          }          }
1188    
       if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;  
1189        if (allow_zero || matched_once)        if (allow_zero || matched_once)
1190          {          {
1191          ecode += 1 + LINK_SIZE;          ecode += 1 + LINK_SIZE;
1192          break;          break;
1193          }          }
1194    
1195        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1196        }        }
1197    
1198      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1199      as a non-capturing bracket. */      as a non-capturing bracket. */
1200    
# Line 995  for (;;) Line 1206  for (;;)
1206      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1207      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1208    
1209      /* Non-capturing possessive bracket with unlimited repeat. We come here      /* Non-capturing possessive bracket with unlimited repeat. We come here
1210      from BRAZERO with allow_zero = TRUE. The code is similar to the above,      from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1211      without the capturing complication. It is written out separately for speed      without the capturing complication. It is written out separately for speed
1212      and cleanliness. */      and cleanliness. */
1213    
1214      case OP_BRAPOS:      case OP_BRAPOS:
1215      case OP_SBRAPOS:      case OP_SBRAPOS:
1216      allow_zero = FALSE;      allow_zero = FALSE;
1217    
1218      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1219      matched_once = FALSE;      matched_once = FALSE;
1220      code_offset = ecode - md->start_code;      code_offset = (int)(ecode - md->start_code);
1221    
1222      for (;;)      for (;;)
1223        {        {
1224        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1225        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1226          eptrb, RM48);          eptrb, RM48);
1227        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1228          {          {
1229          offset_top = md->end_offset_top;          offset_top = md->end_offset_top;
1230          eptr = md->end_match_ptr;          eptr = md->end_match_ptr;
1231          ecode = md->start_code + code_offset;          ecode = md->start_code + code_offset;
1232          matched_once = TRUE;          matched_once = TRUE;
1233          continue;          continue;
1234          }          }
1235        if (rrc != MATCH_NOMATCH &&  
1236            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1237          RRETURN(rrc);        THEN. */
1238    
1239          if (rrc == MATCH_THEN)
1240            {
1241            next = ecode + GET(ecode,1);
1242            if (md->start_match_ptr < next &&
1243                (*ecode == OP_ALT || *next == OP_ALT))
1244              rrc = MATCH_NOMATCH;
1245            }
1246    
1247          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1248        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1249        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1250        }        }
1251    
1252      if (matched_once || allow_zero)      if (matched_once || allow_zero)
1253        {        {
1254        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1255        break;        break;
1256        }        }
1257      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
1258    
1259      /* Control never reaches here. */      /* Control never reaches here. */
# Line 1051  for (;;) Line 1272  for (;;)
1272    
1273      if (ecode[LINK_SIZE+1] == OP_CALLOUT)      if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1274        {        {
1275        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
1276          {          {
1277          pcre_callout_block cb;          PUBL(callout_block) cb;
1278          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
1279          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1280          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1281    #if defined COMPILE_PCRE8
1282          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1283    #elif defined COMPILE_PCRE16
1284            cb.subject          = (PCRE_SPTR16)md->start_subject;
1285    #elif defined COMPILE_PCRE32
1286            cb.subject          = (PCRE_SPTR32)md->start_subject;
1287    #endif
1288          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1289          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
1290          cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
# Line 1066  for (;;) Line 1293  for (;;)
1293          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1294          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1295          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1296          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          cb.mark             = md->nomatch_mark;
1297            if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1298          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1299          }          }
1300        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1301        }        }
1302    
1303      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1085  for (;;) Line 1313  for (;;)
1313          }          }
1314        else        else
1315          {          {
1316          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/          unsigned int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1317          condition =  (recno == RREF_ANY || recno == md->recursive->group_num);          condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1318    
1319          /* If the test is for recursion into a specific subpattern, and it is          /* If the test is for recursion into a specific subpattern, and it is
1320          false, but the test was set up by name, scan the table to see if the          false, but the test was set up by name, scan the table to see if the
1321          name refers to any other numbers, and test them. The condition is true          name refers to any other numbers, and test them. The condition is true
1322          if any one is set. */          if any one is set. */
1323    
1324          if (!condition && condcode == OP_NRREF && recno != RREF_ANY)          if (!condition && condcode == OP_NRREF)
1325            {            {
1326            uschar *slotA = md->name_table;            pcre_uchar *slotA = md->name_table;
1327            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
1328              {              {
1329              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
# Line 1108  for (;;) Line 1336  for (;;)
1336    
1337            if (i < md->name_count)            if (i < md->name_count)
1338              {              {
1339              uschar *slotB = slotA;              pcre_uchar *slotB = slotA;
1340              while (slotB > md->name_table)              while (slotB > md->name_table)
1341                {                {
1342                slotB -= md->name_entry_size;                slotB -= md->name_entry_size;
1343                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1344                  {                  {
1345                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
1346                  if (condition) break;                  if (condition) break;
# Line 1128  for (;;) Line 1356  for (;;)
1356                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
1357                  {                  {
1358                  slotB += md->name_entry_size;                  slotB += md->name_entry_size;
1359                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                  if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1360                    {                    {
1361                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
1362                    if (condition) break;                    if (condition) break;
# Line 1141  for (;;) Line 1369  for (;;)
1369    
1370          /* Choose branch according to the condition */          /* Choose branch according to the condition */
1371    
1372          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1373          }          }
1374        }        }
1375    
# Line 1157  for (;;) Line 1385  for (;;)
1385    
1386        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1387          {          {
1388          int refno = offset >> 1;          unsigned int refno = offset >> 1;
1389          uschar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1390    
1391          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
1392            {            {
# Line 1172  for (;;) Line 1400  for (;;)
1400    
1401          if (i < md->name_count)          if (i < md->name_count)
1402            {            {
1403            uschar *slotB = slotA;            pcre_uchar *slotB = slotA;
1404            while (slotB > md->name_table)            while (slotB > md->name_table)
1405              {              {
1406              slotB -= md->name_entry_size;              slotB -= md->name_entry_size;
1407              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1408                {                {
1409                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
1410                condition = offset < offset_top &&                condition = offset < offset_top &&
# Line 1194  for (;;) Line 1422  for (;;)
1422              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
1423                {                {
1424                slotB += md->name_entry_size;                slotB += md->name_entry_size;
1425                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1426                  {                  {
1427                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
1428                  condition = offset < offset_top &&                  condition = offset < offset_top &&
# Line 1209  for (;;) Line 1437  for (;;)
1437    
1438        /* Choose branch according to the condition */        /* Choose branch according to the condition */
1439    
1440        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1441        }        }
1442    
1443      else if (condcode == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
# Line 1224  for (;;) Line 1452  for (;;)
1452    
1453      else      else
1454        {        {
1455        md->match_function_type = MATCH_CONDASSERT;        md->match_function_type = MATCH_CONDASSERT;
1456        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1457        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1458          {          {
1459            if (md->end_offset_top > offset_top)
1460              offset_top = md->end_offset_top;  /* Captures may have happened */
1461          condition = TRUE;          condition = TRUE;
1462          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1463          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1464          }          }
1465        else if (rrc != MATCH_NOMATCH &&  
1466                (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1467          assertion; it is therefore treated as NOMATCH. */
1468    
1469          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1470          {          {
1471          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1472          }          }
# Line 1244  for (;;) Line 1477  for (;;)
1477          }          }
1478        }        }
1479    
1480      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one, we can
1481      we used to use tail recursion to avoid using another stack frame, except      use tail recursion to avoid using another stack frame, except when there is
1482      when there was unlimited repeat of a possibly empty group. However, that      unlimited repeat of a possibly empty group. In the latter case, a recursive
1483      strategy no longer works because of the possibility of (*THEN) being      call to match() is always required, unless the second alternative doesn't
1484      encountered in the branch. A recursive call to match() is always required,      exist, in which case we can just plough on. Note that, for compatibility
1485      unless the second alternative doesn't exist, in which case we can just      with Perl, the | in a conditional group is NOT treated as creating two
1486      plough on. */      alternatives. If a THEN is encountered in the branch, it propagates out to
1487        the enclosing alternative (unless nested in a deeper set of alternatives,
1488        of course). */
1489    
1490      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1491        {        {
1492        if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;        if (op != OP_SCOND)
1493            {
1494            ecode += 1 + LINK_SIZE;
1495            goto TAIL_RECURSE;
1496            }
1497    
1498          md->match_function_type = MATCH_CBEGROUP;
1499        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
       if (rrc == MATCH_THEN && md->start_match_ptr == ecode)  
         rrc = MATCH_NOMATCH;  
1500        RRETURN(rrc);        RRETURN(rrc);
1501        }        }
1502      else                         /* Condition false & no alternative */  
1503         /* Condition false & no alternative; continue after the group. */
1504    
1505        else
1506        {        {
1507        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1508        }        }
# Line 1287  for (;;) Line 1529  for (;;)
1529        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1530        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1531        }        }
1532      ecode += 3;      ecode += 1 + IMM2_SIZE;
1533      break;      break;
1534    
1535    
1536      /* End of the pattern, either real or forced. If we are in a recursion, we      /* End of the pattern, either real or forced. */
     should restore the offsets appropriately, and if it's a top-level  
     recursion, continue from after the call. */  
1537    
     case OP_ACCEPT:  
     case OP_ASSERT_ACCEPT:  
1538      case OP_END:      case OP_END:
1539      if (md->recursive != NULL)      case OP_ACCEPT:
1540        {      case OP_ASSERT_ACCEPT:
       recursion_info *rec = md->recursive;  
       md->recursive = rec->prevrec;  
       memmove(md->offset_vector, rec->offset_save,  
         rec->saved_max * sizeof(int));  
       offset_top = rec->save_offset_top;  
       if (rec->group_num == 0)  
         {  
         ecode = rec->after_call;  
         break;  
         }  
       }  
1541    
1542      /* Otherwise, if we have matched an empty string, fail if not in an      /* If we have matched an empty string, fail if not in an assertion and not
1543      assertion and if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART      in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1544      is set and we have matched at the start of the subject. In both cases,      is set and we have matched at the start of the subject. In both cases,
1545      backtracking will then try other alternatives, if any. */      backtracking will then try other alternatives, if any. */
1546    
1547      else if (eptr == mstart && op != OP_ASSERT_ACCEPT &&      if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1548          (md->notempty ||           md->recursive == NULL &&
1549            (md->notempty_atstart &&           (md->notempty ||
1550              mstart == md->start_subject + md->start_offset)))             (md->notempty_atstart &&
1551        MRRETURN(MATCH_NOMATCH);               mstart == md->start_subject + md->start_offset)))
1552          RRETURN(MATCH_NOMATCH);
1553    
1554      /* Otherwise, we have a match. */      /* Otherwise, we have a match. */
1555    
1556      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1557      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1558      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1559    
1560      /* For some reason, the macros don't work properly if an expression is      /* For some reason, the macros don't work properly if an expression is
1561      given as the argument to MRRETURN when the heap is in use. */      given as the argument to RRETURN when the heap is in use. */
1562    
1563      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1564      MRRETURN(rrc);      RRETURN(rrc);
1565    
1566      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
1567      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
1568      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1569      start of each branch to move the current point backwards, so the code at      start of each branch to move the current point backwards, so the code at
1570      this level is identical to the lookahead case. When the assertion is part      this level is identical to the lookahead case. When the assertion is part
1571      of a condition, we want to return immediately afterwards. The caller of      of a condition, we want to return immediately afterwards. The caller of
1572      this incarnation of the match() function will have set MATCH_CONDASSERT in      this incarnation of the match() function will have set MATCH_CONDASSERT in
1573      md->match_function_type, and one of these opcodes will be the first opcode      md->match_function_type, and one of these opcodes will be the first opcode
1574      that is processed. We use a local variable that is preserved over calls to      that is processed. We use a local variable that is preserved over calls to
1575      match() to remember this case. */      match() to remember this case. */
1576    
1577      case OP_ASSERT:      case OP_ASSERT:
1578      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1579        save_mark = md->mark;
1580      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1581        {        {
1582        condassert = TRUE;        condassert = TRUE;
1583        md->match_function_type = 0;        md->match_function_type = 0;
1584        }        }
1585      else condassert = FALSE;      else condassert = FALSE;
1586    
1587      do      do
1588        {        {
1589        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
# Line 1363  for (;;) Line 1592  for (;;)
1592          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1593          break;          break;
1594          }          }
1595        if (rrc != MATCH_NOMATCH &&        md->mark = save_mark;
1596            (rrc != MATCH_THEN || md->start_match_ptr != ecode))  
1597          RRETURN(rrc);        /* A COMMIT failure must fail the entire assertion, without trying any
1598          subsequent branches. */
1599    
1600          if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
1601    
1602          /* PCRE does not allow THEN to escape beyond an assertion; it
1603          is treated as NOMATCH. */
1604    
1605          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1606        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1607        }        }
1608      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1609    
1610      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1611    
1612      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1613    
# Line 1390  for (;;) Line 1627  for (;;)
1627    
1628      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1629      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1630        save_mark = md->mark;
1631      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1632        {        {
1633        condassert = TRUE;        condassert = TRUE;
1634        md->match_function_type = 0;        md->match_function_type = 0;
1635        }        }
1636      else condassert = FALSE;      else condassert = FALSE;
1637    
1638      do      do
1639        {        {
1640        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1641        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);        md->mark = save_mark;
1642          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1643        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1644          {          {
1645          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1646          break;          break;
1647          }          }
1648        if (rrc != MATCH_NOMATCH &&  
1649            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1650          RRETURN(rrc);        as NOMATCH. */
1651    
1652          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1653        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1654        }        }
1655      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1656    
1657      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1658    
1659      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1660      continue;      continue;
1661    
# Line 1424  for (;;) Line 1665  for (;;)
1665      back a number of characters, not bytes. */      back a number of characters, not bytes. */
1666    
1667      case OP_REVERSE:      case OP_REVERSE:
1668  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1669      if (utf8)      if (utf)
1670        {        {
1671        i = GET(ecode, 1);        i = GET(ecode, 1);
1672        while (i-- > 0)        while (i-- > 0)
1673          {          {
1674          eptr--;          eptr--;
1675          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1676          BACKCHAR(eptr);          BACKCHAR(eptr);
1677          }          }
1678        }        }
# Line 1442  for (;;) Line 1683  for (;;)
1683    
1684        {        {
1685        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1686        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1687        }        }
1688    
1689      /* Save the earliest consulted character, then skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
# Line 1456  for (;;) Line 1697  for (;;)
1697      function is able to force a failure. */      function is able to force a failure. */
1698    
1699      case OP_CALLOUT:      case OP_CALLOUT:
1700      if (pcre_callout != NULL)      if (PUBL(callout) != NULL)
1701        {        {
1702        pcre_callout_block cb;        PUBL(callout_block) cb;
1703        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 2;   /* Version 2 of the callout block */
1704        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1705        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1706    #if defined COMPILE_PCRE8
1707        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1708    #elif defined COMPILE_PCRE16
1709          cb.subject          = (PCRE_SPTR16)md->start_subject;
1710    #elif defined COMPILE_PCRE32
1711          cb.subject          = (PCRE_SPTR32)md->start_subject;
1712    #endif
1713        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1714        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
1715        cb.current_position = (int)(eptr - md->start_subject);        cb.current_position = (int)(eptr - md->start_subject);
# Line 1471  for (;;) Line 1718  for (;;)
1718        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1719        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1720        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1721        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        cb.mark             = md->nomatch_mark;
1722          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1723        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1724        }        }
1725      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 1481  for (;;) Line 1729  for (;;)
1729      offset data is the offset to the starting bracket from the start of the      offset data is the offset to the starting bracket from the start of the
1730      whole pattern. (This is so that it works from duplicated subpatterns.)      whole pattern. (This is so that it works from duplicated subpatterns.)
1731    
1732      If there are any capturing brackets started but not finished, we have to      The state of the capturing groups is preserved over recursion, and
1733      save their starting points and reinstate them after the recursion. However,      re-instated afterwards. We don't know how many are started and not yet
1734      we don't know how many such there are (offset_top records the completed      finished (offset_top records the completed total) so we just have to save
1735      total) so we just have to save all the potential data. There may be up to      all the potential data. There may be up to 65535 such values, which is too
1736      65535 such values, which is too large to put on the stack, but using malloc      large to put on the stack, but using malloc for small numbers seems
1737      for small numbers seems expensive. As a compromise, the stack is used when      expensive. As a compromise, the stack is used when there are no more than
1738      there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc      REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
     is used. A problem is what to do if the malloc fails ... there is no way of  
     returning to the top level with an error. Save the top REC_STACK_SAVE_MAX  
     values on the stack, and accept that the rest may be wrong.  
1739    
1740      There are also other values that have to be saved. We use a chained      There are also other values that have to be saved. We use a chained
1741      sequence of blocks that actually live on the stack. Thanks to Robin Houston      sequence of blocks that actually live on the stack. Thanks to Robin Houston
1742      for the original version of this logic. */      for the original version of this logic. It has, however, been hacked around
1743        a lot, so he is not to blame for the current way it works. */
1744    
1745      case OP_RECURSE:      case OP_RECURSE:
1746        {        {
1747          recursion_info *ri;
1748          unsigned int recno;
1749    
1750        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1751        new_recursive.group_num = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
1752          GET2(callpat, 1 + LINK_SIZE);          GET2(callpat, 1 + LINK_SIZE);
1753    
1754          /* Check for repeating a recursion without advancing the subject pointer.
1755          This should catch convoluted mutual recursions. (Some simple cases are
1756          caught at compile time.) */
1757    
1758          for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1759            if (recno == ri->group_num && eptr == ri->subject_position)
1760              RRETURN(PCRE_ERROR_RECURSELOOP);
1761    
1762        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1763    
1764          new_recursive.group_num = recno;
1765          new_recursive.subject_position = eptr;
1766        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1767        md->recursive = &new_recursive;        md->recursive = &new_recursive;
1768    
1769        /* Find where to continue from afterwards */        /* Where to continue from afterwards */
1770    
1771        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
       new_recursive.after_call = ecode;  
1772    
1773        /* Now save the offset data. */        /* Now save the offset data */
1774    
1775        new_recursive.saved_max = md->offset_end;        new_recursive.saved_max = md->offset_end;
1776        if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)        if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
# Line 1520  for (;;) Line 1778  for (;;)
1778        else        else
1779          {          {
1780          new_recursive.offset_save =          new_recursive.offset_save =
1781            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));            (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1782          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1783          }          }
   
1784        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1785              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1786        new_recursive.save_offset_top = offset_top;  
1787          /* OK, now we can do the recursion. After processing each alternative,
1788        /* OK, now we can do the recursion. For each top-level alternative we        restore the offset data. If there were nested recursions, md->recursive
1789        restore the offset and recursion data. */        might be changed, so reset it before looping. */
1790    
1791        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1792        cbegroup = (*callpat >= OP_SBRA);        cbegroup = (*callpat >= OP_SBRA);
1793        do        do
1794          {          {
1795          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1796          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1797            md, eptrb, RM6);            md, eptrb, RM6);
1798            memcpy(md->offset_vector, new_recursive.offset_save,
1799                new_recursive.saved_max * sizeof(int));
1800            md->recursive = new_recursive.prevrec;
1801          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1802            {            {
1803            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
           md->recursive = new_recursive.prevrec;  
1804            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1805              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1806            MRRETURN(MATCH_MATCH);  
1807              /* Set where we got to in the subject, and reset the start in case
1808              it was changed by \K. This *is* propagated back out of a recursion,
1809              for Perl compatibility. */
1810    
1811              eptr = md->end_match_ptr;
1812              mstart = md->start_match_ptr;
1813              goto RECURSION_MATCHED;        /* Exit loop; end processing */
1814            }            }
1815          else if (rrc != MATCH_NOMATCH &&  
1816                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
1817            is treated as NOMATCH. */
1818    
1819            else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
1820                     rrc != MATCH_COMMIT)
1821            {            {
1822            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1823            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1824              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1825            RRETURN(rrc);            RRETURN(rrc);
1826            }            }
1827    
1828          md->recursive = &new_recursive;          md->recursive = &new_recursive;
         memcpy(md->offset_vector, new_recursive.offset_save,  
             new_recursive.saved_max * sizeof(int));  
1829          callpat += GET(callpat, 1);          callpat += GET(callpat, 1);
1830          }          }
1831        while (*callpat == OP_ALT);        while (*callpat == OP_ALT);
# Line 1565  for (;;) Line 1833  for (;;)
1833        DPRINTF(("Recursion didn't match\n"));        DPRINTF(("Recursion didn't match\n"));
1834        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1835        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1836          (pcre_free)(new_recursive.offset_save);          (PUBL(free))(new_recursive.offset_save);
1837        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
       }  
     /* Control never reaches here */  
   
     /* "Once" brackets are like assertion brackets except that after a match,  
     the point in the subject string is not moved back. Thus there can never be  
     a move back into the brackets. Friedl calls these "atomic" subpatterns.  
     Check the alternative branches in turn - the matching won't pass the KET  
     for this kind of subpattern. If any one branch matches, we carry on as at  
     the end of a normal bracket, leaving the subject pointer, but resetting  
     the start-of-match value in case it was changed by \K. */  
   
     case OP_ONCE:  
     prev = ecode;  
     saved_eptr = eptr;  
   
     do  
       {  
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);  
       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */  
         {  
         mstart = md->start_match_ptr;  
         break;  
         }  
       if (rrc != MATCH_NOMATCH &&  
           (rrc != MATCH_THEN || md->start_match_ptr != ecode))  
         RRETURN(rrc);  
       ecode += GET(ecode,1);  
       }  
     while (*ecode == OP_ALT);  
   
     /* If hit the end of the group (which could be repeated), fail */  
   
     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);  
   
     /* Continue after the group, updating the offsets high water mark, since  
     extracts may have been taken. */  
   
     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);  
   
     offset_top = md->end_offset_top;  
     eptr = md->end_match_ptr;  
   
     /* For a non-repeating ket, just continue at this level. This also  
     happens for a repeating ket if no characters were matched in the group.  
     This is the forcible breaking of infinite loops as implemented in Perl  
     5.005. If there is an options reset, it will get obeyed in the normal  
     course of events. */  
   
     if (*ecode == OP_KET || eptr == saved_eptr)  
       {  
       ecode += 1+LINK_SIZE;  
       break;  
1838        }        }
1839    
1840      /* The repeating kets try the rest of the pattern or restart from the      RECURSION_MATCHED:
1841      preceding bracket, in the appropriate order. The second "call" of match()      break;
     uses tail recursion, to avoid using another stack frame. */  
   
     if (*ecode == OP_KETRMIN)  
       {  
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM8);  
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
       ecode = prev;  
       goto TAIL_RECURSE;  
       }  
     else  /* OP_KETRMAX */  
       {  
       md->match_function_type = MATCH_CBEGROUP;  
       RMATCH(eptr, prev, offset_top, md, eptrb, RM9);  
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
       ecode += 1 + LINK_SIZE;  
       goto TAIL_RECURSE;  
       }  
     /* Control never gets here */  
1842    
1843      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1844      bracketed group and go to there. */      bracketed group and go to there. */
# Line 1654  for (;;) Line 1852  for (;;)
1852      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1853      with fixed upper repeat limits are compiled as a number of copies, with the      with fixed upper repeat limits are compiled as a number of copies, with the
1854      optional ones preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1855    
1856      case OP_BRAZERO:      case OP_BRAZERO:
1857      next = ecode + 1;      next = ecode + 1;
1858      RMATCH(eptr, next, offset_top, md, eptrb, RM10);      RMATCH(eptr, next, offset_top, md, eptrb, RM10);
# Line 1662  for (;;) Line 1860  for (;;)
1860      do next += GET(next, 1); while (*next == OP_ALT);      do next += GET(next, 1); while (*next == OP_ALT);
1861      ecode = next + 1 + LINK_SIZE;      ecode = next + 1 + LINK_SIZE;
1862      break;      break;
1863    
1864      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1865      next = ecode + 1;      next = ecode + 1;
1866      do next += GET(next, 1); while (*next == OP_ALT);      do next += GET(next, 1); while (*next == OP_ALT);
# Line 1676  for (;;) Line 1874  for (;;)
1874      do next += GET(next,1); while (*next == OP_ALT);      do next += GET(next,1); while (*next == OP_ALT);
1875      ecode = next + 1 + LINK_SIZE;      ecode = next + 1 + LINK_SIZE;
1876      break;      break;
1877    
1878      /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything      /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1879      here; just jump to the group, with allow_zero set TRUE. */      here; just jump to the group, with allow_zero set TRUE. */
1880    
1881      case OP_BRAPOSZERO:      case OP_BRAPOSZERO:
1882      op = *(++ecode);      op = *(++ecode);
1883      allow_zero = TRUE;      allow_zero = TRUE;
1884      if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;      if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1885        goto POSSESSIVE_NON_CAPTURE;        goto POSSESSIVE_NON_CAPTURE;
# Line 1691  for (;;) Line 1889  for (;;)
1889      case OP_KET:      case OP_KET:
1890      case OP_KETRMIN:      case OP_KETRMIN:
1891      case OP_KETRMAX:      case OP_KETRMAX:
1892      case OP_KETRPOS:      case OP_KETRPOS:
1893      prev = ecode - GET(ecode, 1);      prev = ecode - GET(ecode, 1);
1894    
1895      /* If this was a group that remembered the subject start, in order to break      /* If this was a group that remembered the subject start, in order to break
1896      infinite repeats of empty string matches, retrieve the subject start from      infinite repeats of empty string matches, retrieve the subject start from
1897      the chain. Otherwise, set it NULL. */      the chain. Otherwise, set it NULL. */
1898    
1899      if (*prev >= OP_SBRA)      if (*prev >= OP_SBRA || *prev == OP_ONCE)
1900        {        {
1901        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1902        eptrb = eptrb->epb_prev;              /* Backup to previous group */        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1903        }        }
1904      else saved_eptr = NULL;      else saved_eptr = NULL;
1905    
1906      /* If we are at the end of an assertion group or an atomic group, stop      /* If we are at the end of an assertion group or a non-capturing atomic
1907      matching and return MATCH_MATCH, but record the current high water mark for      group, stop matching and return MATCH_MATCH, but record the current high
1908      use by positive assertions. We also need to record the match start in case      water mark for use by positive assertions. We also need to record the match
1909      it was changed by \K. */      start in case it was changed by \K. */
1910    
1911      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1912          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||           *prev == OP_ONCE_NC)
         *prev == OP_ONCE)  
1913        {        {
1914        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE_NC */
1915        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1916        md->start_match_ptr = mstart;        md->start_match_ptr = mstart;
1917        MRRETURN(MATCH_MATCH);        RRETURN(MATCH_MATCH);         /* Sets md->mark */
1918        }        }
1919    
1920      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
1921      and if necessary complete handling an extraction by setting the offsets and      and if necessary complete handling an extraction by setting the offsets and
1922      bumping the high water mark. Note that whole-pattern recursion is coded as      bumping the high water mark. Whole-pattern recursion is coded as a recurse
1923      a recurse into group 0, so it won't be picked up here. Instead, we catch it      into group 0, so it won't be picked up here. Instead, we catch it when the
1924      when the OP_END is reached. Other recursion is handled here. */      OP_END is reached. Other recursion is handled here. We just have to record
1925        the current subject position and start match pointer and give a MATCH
1926        return. */
1927    
1928      if (*prev == OP_CBRA || *prev == OP_SCBRA ||      if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1929          *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)          *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
# Line 1737  for (;;) Line 1936  for (;;)
1936        printf("\n");        printf("\n");
1937  #endif  #endif
1938    
1939          /* Handle a recursively called group. */
1940    
1941          if (md->recursive != NULL && md->recursive->group_num == number)
1942            {
1943            md->end_match_ptr = eptr;
1944            md->start_match_ptr = mstart;
1945            RRETURN(MATCH_MATCH);
1946            }
1947    
1948          /* Deal with capturing */
1949    
1950        md->capture_last = number;        md->capture_last = number;
1951        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1952          {          {
1953          /* If offset is greater than offset_top, it means that we are          /* If offset is greater than offset_top, it means that we are
1954          "skipping" a capturing group, and that group's offsets must be marked          "skipping" a capturing group, and that group's offsets must be marked
1955          unset. In earlier versions of PCRE, all the offsets were unset at the          unset. In earlier versions of PCRE, all the offsets were unset at the
1956          start of matching, but this doesn't work because atomic groups and          start of matching, but this doesn't work because atomic groups and
1957          assertions can cause a value to be set that should later be unset.          assertions can cause a value to be set that should later be unset.
1958          Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as          Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1959          part of the atomic group, but this is not on the final matching path,          part of the atomic group, but this is not on the final matching path,
1960          so must be unset when 2 is set. (If there is no group 2, there is no          so must be unset when 2 is set. (If there is no group 2, there is no
1961          problem, because offset_top will then be 2, indicating no capture.) */          problem, because offset_top will then be 2, indicating no capture.) */
1962    
1963          if (offset > offset_top)          if (offset > offset_top)
1964            {            {
1965            register int *iptr = md->offset_vector + offset_top;            register int *iptr = md->offset_vector + offset_top;
1966            register int *iend = md->offset_vector + offset;            register int *iend = md->offset_vector + offset;
1967            while (iptr < iend) *iptr++ = -1;            while (iptr < iend) *iptr++ = -1;
1968            }            }
1969    
1970          /* Now make the extraction */          /* Now make the extraction */
1971    
1972          md->offset_vector[offset] =          md->offset_vector[offset] =
# Line 1764  for (;;) Line 1974  for (;;)
1974          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1975          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1976          }          }
   
       /* Handle a recursively called group. Restore the offsets  
       appropriately and continue from after the call. */  
   
       if (md->recursive != NULL && md->recursive->group_num == number)  
         {  
         recursion_info *rec = md->recursive;  
         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));  
         md->recursive = rec->prevrec;  
         memcpy(md->offset_vector, rec->offset_save,  
           rec->saved_max * sizeof(int));  
         offset_top = rec->save_offset_top;  
         ecode = rec->after_call;  
         break;  
         }  
1977        }        }
1978    
1979      /* For a non-repeating ket, just continue at this level. This also      /* For an ordinary non-repeating ket, just continue at this level. This
1980      happens for a repeating ket if no characters were matched in the group.      also happens for a repeating ket if no characters were matched in the
1981      This is the forcible breaking of infinite loops as implemented in Perl      group. This is the forcible breaking of infinite loops as implemented in
1982      5.005. If there is an options reset, it will get obeyed in the normal      Perl 5.005. For a non-repeating atomic group that includes captures,
1983      course of events. */      establish a backup point by processing the rest of the pattern at a lower
1984        level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1985        original OP_ONCE level, thereby bypassing intermediate backup points, but
1986        resetting any captures that happened along the way. */
1987    
1988      if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1989        {        {
1990        ecode += 1 + LINK_SIZE;        if (*prev == OP_ONCE)
1991            {
1992            RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1993            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1994            md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1995            RRETURN(MATCH_ONCE);
1996            }
1997          ecode += 1 + LINK_SIZE;    /* Carry on at this level */
1998        break;        break;
1999        }        }
2000    
2001      /* OP_KETRPOS is a possessive repeating ket. Remember the current position,      /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
2002      and return the MATCH_KETRPOS. This makes it possible to do the repeats one      and return the MATCH_KETRPOS. This makes it possible to do the repeats one
2003      at a time from the outer level, thus saving stack. */      at a time from the outer level, thus saving stack. */
2004    
2005      if (*ecode == OP_KETRPOS)      if (*ecode == OP_KETRPOS)
2006        {        {
2007        md->end_match_ptr = eptr;        md->end_match_ptr = eptr;
2008        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
2009        RRETURN(MATCH_KETRPOS);        RRETURN(MATCH_KETRPOS);
2010        }        }
2011    
2012      /* The normal repeating kets try the rest of the pattern or restart from      /* The normal repeating kets try the rest of the pattern or restart from
2013      the preceding bracket, in the appropriate order. In the second case, we can      the preceding bracket, in the appropriate order. In the second case, we can
2014      use tail recursion to avoid using another stack frame, unless we have an      use tail recursion to avoid using another stack frame, unless we have an
2015      unlimited repeat of a group that can match an empty string. */      atomic group or an unlimited repeat of a group that can match an empty
2016        string. */
2017    
2018      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
2019        {        {
2020        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
2021        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2022          if (*prev == OP_ONCE)
2023            {
2024            RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2025            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2026            md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2027            RRETURN(MATCH_ONCE);
2028            }
2029        if (*prev >= OP_SBRA)    /* Could match an empty string */        if (*prev >= OP_SBRA)    /* Could match an empty string */
2030          {          {
         md->match_function_type = MATCH_CBEGROUP;  
2031          RMATCH(eptr, prev, offset_top, md, eptrb, RM50);          RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2032          RRETURN(rrc);          RRETURN(rrc);
2033          }          }
# Line 1824  for (;;) Line 2036  for (;;)
2036        }        }
2037      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
2038        {        {
       if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;  
2039        RMATCH(eptr, prev, offset_top, md, eptrb, RM13);        RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2040          if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2041        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2042          if (*prev == OP_ONCE)
2043            {
2044            RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2045            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2046            md->once_target = prev;
2047            RRETURN(MATCH_ONCE);
2048            }
2049        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
2050        goto TAIL_RECURSE;        goto TAIL_RECURSE;
2051        }        }
# Line 1835  for (;;) Line 2054  for (;;)
2054      /* Not multiline mode: start of subject assertion, unless notbol. */      /* Not multiline mode: start of subject assertion, unless notbol. */
2055    
2056      case OP_CIRC:      case OP_CIRC:
2057      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2058    
2059      /* Start of subject assertion */      /* Start of subject assertion */
2060    
2061      case OP_SOD:      case OP_SOD:
2062      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2063      ecode++;      ecode++;
2064      break;      break;
2065    
2066      /* Multiline mode: start of subject unless notbol, or after any newline. */      /* Multiline mode: start of subject unless notbol, or after any newline. */
2067    
2068      case OP_CIRCM:      case OP_CIRCM:
2069      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2070      if (eptr != md->start_subject &&      if (eptr != md->start_subject &&
2071          (eptr == md->end_subject || !WAS_NEWLINE(eptr)))          (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2072        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2073      ecode++;      ecode++;
2074      break;      break;
2075    
2076      /* Start of match assertion */      /* Start of match assertion */
2077    
2078      case OP_SOM:      case OP_SOM:
2079      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2080      ecode++;      ecode++;
2081      break;      break;
2082    
# Line 1873  for (;;) Line 2092  for (;;)
2092    
2093      case OP_DOLLM:      case OP_DOLLM:
2094      if (eptr < md->end_subject)      if (eptr < md->end_subject)
2095        { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }        {
2096          if (!IS_NEWLINE(eptr))
2097            {
2098            if (md->partial != 0 &&
2099                eptr + 1 >= md->end_subject &&
2100                NLBLOCK->nltype == NLTYPE_FIXED &&
2101                NLBLOCK->nllen == 2 &&
2102                RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2103              {
2104              md->hitend = TRUE;
2105              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2106              }
2107            RRETURN(MATCH_NOMATCH);
2108            }
2109          }
2110      else      else
2111        {        {
2112        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
2113        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2114        }        }
2115      ecode++;      ecode++;
2116      break;      break;
2117    
2118      /* Not multiline mode: assert before a terminating newline or before end of      /* Not multiline mode: assert before a terminating newline or before end of
2119      subject unless noteol is set. */      subject unless noteol is set. */
2120    
2121      case OP_DOLL:      case OP_DOLL:
2122      if (md->noteol) MRRETURN(MATCH_NOMATCH);      if (md->noteol) RRETURN(MATCH_NOMATCH);
2123      if (!md->endonly) goto ASSERT_NL_OR_EOS;      if (!md->endonly) goto ASSERT_NL_OR_EOS;
2124    
2125      /* ... else fall through for endonly */      /* ... else fall through for endonly */
# Line 1894  for (;;) Line 2127  for (;;)
2127      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
2128    
2129      case OP_EOD:      case OP_EOD:
2130      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2131      SCHECK_PARTIAL();      SCHECK_PARTIAL();
2132      ecode++;      ecode++;
2133      break;      break;
# Line 1905  for (;;) Line 2138  for (;;)
2138      ASSERT_NL_OR_EOS:      ASSERT_NL_OR_EOS:
2139      if (eptr < md->end_subject &&      if (eptr < md->end_subject &&
2140          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2141        MRRETURN(MATCH_NOMATCH);        {
2142          if (md->partial != 0 &&
2143              eptr + 1 >= md->end_subject &&
2144              NLBLOCK->nltype == NLTYPE_FIXED &&
2145              NLBLOCK->nllen == 2 &&
2146              RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2147            {
2148            md->hitend = TRUE;
2149            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2150            }
2151          RRETURN(MATCH_NOMATCH);
2152          }
2153    
2154      /* Either at end of string or \n before end. */      /* Either at end of string or \n before end. */
2155    
# Line 1924  for (;;) Line 2168  for (;;)
2168        be "non-word" characters. Remember the earliest consulted character for        be "non-word" characters. Remember the earliest consulted character for
2169        partial matching. */        partial matching. */
2170    
2171  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2172        if (utf8)        if (utf)
2173          {          {
2174          /* Get status of previous character */          /* Get status of previous character */
2175    
2176          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
2177            {            {
2178            USPTR lastptr = eptr - 1;            PCRE_PUCHAR lastptr = eptr - 1;
2179            while((*lastptr & 0xc0) == 0x80) lastptr--;            BACKCHAR(lastptr);
2180            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2181            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
2182  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 1997  for (;;) Line 2241  for (;;)
2241              }              }
2242            else            else
2243  #endif  #endif
2244            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = MAX_255(eptr[-1])
2245                && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2246            }            }
2247    
2248          /* Get status of next character */          /* Get status of next character */
# Line 2020  for (;;) Line 2265  for (;;)
2265            }            }
2266          else          else
2267  #endif  #endif
2268          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = MAX_255(*eptr)
2269              && ((md->ctypes[*eptr] & ctype_word) != 0);
2270          }          }
2271    
2272        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
2273    
2274        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
2275             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2276          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2277        }        }
2278      break;      break;
2279    
2280      /* Match a single character type; inline for speed */      /* Match any single character type except newline; have to take care with
2281        CRLF newlines and partial matching. */
2282    
2283      case OP_ANY:      case OP_ANY:
2284      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2285        if (md->partial != 0 &&
2286            eptr + 1 >= md->end_subject &&
2287            NLBLOCK->nltype == NLTYPE_FIXED &&
2288            NLBLOCK->nllen == 2 &&
2289            RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2290          {
2291          md->hitend = TRUE;
2292          if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2293          }
2294    
2295      /* Fall through */      /* Fall through */
2296    
2297        /* Match any single character whatsoever. */
2298    
2299      case OP_ALLANY:      case OP_ALLANY:
2300      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2301        {        {                            /* not be updated before SCHECK_PARTIAL. */
2302        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2303        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2304        }        }
2305      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      eptr++;
2306    #ifdef SUPPORT_UTF
2307        if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2308    #endif
2309      ecode++;      ecode++;
2310      break;      break;
2311    
# Line 2051  for (;;) Line 2313  for (;;)
2313      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
2314    
2315      case OP_ANYBYTE:      case OP_ANYBYTE:
2316      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2317        {        {                            /* not be updated before SCHECK_PARTIAL. */
2318        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2319        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2320        }        }
2321        eptr++;
2322      ecode++;      ecode++;
2323      break;      break;
2324    
# Line 2063  for (;;) Line 2326  for (;;)
2326      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2327        {        {
2328        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2329        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2330        }        }
2331      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2332      if (      if (
2333  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2334         c < 256 &&         c < 256 &&
2335  #endif  #endif
2336         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
2337         )         )
2338        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2339      ecode++;      ecode++;
2340      break;      break;
2341    
# Line 2080  for (;;) Line 2343  for (;;)
2343      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2344        {        {
2345        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2346        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2347        }        }
2348      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2349      if (      if (
2350  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2351         c >= 256 ||         c > 255 ||
2352  #endif  #endif
2353         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
2354         )         )
2355        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2356      ecode++;      ecode++;
2357      break;      break;
2358    
# Line 2097  for (;;) Line 2360  for (;;)
2360      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2361        {        {
2362        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2363        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2364        }        }
2365      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2366      if (      if (
2367  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2368         c < 256 &&         c < 256 &&
2369  #endif  #endif
2370         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
2371         )         )
2372        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2373      ecode++;      ecode++;
2374      break;      break;
2375    
# Line 2114  for (;;) Line 2377  for (;;)
2377      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2378        {        {
2379        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2380        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2381        }        }
2382      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2383      if (      if (
2384  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2385         c >= 256 ||         c > 255 ||
2386  #endif  #endif
2387         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
2388         )         )
2389        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2390      ecode++;      ecode++;
2391      break;      break;
2392    
# Line 2131  for (;;) Line 2394  for (;;)
2394      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2395        {        {
2396        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2397        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2398        }        }
2399      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2400      if (      if (
2401  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2402         c < 256 &&         c < 256 &&
2403  #endif  #endif
2404         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
2405         )         )
2406        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2407      ecode++;      ecode++;
2408      break;      break;
2409    
# Line 2148  for (;;) Line 2411  for (;;)
2411      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2412        {        {
2413        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2414        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2415        }        }
2416      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2417      if (      if (
2418  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2419         c >= 256 ||         c > 255 ||
2420  #endif  #endif
2421         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2422         )         )
2423        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2424      ecode++;      ecode++;
2425      break;      break;
2426    
# Line 2165  for (;;) Line 2428  for (;;)
2428      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2429        {        {
2430        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2431        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2432        }        }
2433      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2434      switch(c)      switch(c)
2435        {        {
2436        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2437    
2438        case 0x000d:        case CHAR_CR:
2439        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr >= md->end_subject)
2440            {
2441            SCHECK_PARTIAL();
2442            }
2443          else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++;
2444        break;        break;
2445    
2446        case 0x000a:        case CHAR_LF:
2447        break;        break;
2448    
2449        case 0x000b:        case CHAR_VT:
2450        case 0x000c:        case CHAR_FF:
2451        case 0x0085:        case CHAR_NEL:
2452    #ifndef EBCDIC
2453        case 0x2028:        case 0x2028:
2454        case 0x2029:        case 0x2029:
2455        if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);  #endif  /* Not EBCDIC */
2456          if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2457        break;        break;
2458        }        }
2459      ecode++;      ecode++;
# Line 2194  for (;;) Line 2463  for (;;)
2463      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2464        {        {
2465        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2466        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2467        }        }
2468      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2469      switch(c)      switch(c)
2470        {        {
2471          HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2472        default: break;        default: break;
       case 0x09:      /* HT */  
       case 0x20:      /* SPACE */  
       case 0xa0:      /* NBSP */  
       case 0x1680:    /* OGHAM SPACE MARK */  
       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
       case 0x2000:    /* EN QUAD */  
       case 0x2001:    /* EM QUAD */  
       case 0x2002:    /* EN SPACE */  
       case 0x2003:    /* EM SPACE */  
       case 0x2004:    /* THREE-PER-EM SPACE */  
       case 0x2005:    /* FOUR-PER-EM SPACE */  
       case 0x2006:    /* SIX-PER-EM SPACE */  
       case 0x2007:    /* FIGURE SPACE */  
       case 0x2008:    /* PUNCTUATION SPACE */  
       case 0x2009:    /* THIN SPACE */  
       case 0x200A:    /* HAIR SPACE */  
       case 0x202f:    /* NARROW NO-BREAK SPACE */  
       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
       case 0x3000:    /* IDEOGRAPHIC SPACE */  
       MRRETURN(MATCH_NOMATCH);  
2473        }        }
2474      ecode++;      ecode++;
2475      break;      break;
# Line 2228  for (;;) Line 2478  for (;;)
2478      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2479        {        {
2480        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2481        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2482        }        }
2483      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2484      switch(c)      switch(c)
2485        {        {
2486        default: MRRETURN(MATCH_NOMATCH);        HSPACE_CASES: break;  /* Byte and multibyte cases */
2487        case 0x09:      /* HT */        default: RRETURN(MATCH_NOMATCH);
       case 0x20:      /* SPACE */  
       case 0xa0:      /* NBSP */  
       case 0x1680:    /* OGHAM SPACE MARK */  
       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
       case 0x2000:    /* EN QUAD */  
       case 0x2001:    /* EM QUAD */  
       case 0x2002:    /* EN SPACE */  
       case 0x2003:    /* EM SPACE */  
       case 0x2004:    /* THREE-PER-EM SPACE */  
       case 0x2005:    /* FOUR-PER-EM SPACE */  
       case 0x2006:    /* SIX-PER-EM SPACE */  
       case 0x2007:    /* FIGURE SPACE */  
       case 0x2008:    /* PUNCTUATION SPACE */  
       case 0x2009:    /* THIN SPACE */  
       case 0x200A:    /* HAIR SPACE */  
       case 0x202f:    /* NARROW NO-BREAK SPACE */  
       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
       case 0x3000:    /* IDEOGRAPHIC SPACE */  
       break;  
2488        }        }
2489      ecode++;      ecode++;
2490      break;      break;
# Line 2262  for (;;) Line 2493  for (;;)
2493      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2494        {        {
2495        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2496        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2497        }        }
2498      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2499      switch(c)      switch(c)
2500        {        {
2501          VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2502        default: break;        default: break;
       case 0x0a:      /* LF */  
       case 0x0b:      /* VT */  
       case 0x0c:      /* FF */  
       case 0x0d:      /* CR */  
       case 0x85:      /* NEL */  
       case 0x2028:    /* LINE SEPARATOR */  
       case 0x2029:    /* PARAGRAPH SEPARATOR */  
       MRRETURN(MATCH_NOMATCH);  
2503        }        }
2504      ecode++;      ecode++;
2505      break;      break;
# Line 2284  for (;;) Line 2508  for (;;)
2508      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2509        {        {
2510        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2511        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2512        }        }
2513      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2514      switch(c)      switch(c)
2515        {        {
2516        default: MRRETURN(MATCH_NOMATCH);        VSPACE_CASES: break;
2517        case 0x0a:      /* LF */        default: RRETURN(MATCH_NOMATCH);
       case 0x0b:      /* VT */  
       case 0x0c:      /* FF */  
       case 0x0d:      /* CR */  
       case 0x85:      /* NEL */  
       case 0x2028:    /* LINE SEPARATOR */  
       case 0x2029:    /* PARAGRAPH SEPARATOR */  
       break;  
2518        }        }
2519      ecode++;      ecode++;
2520      break;      break;
# Line 2311  for (;;) Line 2528  for (;;)
2528      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2529        {        {
2530        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2531        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2532        }        }
2533      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2534        {        {
2535          const pcre_uint32 *cp;
2536        const ucd_record *prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
2537    
2538        switch(ecode[1])        switch(ecode[1])
2539          {          {
2540          case PT_ANY:          case PT_ANY:
2541          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2542          break;          break;
2543    
2544          case PT_LAMP:          case PT_LAMP:
2545          if ((prop->chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2546               prop->chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2547               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2548            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2549          break;          break;
2550    
2551          case PT_GC:          case PT_GC:
2552          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2553            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2554          break;          break;
2555    
2556          case PT_PC:          case PT_PC:
2557          if ((ecode[2] != prop->chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2558            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2559          break;          break;
2560    
2561          case PT_SC:          case PT_SC:
2562          if ((ecode[2] != prop->script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2563            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2564          break;          break;
2565    
2566          /* These are specials */          /* These are specials */
2567    
2568          case PT_ALNUM:          case PT_ALNUM:
2569          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2570               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2571            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2572          break;          break;
2573    
2574          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2575          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2576               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2577                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2578            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2579          break;          break;
2580    
2581          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2582          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2583               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2584               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2585                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2586            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2587          break;          break;
2588    
2589          case PT_WORD:          case PT_WORD:
2590          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2591               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2592               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2593            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2594            break;
2595    
2596            case PT_CLIST:
2597            cp = PRIV(ucd_caseless_sets) + prop->caseset;
2598            for (;;)
2599              {
2600              if (c < *cp)
2601                { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2602              if (c == *cp++)
2603                { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2604              }
2605          break;          break;
2606    
2607          /* This should never occur */          /* This should never occur */
# Line 2392  for (;;) Line 2621  for (;;)
2621      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2622        {        {
2623        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2624        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2625        }        }
2626      GETCHARINCTEST(c, eptr);      else
2627        {        {
2628        int category = UCD_CATEGORY(c);        int lgb, rgb;
2629        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);        GETCHARINCTEST(c, eptr);
2630          lgb = UCD_GRAPHBREAK(c);
2631        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2632          {          {
2633          int len = 1;          int len = 1;
2634          if (!utf8) c = *eptr; else          if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2635            {          rgb = UCD_GRAPHBREAK(c);
2636            GETCHARLEN(c, eptr, len);          if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2637            }          lgb = rgb;
         category = UCD_CATEGORY(c);  
         if (category != ucp_M) break;  
2638          eptr += len;          eptr += len;
2639          }          }
2640        }        }
2641        CHECK_PARTIAL();
2642      ecode++;      ecode++;
2643      break;      break;
2644  #endif  #endif  /* SUPPORT_UCP */
2645    
2646    
2647      /* Match a back reference, possibly repeatedly. Look past the end of the      /* Match a back reference, possibly repeatedly. Look past the end of the
# Line 2424  for (;;) Line 2653  for (;;)
2653      loops). */      loops). */
2654    
2655      case OP_REF:      case OP_REF:
2656      case OP_REFI:      case OP_REFI:
2657      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2658      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2659      ecode += 3;      ecode += 1 + IMM2_SIZE;
2660    
2661      /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2662    
# Line 2467  for (;;) Line 2696  for (;;)
2696        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2697        minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2698        min = GET2(ecode, 1);        min = GET2(ecode, 1);
2699        max = GET2(ecode, 3);        max = GET2(ecode, 1 + IMM2_SIZE);
2700        if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2701        ecode += 5;        ecode += 1 + 2 * IMM2_SIZE;
2702        break;        break;
2703    
2704        default:               /* No repeat follows */        default:               /* No repeat follows */
2705        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2706          {          {
2707            if (length == -2) eptr = md->end_subject;   /* Partial match */
2708          CHECK_PARTIAL();          CHECK_PARTIAL();
2709          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2710          }          }
2711        eptr += length;        eptr += length;
2712        continue;              /* With the main loop */        continue;              /* With the main loop */
2713        }        }
2714    
2715      /* Handle repeated back references. If the length of the reference is      /* Handle repeated back references. If the length of the reference is
2716      zero, just continue with the main loop. */      zero, just continue with the main loop. If the length is negative, it
2717        means the reference is unset in non-Java-compatible mode. If the minimum is
2718        zero, we can continue at the same level without recursion. For any other
2719        minimum, carrying on will result in NOMATCH. */
2720    
2721      if (length == 0) continue;      if (length == 0) continue;
2722        if (length < 0 && min == 0) continue;
2723    
2724      /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2725      the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
# Line 2493  for (;;) Line 2727  for (;;)
2727    
2728      for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2729        {        {
2730        int slength;        int slength;
2731        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2732          {          {
2733            if (slength == -2) eptr = md->end_subject;   /* Partial match */
2734          CHECK_PARTIAL();          CHECK_PARTIAL();
2735          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2736          }          }
2737        eptr += slength;        eptr += slength;
2738        }        }
# Line 2513  for (;;) Line 2748  for (;;)
2748        {        {
2749        for (fi = min;; fi++)        for (fi = min;; fi++)
2750          {          {
2751          int slength;          int slength;
2752          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2753          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2754          if (fi >= max) MRRETURN(MATCH_NOMATCH);          if (fi >= max) RRETURN(MATCH_NOMATCH);
2755          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2756            {            {
2757              if (slength == -2) eptr = md->end_subject;   /* Partial match */
2758            CHECK_PARTIAL();            CHECK_PARTIAL();
2759            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2760            }            }
2761          eptr += slength;          eptr += slength;
2762          }          }
# Line 2534  for (;;) Line 2770  for (;;)
2770        pp = eptr;        pp = eptr;
2771        for (i = min; i < max; i++)        for (i = min; i < max; i++)
2772          {          {
2773          int slength;          int slength;
2774          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2775            {            {
2776            CHECK_PARTIAL();            /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2777              the soft partial matching case. */
2778    
2779              if (slength == -2 && md->partial != 0 &&
2780                  md->end_subject > md->start_used_ptr)
2781                {
2782                md->hitend = TRUE;
2783                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2784                }
2785            break;            break;
2786            }            }
2787          eptr += slength;          eptr += slength;
2788          }          }
2789    
2790        while (eptr >= pp)        while (eptr >= pp)
2791          {          {
2792          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2793          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2794          eptr -= length;          eptr -= length;
2795          }          }
2796        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2797        }        }
2798      /* Control never gets here */      /* Control never gets here */
2799    
# Line 2566  for (;;) Line 2811  for (;;)
2811      case OP_NCLASS:      case OP_NCLASS:
2812      case OP_CLASS:      case OP_CLASS:
2813        {        {
2814          /* The data variable is saved across frames, so the byte map needs to
2815          be stored there. */
2816    #define BYTE_MAP ((pcre_uint8 *)data)
2817        data = ecode + 1;                /* Save for matching */        data = ecode + 1;                /* Save for matching */
2818        ecode += 33;                     /* Advance past the item */        ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2819    
2820        switch (*ecode)        switch (*ecode)
2821          {          {
# Line 2588  for (;;) Line 2836  for (;;)
2836          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2837          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2838          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2839          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2840          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2841          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2842          break;          break;
2843    
2844          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2600  for (;;) Line 2848  for (;;)
2848    
2849        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
2850    
2851  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2852        /* UTF-8 mode */        if (utf)
       if (utf8)  
2853          {          {
2854          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2855            {            {
2856            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2857              {              {
2858              SCHECK_PARTIAL();              SCHECK_PARTIAL();
2859              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2860              }              }
2861            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2862            if (c > 255)            if (c > 255)
2863              {              {
2864              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2865              }              }
2866            else            else
2867              {              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
             }  
2868            }            }
2869          }          }
2870        else        else
2871  #endif  #endif
2872        /* Not UTF-8 mode */        /* Not UTF mode */
2873          {          {
2874          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2875            {            {
2876            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2877              {              {
2878              SCHECK_PARTIAL();              SCHECK_PARTIAL();
2879              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2880              }              }
2881            c = *eptr++;            c = *eptr++;
2882            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2883              if (c > 255)
2884                {
2885                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2886                }
2887              else
2888    #endif
2889                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2890            }            }
2891          }          }
2892    
# Line 2648  for (;;) Line 2900  for (;;)
2900    
2901        if (minimize)        if (minimize)
2902          {          {
2903  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2904          /* UTF-8 mode */          if (utf)
         if (utf8)  
2905            {            {
2906            for (fi = min;; fi++)            for (fi = min;; fi++)
2907              {              {
2908              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2909              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2910              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2911              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2912                {                {
2913                SCHECK_PARTIAL();                SCHECK_PARTIAL();
2914                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2915                }                }
2916              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2917              if (c > 255)              if (c > 255)
2918                {                {
2919                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2920                }                }
2921              else              else
2922                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
               }  
2923              }              }
2924            }            }
2925          else          else
2926  #endif  #endif
2927          /* Not UTF-8 mode */          /* Not UTF mode */
2928            {            {
2929            for (fi = min;; fi++)            for (fi = min;; fi++)
2930              {              {
2931              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2932              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2933              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2934              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2935                {                {
2936                SCHECK_PARTIAL();                SCHECK_PARTIAL();
2937                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2938                }                }
2939              c = *eptr++;              c = *eptr++;
2940              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2941                if (c > 255)
2942                  {
2943                  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2944                  }
2945                else
2946    #endif
2947                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2948              }              }
2949            }            }
2950          /* Control never gets here */          /* Control never gets here */
# Line 2700  for (;;) Line 2956  for (;;)
2956          {          {
2957          pp = eptr;          pp = eptr;
2958    
2959  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2960          /* UTF-8 mode */          if (utf)
         if (utf8)  
2961            {            {
2962            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2963              {              {
# Line 2718  for (;;) Line 2973  for (;;)
2973                if (op == OP_CLASS) break;                if (op == OP_CLASS) break;
2974                }                }
2975              else              else
2976                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
               if ((data[c/8] & (1 << (c&7))) == 0) break;  
               }  
2977              eptr += len;              eptr += len;
2978              }              }
2979            for (;;)            for (;;)
# Line 2733  for (;;) Line 2986  for (;;)
2986            }            }
2987          else          else
2988  #endif  #endif
2989            /* Not UTF-8 mode */            /* Not UTF mode */
2990            {            {
2991            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2992              {              {
# Line 2743  for (;;) Line 2996  for (;;)
2996                break;                break;
2997                }                }
2998              c = *eptr;              c = *eptr;
2999              if ((data[c/8] & (1 << (c&7))) == 0) break;  #ifndef COMPILE_PCRE8
3000                if (c > 255)
3001                  {
3002                  if (op == OP_CLASS) break;
3003                  }
3004                else
3005    #endif
3006                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3007              eptr++;              eptr++;
3008              }              }
3009            while (eptr >= pp)            while (eptr >= pp)
# Line 2754  for (;;) Line 3014  for (;;)
3014              }              }
3015            }            }
3016    
3017          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3018          }          }
3019    #undef BYTE_MAP
3020        }        }
3021      /* Control never gets here */      /* Control never gets here */
3022    
# Line 2764  for (;;) Line 3025  for (;;)
3025      when UTF-8 mode mode is supported. Nevertheless, we may not be in UTF-8      when UTF-8 mode mode is supported. Nevertheless, we may not be in UTF-8
3026      mode, because Unicode properties are supported in non-UTF-8 mode. */      mode, because Unicode properties are supported in non-UTF-8 mode. */
3027    
3028  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3029      case OP_XCLASS:      case OP_XCLASS:
3030        {        {
3031        data = ecode + 1 + LINK_SIZE;                /* Save for matching */        data = ecode + 1 + LINK_SIZE;                /* Save for matching */
# Line 2789  for (;;) Line 3050  for (;;)
3050          case OP_CRMINRANGE:          case OP_CRMINRANGE:
3051          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
3052          min = GET2(ecode, 1);          min = GET2(ecode, 1);
3053          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
3054          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
3055          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
3056          break;          break;
3057    
3058          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2806  for (;;) Line 3067  for (;;)
3067          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3068            {            {
3069            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3070            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3071            }            }
3072          GETCHARINCTEST(c, eptr);          GETCHARINCTEST(c, eptr);
3073          if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3074          }          }
3075    
3076        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2826  for (;;) Line 3087  for (;;)
3087            {            {
3088            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3089            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3090            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3091            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3092              {              {
3093              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3094              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3095              }              }
3096            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3097            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);            if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3098            }            }
3099          /* Control never gets here */          /* Control never gets here */
3100          }          }
# Line 2851  for (;;) Line 3112  for (;;)
3112              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3113              break;              break;
3114              }              }
3115    #ifdef SUPPORT_UTF
3116            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
3117            if (!_pcre_xclass(c, data)) break;  #else
3118              c = *eptr;
3119    #endif
3120              if (!PRIV(xclass)(c, data, utf)) break;
3121            eptr += len;            eptr += len;
3122            }            }
3123          for(;;)          for(;;)
# Line 2860  for (;;) Line 3125  for (;;)
3125            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3126            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3127            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3128            if (utf8) BACKCHAR(eptr);  #ifdef SUPPORT_UTF
3129              if (utf) BACKCHAR(eptr);
3130    #endif
3131            }            }
3132          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3133          }          }
3134    
3135        /* Control never gets here */        /* Control never gets here */
# Line 2872  for (;;) Line 3139  for (;;)
3139      /* Match a single character, casefully */      /* Match a single character, casefully */
3140    
3141      case OP_CHAR:      case OP_CHAR:
3142  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3143      if (utf8)      if (utf)
3144        {        {
3145        length = 1;        length = 1;
3146        ecode++;        ecode++;
# Line 2881  for (;;) Line 3148  for (;;)
3148        if (length > md->end_subject - eptr)        if (length > md->end_subject - eptr)
3149          {          {
3150          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3151          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3152          }          }
3153        while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH);
3154        }        }
3155      else      else
3156  #endif  #endif
3157        /* Not UTF mode */
     /* Non-UTF-8 mode */  
3158        {        {
3159        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3160          {          {
3161          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3162          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3163          }          }
3164        if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3165        ecode += 2;        ecode += 2;
3166        }        }
3167      break;      break;
3168    
3169      /* Match a single character, caselessly */      /* Match a single character, caselessly. If we are at the end of the
3170        subject, give up immediately. */
3171    
3172      case OP_CHARI:      case OP_CHARI:
3173  #ifdef SUPPORT_UTF8      if (eptr >= md->end_subject)
3174      if (utf8)        {
3175          SCHECK_PARTIAL();
3176          RRETURN(MATCH_NOMATCH);
3177          }
3178    
3179    #ifdef SUPPORT_UTF
3180        if (utf)
3181        {        {
3182        length = 1;        length = 1;
3183        ecode++;        ecode++;
3184        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
3185    
       if (length > md->end_subject - eptr)  
         {  
         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */  
         MRRETURN(MATCH_NOMATCH);  
         }  
   
3186        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
3187        can use the fast lookup table. */        we know that its other case must also be one byte long, so we can use the
3188          fast lookup table. We know that there is at least one byte left in the
3189          subject. */
3190    
3191        if (fc < 128)        if (fc < 128)
3192          {          {
3193          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          pcre_uchar cc = RAWUCHAR(eptr);
3194            if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3195            ecode++;
3196            eptr++;
3197          }          }
3198    
3199        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character. Note that we cannot
3200          use the value of "length" to check for sufficient bytes left, because the
3201          other case of the character may have more or fewer bytes.  */
3202    
3203        else        else
3204          {          {
3205          unsigned int dc;          pcre_uint32 dc;
3206          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
3207          ecode += length;          ecode += length;
3208    
# Line 2940  for (;;) Line 3214  for (;;)
3214  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3215            if (dc != UCD_OTHERCASE(fc))            if (dc != UCD_OTHERCASE(fc))
3216  #endif  #endif
3217              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3218            }            }
3219          }          }
3220        }        }
3221      else      else
3222  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF */
3223    
3224      /* Non-UTF-8 mode */      /* Not UTF mode */
3225        {        {
3226        if (md->end_subject - eptr < 1)        if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3227          {            != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3228          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */        eptr++;
         MRRETURN(MATCH_NOMATCH);  
         }  
       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);  
3229        ecode += 2;        ecode += 2;
3230        }        }
3231      break;      break;
# Line 2964  for (;;) Line 3235  for (;;)
3235      case OP_EXACT:      case OP_EXACT:
3236      case OP_EXACTI:      case OP_EXACTI:
3237      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3238      ecode += 3;      ecode += 1 + IMM2_SIZE;
3239      goto REPEATCHAR;      goto REPEATCHAR;
3240    
3241      case OP_POSUPTO:      case OP_POSUPTO:
# Line 2979  for (;;) Line 3250  for (;;)
3250      min = 0;      min = 0;
3251      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3252      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3253      ecode += 3;      ecode += 1 + IMM2_SIZE;
3254      goto REPEATCHAR;      goto REPEATCHAR;
3255    
3256      case OP_POSSTAR:      case OP_POSSTAR:
# Line 3027  for (;;) Line 3298  for (;;)
3298      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. */
3299    
3300      REPEATCHAR:      REPEATCHAR:
3301  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3302      if (utf8)      if (utf)
3303        {        {
3304        length = 1;        length = 1;
3305        charptr = ecode;        charptr = ecode;
# Line 3041  for (;;) Line 3312  for (;;)
3312        if (length > 1)        if (length > 1)
3313          {          {
3314  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3315          unsigned int othercase;          pcre_uint32 othercase;
3316          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3317              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3318            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
3319          else oclength = 0;          else oclength = 0;
3320  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3321    
3322          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3323            {            {
3324            if (eptr <= md->end_subject - length &&            if (eptr <= md->end_subject - length &&
3325              memcmp(eptr, charptr, length) == 0) eptr += length;              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3326  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3327            else if (oclength > 0 &&            else if (oclength > 0 &&
3328                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
3329                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3330  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3331            else            else
3332              {              {
3333              CHECK_PARTIAL();              CHECK_PARTIAL();
3334              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3335              }              }
3336            }            }
3337    
# Line 3072  for (;;) Line 3343  for (;;)
3343              {              {
3344              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3345              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3346              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3347              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3348                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3349  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3350              else if (oclength > 0 &&              else if (oclength > 0 &&
3351                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3352                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3353  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3354              else              else
3355                {                {
3356                CHECK_PARTIAL();                CHECK_PARTIAL();
3357                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3358                }                }
3359              }              }
3360            /* Control never gets here */            /* Control never gets here */
# Line 3095  for (;;) Line 3366  for (;;)
3366            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3367              {              {
3368              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3369                  memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3370  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3371              else if (oclength > 0 &&              else if (oclength > 0 &&
3372                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3373                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3374  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3375              else              else
3376                {                {
# Line 3114  for (;;) Line 3385  for (;;)
3385              {              {
3386              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3387              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3388              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }              if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3389  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3390              eptr--;              eptr--;
3391              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3131  for (;;) Line 3402  for (;;)
3402        value of fc will always be < 128. */        value of fc will always be < 128. */
3403        }        }
3404      else      else
3405  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3406          /* When not in UTF-8 mode, load a single-byte character. */
3407      /* When not in UTF-8 mode, load a single-byte character. */        fc = *ecode++;
3408    
3409      fc = *ecode++;      /* The value of fc at this point is always one character, though we may
3410        or may not be in UTF mode. The code is duplicated for the caseless and
     /* The value of fc at this point is always less than 256, though we may or  
     may not be in UTF-8 mode. The code is duplicated for the caseless and  
3411      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
3412      common. First, ensure the minimum number of matches are present. If min =      common. First, ensure the minimum number of matches are present. If min =
3413      max, continue at the same level without recursing. Otherwise, if      max, continue at the same level without recursing. Otherwise, if
# Line 3147  for (;;) Line 3416  for (;;)
3416      maximizing, find the maximum number of characters and work backwards. */      maximizing, find the maximum number of characters and work backwards. */
3417    
3418      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3419        max, eptr));        max, (char *)eptr));
3420    
3421      if (op >= OP_STARI)  /* Caseless */      if (op >= OP_STARI)  /* Caseless */
3422        {        {
3423        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3424          /* fc must be < 128 if UTF is enabled. */
3425          foc = md->fcc[fc];
3426    #else
3427    #ifdef SUPPORT_UTF
3428    #ifdef SUPPORT_UCP
3429          if (utf && fc > 127)
3430            foc = UCD_OTHERCASE(fc);
3431    #else
3432          if (utf && fc > 127)
3433            foc = fc;
3434    #endif /* SUPPORT_UCP */
3435          else
3436    #endif /* SUPPORT_UTF */
3437            foc = TABLE_GET(fc, md->fcc, fc);
3438    #endif /* COMPILE_PCRE8 */
3439    
3440        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3441          {          {
3442            pcre_uchar cc;
3443    
3444          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3445            {            {
3446            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3447            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3448            }            }
3449          if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          cc = RAWUCHARTEST(eptr);
3450            if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3451            eptr++;
3452          }          }
3453        if (min == max) continue;        if (min == max) continue;
3454        if (minimize)        if (minimize)
3455          {          {
3456          for (fi = min;; fi++)          for (fi = min;; fi++)
3457            {            {
3458              pcre_uchar cc;
3459    
3460            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3461            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3462            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3463            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3464              {              {
3465              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3466              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3467              }              }
3468            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            cc = RAWUCHARTEST(eptr);
3469              if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3470              eptr++;
3471            }            }
3472          /* Control never gets here */          /* Control never gets here */
3473          }          }
# Line 3183  for (;;) Line 3476  for (;;)
3476          pp = eptr;          pp = eptr;
3477          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3478            {            {
3479              pcre_uchar cc;
3480    
3481            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3482              {              {
3483              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3484              break;              break;
3485              }              }
3486            if (fc != md->lcc[*eptr]) break;            cc = RAWUCHARTEST(eptr);
3487              if (fc != cc && foc != cc) break;
3488            eptr++;            eptr++;
3489            }            }
3490    
# Line 3200  for (;;) Line 3496  for (;;)
3496            eptr--;            eptr--;
3497            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3498            }            }
3499          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3500          }          }
3501        /* Control never gets here */        /* Control never gets here */
3502        }        }
# Line 3214  for (;;) Line 3510  for (;;)
3510          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3511            {            {
3512            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3513            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3514            }            }
3515          if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);          if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3516          }          }
3517    
3518        if (min == max) continue;        if (min == max) continue;
# Line 3227  for (;;) Line 3523  for (;;)
3523            {            {
3524            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3525            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3526            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3527            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3528              {              {
3529              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3530              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3531              }              }
3532            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);            if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3533            }            }
3534          /* Control never gets here */          /* Control never gets here */
3535          }          }
# Line 3247  for (;;) Line 3543  for (;;)
3543              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3544              break;              break;
3545              }              }
3546            if (fc != *eptr) break;            if (fc != RAWUCHARTEST(eptr)) break;
3547            eptr++;            eptr++;
3548            }            }
3549          if (possessive) continue;          if (possessive) continue;
# Line 3258  for (;;) Line 3554  for (;;)
3554            eptr--;            eptr--;
3555            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3556            }            }
3557          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3558          }          }
3559        }        }
3560      /* Control never gets here */      /* Control never gets here */
# Line 3267  for (;;) Line 3563  for (;;)
3563      checking can be multibyte. */      checking can be multibyte. */
3564    
3565      case OP_NOT:      case OP_NOT:
3566      case OP_NOTI:      case OP_NOTI:
3567      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3568        {        {
3569        SCHECK_PARTIAL();        SCHECK_PARTIAL();
3570        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3571        }        }
3572      ecode++;  #ifdef SUPPORT_UTF
3573      GETCHARINCTEST(c, eptr);      if (utf)
     if (op == OP_NOTI)         /* The caseless case */  
3574        {        {
3575  #ifdef SUPPORT_UTF8        register pcre_uint32 ch, och;
3576        if (c < 256)  
3577  #endif        ecode++;
3578        c = md->lcc[c];        GETCHARINC(ch, ecode);
3579        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);        GETCHARINC(c, eptr);
3580    
3581          if (op == OP_NOT)
3582            {
3583            if (ch == c) RRETURN(MATCH_NOMATCH);
3584            }
3585          else
3586            {
3587    #ifdef SUPPORT_UCP
3588            if (ch > 127)
3589              och = UCD_OTHERCASE(ch);
3590    #else
3591            if (ch > 127)
3592              och = ch;
3593    #endif /* SUPPORT_UCP */
3594            else
3595              och = TABLE_GET(ch, md->fcc, ch);
3596            if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3597            }
3598        }        }
3599      else    /* Caseful */      else
3600    #endif
3601        {        {
3602        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);        register pcre_uint32 ch = ecode[1];
3603          c = *eptr++;
3604          if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3605            RRETURN(MATCH_NOMATCH);
3606          ecode += 2;
3607        }        }
3608      break;      break;
3609    
# Line 3299  for (;;) Line 3617  for (;;)
3617      case OP_NOTEXACT:      case OP_NOTEXACT:
3618      case OP_NOTEXACTI:      case OP_NOTEXACTI:
3619      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3620      ecode += 3;      ecode += 1 + IMM2_SIZE;
3621      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3622    
3623      case OP_NOTUPTO:      case OP_NOTUPTO:
# Line 3309  for (;;) Line 3627  for (;;)
3627      min = 0;      min = 0;
3628      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3629      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3630      ecode += 3;      ecode += 1 + IMM2_SIZE;
3631      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3632    
3633      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
# Line 3341  for (;;) Line 3659  for (;;)
3659      possessive = TRUE;      possessive = TRUE;
3660      min = 0;      min = 0;
3661      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3662      ecode += 3;      ecode += 1 + IMM2_SIZE;
3663      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3664    
3665      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 3365  for (;;) Line 3683  for (;;)
3683      /* Common code for all repeated single-byte matches. */      /* Common code for all repeated single-byte matches. */
3684    
3685      REPEATNOTCHAR:      REPEATNOTCHAR:
3686      fc = *ecode++;      GETCHARINCTEST(fc, ecode);
3687    
3688      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
3689      since matching characters is likely to be quite common. First, ensure the      since matching characters is likely to be quite common. First, ensure the
# Line 3376  for (;;) Line 3694  for (;;)
3694      characters and work backwards. */      characters and work backwards. */
3695    
3696      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3697        max, eptr));        max, (char *)eptr));
3698    
3699      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3700        {        {
3701        fc = md->lcc[fc];  #ifdef SUPPORT_UTF
3702    #ifdef SUPPORT_UCP
3703          if (utf && fc > 127)
3704            foc = UCD_OTHERCASE(fc);
3705    #else
3706          if (utf && fc > 127)
3707            foc = fc;
3708    #endif /* SUPPORT_UCP */
3709          else
3710    #endif /* SUPPORT_UTF */
3711            foc = TABLE_GET(fc, md->fcc, fc);
3712    
3713  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3714        /* UTF-8 mode */        if (utf)
       if (utf8)  
3715          {          {
3716          register unsigned int d;          register pcre_uint32 d;
3717          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3718            {            {
3719            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3720              {              {
3721              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3722              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3723              }              }
3724            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3725            if (d < 256) d = md->lcc[d];            if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
           if (fc == d) MRRETURN(MATCH_NOMATCH);  
3726            }            }
3727          }          }
3728        else        else
3729  #endif  #endif
3730          /* Not UTF mode */
       /* Not UTF-8 mode */  
3731          {          {
3732          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3733            {            {
3734            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3735              {              {
3736              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3737              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3738              }              }
3739            if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3740              eptr++;
3741            }            }
3742          }          }
3743    
# Line 3419  for (;;) Line 3745  for (;;)
3745    
3746        if (minimize)        if (minimize)
3747          {          {
3748  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3749          /* UTF-8 mode */          if (utf)
         if (utf8)  
3750            {            {
3751            register unsigned int d;            register pcre_uint32 d;
3752            for (fi = min;; fi++)            for (fi = min;; fi++)
3753              {              {
3754              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3755              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3756              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3757              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3758                {                {
3759                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3760                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3761                }                }
3762              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3763              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
             if (fc == d) MRRETURN(MATCH_NOMATCH);  
3764              }              }
3765            }            }
3766          else          else
3767  #endif  #endif
3768          /* Not UTF-8 mode */          /* Not UTF mode */
3769            {            {
3770            for (fi = min;; fi++)            for (fi = min;; fi++)
3771              {              {
3772              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3773              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3774              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3775              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3776                {                {
3777                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3778                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3779                }                }
3780              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);              if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3781                eptr++;
3782              }              }
3783            }            }
3784          /* Control never gets here */          /* Control never gets here */
# Line 3465  for (;;) Line 3790  for (;;)
3790          {          {
3791          pp = eptr;          pp = eptr;
3792    
3793  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3794          /* UTF-8 mode */          if (utf)
         if (utf8)  
3795            {            {
3796            register unsigned int d;            register pcre_uint32 d;
3797            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3798              {              {
3799              int len = 1;              int len = 1;
# Line 3479  for (;;) Line 3803  for (;;)
3803                break;                break;
3804                }                }
3805              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3806              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) break;
             if (fc == d) break;  
3807              eptr += len;              eptr += len;
3808              }              }
3809          if (possessive) continue;            if (possessive) continue;
3810          for(;;)            for(;;)
3811              {              {
3812              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3813              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 3494  for (;;) Line 3817  for (;;)
3817            }            }
3818          else          else
3819  #endif  #endif
3820          /* Not UTF-8 mode */          /* Not UTF mode */
3821            {            {
3822            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3823              {              {
# Line 3503  for (;;) Line 3826  for (;;)
3826                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3827                break;                break;
3828                }                }
3829              if (fc == md->lcc[*eptr]) break;              if (fc == *eptr || foc == *eptr) break;
3830              eptr++;              eptr++;
3831              }              }
3832            if (possessive) continue;            if (possessive) continue;
# Line 3515  for (;;) Line 3838  for (;;)
3838              }              }
3839            }            }
3840    
3841          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3842          }          }
3843        /* Control never gets here */        /* Control never gets here */
3844        }        }
# Line 3524  for (;;) Line 3847  for (;;)
3847    
3848      else      else
3849        {        {
3850  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3851        /* UTF-8 mode */        if (utf)
       if (utf8)  
3852          {          {
3853          register unsigned int d;          register pcre_uint32 d;
3854          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3855            {            {
3856            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3857              {              {
3858              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3859              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3860              }              }
3861            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3862            if (fc == d) MRRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
3863            }            }
3864          }          }
3865        else        else
3866  #endif  #endif
3867        /* Not UTF-8 mode */        /* Not UTF mode */
3868          {          {
3869          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3870            {            {
3871            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3872              {              {
3873              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3874              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3875              }              }
3876            if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3877            }            }
3878          }          }
3879    
# Line 3559  for (;;) Line 3881  for (;;)
3881    
3882        if (minimize)        if (minimize)
3883          {          {
3884  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3885          /* UTF-8 mode */          if (utf)
         if (utf8)  
3886            {            {
3887            register unsigned int d;            register pcre_uint32 d;
3888            for (fi = min;; fi++)            for (fi = min;; fi++)
3889              {              {
3890              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3891              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3892              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3893              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3894                {                {
3895                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3896                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3897                }                }
3898              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3899              if (fc == d) MRRETURN(MATCH_NOMATCH);              if (fc == d) RRETURN(MATCH_NOMATCH);
3900              }              }
3901            }            }
3902          else          else
3903  #endif  #endif
3904          /* Not UTF-8 mode */          /* Not UTF mode */
3905            {            {
3906            for (fi = min;; fi++)            for (fi = min;; fi++)
3907              {              {
3908              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3909              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3910              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3911              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3912                {                {
3913                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3914                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3915                }                }
3916              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);              if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3917              }              }
3918            }            }
3919          /* Control never gets here */          /* Control never gets here */
# Line 3604  for (;;) Line 3925  for (;;)
3925          {          {
3926          pp = eptr;          pp = eptr;
3927    
3928  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3929          /* UTF-8 mode */          if (utf)
         if (utf8)  
3930            {            {
3931            register unsigned int d;            register pcre_uint32 d;
3932            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3933              {              {
3934              int len = 1;              int len = 1;
# Line 3632  for (;;) Line 3952  for (;;)
3952            }            }
3953          else          else
3954  #endif  #endif
3955          /* Not UTF-8 mode */          /* Not UTF mode */
3956            {            {
3957            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3958              {              {
# Line 3653  for (;;) Line 3973  for (;;)
3973              }              }
3974            }            }
3975    
3976          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3977          }          }
3978        }        }
3979      /* Control never gets here */      /* Control never gets here */
# Line 3665  for (;;) Line 3985  for (;;)
3985      case OP_TYPEEXACT:      case OP_TYPEEXACT:
3986      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3987      minimize = TRUE;      minimize = TRUE;
3988      ecode += 3;      ecode += 1 + IMM2_SIZE;
3989      goto REPEATTYPE;      goto REPEATTYPE;
3990    
3991      case OP_TYPEUPTO:      case OP_TYPEUPTO:
# Line 3673  for (;;) Line 3993  for (;;)
3993      min = 0;      min = 0;
3994      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3995      minimize = *ecode == OP_TYPEMINUPTO;      minimize = *ecode == OP_TYPEMINUPTO;
3996      ecode += 3;      ecode += 1 + IMM2_SIZE;
3997      goto REPEATTYPE;      goto REPEATTYPE;
3998    
3999      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
# Line 3701  for (;;) Line 4021  for (;;)
4021      possessive = TRUE;      possessive = TRUE;
4022      min = 0;      min = 0;
4023      max = GET2(ecode, 1);      max = GET2(ecode, 1);
4024      ecode += 3;      ecode += 1 + IMM2_SIZE;
4025      goto REPEATTYPE;      goto REPEATTYPE;
4026    
4027      case OP_TYPESTAR:      case OP_TYPESTAR:
# Line 3747  for (;;) Line 4067  for (;;)
4067          switch(prop_type)          switch(prop_type)
4068            {            {
4069            case PT_ANY:            case PT_ANY:
4070            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4071            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4072              {              {
4073              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4074                {                {
4075                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4076                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4077                }                }
4078              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4079              }              }
# Line 3762  for (;;) Line 4082  for (;;)
4082            case PT_LAMP:            case PT_LAMP:
4083            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4084              {              {
4085                int chartype;
4086              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4087                {                {
4088                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4089                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4090                }                }
4091              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4092              prop_chartype = UCD_CHARTYPE(c);              chartype = UCD_CHARTYPE(c);
4093              if ((prop_chartype == ucp_Lu ||              if ((chartype == ucp_Lu ||
4094                   prop_chartype == ucp_Ll ||                   chartype == ucp_Ll ||
4095                   prop_chartype == ucp_Lt) == prop_fail_result)                   chartype == ucp_Lt) == prop_fail_result)
4096                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4097              }              }
4098            break;            break;
4099    
# Line 3782  for (;;) Line 4103  for (;;)
4103              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4104                {                {
4105                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4106                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4107                }                }
4108              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4109              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4110              if ((prop_category == prop_value) == prop_fail_result)                RRETURN(MATCH_NOMATCH);
               MRRETURN(MATCH_NOMATCH);  
4111              }              }
4112            break;            break;
4113    
# Line 3797  for (;;) Line 4117  for (;;)
4117              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4118                {                {
4119                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4120                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4121                }                }
4122              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4123              prop_chartype = UCD_CHARTYPE(c);              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4124              if ((prop_chartype == prop_value) == prop_fail_result)                RRETURN(MATCH_NOMATCH);
               MRRETURN(MATCH_NOMATCH);  
4125              }              }
4126            break;            break;
4127    
# Line 3812  for (;;) Line 4131  for (;;)
4131              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4132                {                {
4133                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4134                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4135                }                }
4136              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4137              prop_script = UCD_SCRIPT(c);              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4138              if ((prop_script == prop_value) == prop_fail_result)