/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 617 by ph10, Tue Jul 12 11:00:10 2011 UTC revision 926 by ph10, Wed Feb 22 15:01:32 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 57  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
60  /* Values for setting in md->match_function_type to indicate two special types  /* Values for setting in md->match_function_type to indicate two special types
61  of call to match(). We do it this way to save on using another stack variable,  of call to match(). We do it this way to save on using another stack variable,
62  as stack usage is to be discouraged. */  as stack usage is to be discouraged. */
63    
64  #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */  #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
# Line 76  negative to avoid the external error cod Line 76  negative to avoid the external error cod
76  #define MATCH_ACCEPT       (-999)  #define MATCH_ACCEPT       (-999)
77  #define MATCH_COMMIT       (-998)  #define MATCH_COMMIT       (-998)
78  #define MATCH_KETRPOS      (-997)  #define MATCH_KETRPOS      (-997)
79  #define MATCH_PRUNE        (-996)  #define MATCH_ONCE         (-996)
80  #define MATCH_SKIP         (-995)  #define MATCH_PRUNE        (-995)
81  #define MATCH_SKIP_ARG     (-994)  #define MATCH_SKIP         (-994)
82  #define MATCH_THEN         (-993)  #define MATCH_SKIP_ARG     (-993)
83    #define MATCH_THEN         (-992)
 /* This is a convenience macro for code that occurs many times. */  
   
 #define MRRETURN(ra) \  
   { \  
   md->mark = markptr; \  
   RRETURN(ra); \  
   }  
84    
85  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
86  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
# Line 120  Returns:     nothing Line 113  Returns:     nothing
113  */  */
114    
115  static void  static void
116  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
117  {  {
118  unsigned int c;  unsigned int c;
119  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 137  while (length-- > 0) Line 130  while (length-- > 0)
130    
131  /* Normally, if a back reference hasn't been set, the length that is passed is  /* Normally, if a back reference hasn't been set, the length that is passed is
132  negative, so the match always fails. However, in JavaScript compatibility mode,  negative, so the match always fails. However, in JavaScript compatibility mode,
133  the length passed is zero. Note that in caseless UTF-8 mode, the number of  the length passed is zero. Note that in caseless UTF-8 mode, the number of
134  subject bytes matched may be different to the number of reference bytes.  subject bytes matched may be different to the number of reference bytes.
135    
136  Arguments:  Arguments:
# Line 147  Arguments: Line 140  Arguments:
140    md          points to match data block    md          points to match data block
141    caseless    TRUE if caseless    caseless    TRUE if caseless
142    
143  Returns:      < 0 if not matched, otherwise the number of subject bytes matched  Returns:      >= 0 the number of subject bytes matched
144                  -1 no match
145                  -2 partial match; always given if at end subject
146  */  */
147    
148  static int  static int
149  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
150    BOOL caseless)    BOOL caseless)
151  {  {
152  USPTR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
153  register USPTR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
154    
155  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
156  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 170  pchars(p, length, FALSE, md); Line 165  pchars(p, length, FALSE, md);
165  printf("\n");  printf("\n");
166  #endif  #endif
167    
168  /* Always fail if reference not set (and not JavaScript compatible). */  /* Always fail if reference not set (and not JavaScript compatible - in that
169    case the length is passed as zero). */
170    
171  if (length < 0) return -1;  if (length < 0) return -1;
172    
# Line 180  ASCII characters. */ Line 176  ASCII characters. */
176    
177  if (caseless)  if (caseless)
178    {    {
179  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
180  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
181    if (md->utf8)    if (md->utf)
182      {      {
183      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
184      bytes matched may differ, because there are some characters whose upper and      bytes matched may differ, because there are some characters whose upper and
185      lower case versions code as different numbers of bytes. For example, U+023A      lower case versions code as different numbers of bytes. For example, U+023A
186      (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);      (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
187      a sequence of 3 of the former uses 6 bytes, as does a sequence of two of      a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
188      the latter. It is important, therefore, to check the length along the      the latter. It is important, therefore, to check the length along the
189      reference, not along the subject (earlier code did this wrong). */      reference, not along the subject (earlier code did this wrong). */
190    
191      USPTR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
192      while (p < endptr)      while (p < endptr)
193        {        {
194        int c, d;        int c, d;
195        if (eptr >= md->end_subject) return -1;        if (eptr >= md->end_subject) return -2;   /* Partial match */
196        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
197        GETCHARINC(d, p);        GETCHARINC(d, p);
198        if (c != d && c != UCD_OTHERCASE(d)) return -1;        if (c != d && c != UCD_OTHERCASE(d)) return -1;
# Line 209  if (caseless) Line 205  if (caseless)
205    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
206    is no UCP support. */    is no UCP support. */
207      {      {
     if (eptr + length > md->end_subject) return -1;  
208      while (length-- > 0)      while (length-- > 0)
209        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }        {
210      }        if (eptr >= md->end_subject) return -2;   /* Partial match */
211          if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
212          p++;
213          eptr++;
214          }
215        }
216    }    }
217    
218  /* In the caseful case, we can just compare the bytes, whether or not we  /* In the caseful case, we can just compare the bytes, whether or not we
219  are in UTF-8 mode. */  are in UTF-8 mode. */
220    
221  else  else
222    {    {
223    if (eptr + length > md->end_subject) return -1;    while (length-- > 0)
224    while (length-- > 0) if (*p++ != *eptr++) return -1;      {
225        if (eptr >= md->end_subject) return -2;   /* Partial match */
226        if (*p++ != *eptr++) return -1;
227        }
228    }    }
229    
230  return eptr - eptr_start;  return (int)(eptr - eptr_start);
231  }  }
232    
233    
# Line 276  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 279  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
279         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
280         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
281         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
282         RM61,  RM62, RM63};         RM61,  RM62, RM63, RM64, RM65, RM66 };
283    
284  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
285  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 289  actually used in this definition. */ Line 292  actually used in this definition. */
292  #define RMATCH(ra,rb,rc,rd,re,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
293    { \    { \
294    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
295    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
296    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
297    }    }
298  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 299  actually used in this definition. */ Line 302  actually used in this definition. */
302    }    }
303  #else  #else
304  #define RMATCH(ra,rb,rc,rd,re,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
305    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
306  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
307  #endif  #endif
308    
# Line 314  argument of match(), which never changes Line 317  argument of match(), which never changes
317    
318  #define RMATCH(ra,rb,rc,rd,re,rw)\  #define RMATCH(ra,rb,rc,rd,re,rw)\
319    {\    {\
320    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
321    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
322    frame->Xwhere = rw; \    frame->Xwhere = rw; \
323    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
324    newframe->Xecode = rb;\    newframe->Xecode = rb;\
325    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
   newframe->Xmarkptr = markptr;\  
326    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
327    newframe->Xeptrb = re;\    newframe->Xeptrb = re;\
328    newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
# Line 336  argument of match(), which never changes Line 338  argument of match(), which never changes
338    {\    {\
339    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
340    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
341    (pcre_stack_free)(oldframe);\    if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
342    if (frame != NULL)\    if (frame != NULL)\
343      {\      {\
344      rrc = ra;\      rrc = ra;\
# Line 353  typedef struct heapframe { Line 355  typedef struct heapframe {
355    
356    /* Function arguments that may change */    /* Function arguments that may change */
357    
358    USPTR Xeptr;    PCRE_PUCHAR Xeptr;
359    const uschar *Xecode;    const pcre_uchar *Xecode;
360    USPTR Xmstart;    PCRE_PUCHAR Xmstart;
   USPTR Xmarkptr;  
361    int Xoffset_top;    int Xoffset_top;
362    eptrblock *Xeptrb;    eptrblock *Xeptrb;
363    unsigned int Xrdepth;    unsigned int Xrdepth;
364    
365    /* Function local variables */    /* Function local variables */
366    
367    USPTR Xcallpat;    PCRE_PUCHAR Xcallpat;
368  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
369    USPTR Xcharptr;    PCRE_PUCHAR Xcharptr;
370  #endif  #endif
371    USPTR Xdata;    PCRE_PUCHAR Xdata;
372    USPTR Xnext;    PCRE_PUCHAR Xnext;
373    USPTR Xpp;    PCRE_PUCHAR Xpp;
374    USPTR Xprev;    PCRE_PUCHAR Xprev;
375    USPTR Xsaved_eptr;    PCRE_PUCHAR Xsaved_eptr;
376    
377    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
378    
# Line 383  typedef struct heapframe { Line 384  typedef struct heapframe {
384    int Xprop_type;    int Xprop_type;
385    int Xprop_value;    int Xprop_value;
386    int Xprop_fail_result;    int Xprop_fail_result;
   int Xprop_category;  
   int Xprop_chartype;  
   int Xprop_script;  
387    int Xoclength;    int Xoclength;
388    uschar Xocchars[8];    pcre_uchar Xocchars[6];
389  #endif  #endif
390    
391    int Xcodelink;    int Xcodelink;
# Line 429  returns a negative (error) response, the Line 427  returns a negative (error) response, the
427  same response. */  same response. */
428    
429  /* These macros pack up tests that are used for partial matching, and which  /* These macros pack up tests that are used for partial matching, and which
430  appears several times in the code. We set the "hit end" flag if the pointer is  appear several times in the code. We set the "hit end" flag if the pointer is
431  at the end of the subject and also past the start of the subject (i.e.  at the end of the subject and also past the start of the subject (i.e.
432  something has been matched). For hard partial matching, we then return  something has been matched). For hard partial matching, we then return
433  immediately. The second one is used when we already know we are past the end of  immediately. The second one is used when we already know we are past the end of
# Line 440  the subject. */ Line 438  the subject. */
438        eptr > md->start_used_ptr) \        eptr > md->start_used_ptr) \
439      { \      { \
440      md->hitend = TRUE; \      md->hitend = TRUE; \
441      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
442      }      }
443    
444  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
445    if (md->partial != 0 && eptr > md->start_used_ptr) \    if (md->partial != 0 && eptr > md->start_used_ptr) \
446      { \      { \
447      md->hitend = TRUE; \      md->hitend = TRUE; \
448      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
449      }      }
450    
451    
452  /* Performance note: It might be tempting to extract commonly used fields from  /* Performance note: It might be tempting to extract commonly used fields from
453  the md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf, end_subject) into individual variables to improve
454  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
455  made performance worse.  made performance worse.
456    
# Line 461  Arguments: Line 459  Arguments:
459     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
460     mstart      pointer to the current match start position (can be modified     mstart      pointer to the current match start position (can be modified
461                   by encountering \K)                   by encountering \K)
    markptr     pointer to the most recent MARK name, or NULL  
462     offset_top  current top pointer     offset_top  current top pointer
463     md          pointer to "static" info for the match     md          pointer to "static" info for the match
464     eptrb       pointer to chain of blocks containing eptr at start of     eptrb       pointer to chain of blocks containing eptr at start of
# Line 476  Returns:       MATCH_MATCH if matched Line 473  Returns:       MATCH_MATCH if matched
473  */  */
474    
475  static int  static int
476  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
477    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,    PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
478    unsigned int rdepth)    unsigned int rdepth)
479  {  {
480  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
# Line 487  so they can be ordinary variables in all Line 484  so they can be ordinary variables in all
484  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
485  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
486  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
487  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
488    
489  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
490  BOOL caseless;  BOOL caseless;
491  int condcode;  int condcode;
492    
493  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
494  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
495  heap storage. Set up the top-level frame here; others are obtained from the  frame on the stack here; subsequent instantiations are obtained from the heap
496  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
497    the top-level on the stack rather than malloc-ing them all gives a performance
498    boost in many cases where there is not much "recursion". */
499    
500  #ifdef NO_RECURSE  #ifdef NO_RECURSE
501  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));  heapframe frame_zero;
502  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  heapframe *frame = &frame_zero;
503  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
504    
505  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 508  frame->Xprevframe = NULL;            /* Line 507  frame->Xprevframe = NULL;            /*
507  frame->Xeptr = eptr;  frame->Xeptr = eptr;
508  frame->Xecode = ecode;  frame->Xecode = ecode;
509  frame->Xmstart = mstart;  frame->Xmstart = mstart;
 frame->Xmarkptr = markptr;  
510  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
511  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
512  frame->Xrdepth = rdepth;  frame->Xrdepth = rdepth;
# Line 522  HEAP_RECURSE: Line 520  HEAP_RECURSE:
520  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
521  #define ecode              frame->Xecode  #define ecode              frame->Xecode
522  #define mstart             frame->Xmstart  #define mstart             frame->Xmstart
 #define markptr            frame->Xmarkptr  
523  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
524  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
525  #define rdepth             frame->Xrdepth  #define rdepth             frame->Xrdepth
526    
527  /* Ditto for the local variables */  /* Ditto for the local variables */
528    
529  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
530  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
531  #endif  #endif
532  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
# Line 550  HEAP_RECURSE: Line 547  HEAP_RECURSE:
547  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
548  #define prop_value         frame->Xprop_value  #define prop_value         frame->Xprop_value
549  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
 #define prop_category      frame->Xprop_category  
 #define prop_chartype      frame->Xprop_chartype  
 #define prop_script        frame->Xprop_script  
550  #define oclength           frame->Xoclength  #define oclength           frame->Xoclength
551  #define occhars            frame->Xocchars  #define occhars            frame->Xocchars
552  #endif  #endif
# Line 590  declarations can be cut out in a block. Line 584  declarations can be cut out in a block.
584  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
585  to RMATCH(). */  to RMATCH(). */
586    
587  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
588  const uschar *charptr;  const pcre_uchar *charptr;
589  #endif  #endif
590  const uschar *callpat;  const pcre_uchar *callpat;
591  const uschar *data;  const pcre_uchar *data;
592  const uschar *next;  const pcre_uchar *next;
593  USPTR         pp;  PCRE_PUCHAR       pp;
594  const uschar *prev;  const pcre_uchar *prev;
595  USPTR         saved_eptr;  PCRE_PUCHAR       saved_eptr;
596    
597  recursion_info new_recursive;  recursion_info new_recursive;
598    
599  BOOL cur_is_word;  BOOL cur_is_word;
600  BOOL condition;  BOOL condition;
601  BOOL prev_is_word;  BOOL prev_is_word;
602    
# Line 610  BOOL prev_is_word; Line 604  BOOL prev_is_word;
604  int prop_type;  int prop_type;
605  int prop_value;  int prop_value;
606  int prop_fail_result;  int prop_fail_result;
 int prop_category;  
 int prop_chartype;  
 int prop_script;  
607  int oclength;  int oclength;
608  uschar occhars[8];  pcre_uchar occhars[6];
609  #endif  #endif
610    
611  int codelink;  int codelink;
# Line 630  int save_offset1, save_offset2, save_off Line 621  int save_offset1, save_offset2, save_off
621  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
622    
623  eptrblock newptrb;  eptrblock newptrb;
624    
625    /* There is a special fudge for calling match() in a way that causes it to
626    measure the size of its basic stack frame when the stack is being used for
627    recursion. The second argument (ecode) being NULL triggers this behaviour. It
628    cannot normally ever be NULL. The return is the negated value of the frame
629    size. */
630    
631    if (ecode == NULL)
632      {
633      if (rdepth == 0)
634        return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
635      else
636        {
637        int len = (char *)&rdepth - (char *)eptr;
638        return (len > 0)? -len : len;
639        }
640      }
641  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
642    
643  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
644  of the local variables that are used only in localised parts of the code, but  of the local variables that are used only in localised parts of the code, but
645  still need to be preserved over recursive calls of match(). These macros define  still need to be preserved over recursive calls of match(). These macros define
646  the alternative names that are used. */  the alternative names that are used. */
647    
648  #define allow_zero    cur_is_word  #define allow_zero    cur_is_word
# Line 642  the alternative names that are used. */ Line 650  the alternative names that are used. */
650  #define code_offset   codelink  #define code_offset   codelink
651  #define condassert    condition  #define condassert    condition
652  #define matched_once  prev_is_word  #define matched_once  prev_is_word
653    #define foc           number
654    #define save_mark     data
655    
656  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
657  variables. */  variables. */
# Line 667  defined). However, RMATCH isn't like a f Line 677  defined). However, RMATCH isn't like a f
677  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
678  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
679    
680  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
681  utf8 = md->utf8;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
682  #else  #else
683  utf8 = FALSE;  utf = FALSE;
684  #endif  #endif
685    
686  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
# Line 680  if (md->match_call_count++ >= md->match_ Line 690  if (md->match_call_count++ >= md->match_
690  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
691    
692  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
693  string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is  string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
694  done this way to save having to use another function argument, which would take  done this way to save having to use another function argument, which would take
695  up space on the stack. See also MATCH_CONDASSERT below.  up space on the stack. See also MATCH_CONDASSERT below.
696    
697  When MATCH_CBEGROUP is set, add the current subject pointer to the chain of  When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
# Line 705  for (;;) Line 715  for (;;)
715    {    {
716    minimize = possessive = FALSE;    minimize = possessive = FALSE;
717    op = *ecode;    op = *ecode;
718    
719    switch(op)    switch(op)
720      {      {
721      case OP_MARK:      case OP_MARK:
722      markptr = ecode + 2;      md->nomatch_mark = ecode + 2;
723      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      md->mark = NULL;    /* In case previously set by assertion */
724        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
725        eptrb, RM55);        eptrb, RM55);
726        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
727             md->mark == NULL) md->mark = ecode + 2;
728    
729      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
730      argument, and we must check whether that argument matches this MARK's      argument, and we must check whether that argument matches this MARK's
# Line 720  for (;;) Line 733  for (;;)
733      position and return MATCH_SKIP. Otherwise, pass back the return code      position and return MATCH_SKIP. Otherwise, pass back the return code
734      unaltered. */      unaltered. */
735    
736      if (rrc == MATCH_SKIP_ARG &&      else if (rrc == MATCH_SKIP_ARG &&
737          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)          STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
738        {        {
739        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
740        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
741        }        }
   
     if (md->mark == NULL) md->mark = markptr;  
742      RRETURN(rrc);      RRETURN(rrc);
743    
744      case OP_FAIL:      case OP_FAIL:
745      MRRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
746    
747      /* COMMIT overrides PRUNE, SKIP, and THEN */      /* COMMIT overrides PRUNE, SKIP, and THEN */
748    
749      case OP_COMMIT:      case OP_COMMIT:
750      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
751        eptrb, RM52);        eptrb, RM52);
752      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
753          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
754          rrc != MATCH_THEN)          rrc != MATCH_THEN)
755        RRETURN(rrc);        RRETURN(rrc);
756      MRRETURN(MATCH_COMMIT);      RRETURN(MATCH_COMMIT);
757    
758      /* PRUNE overrides THEN */      /* PRUNE overrides THEN */
759    
760      case OP_PRUNE:      case OP_PRUNE:
761      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
762        eptrb, RM51);        eptrb, RM51);
763      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
764      MRRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
765    
766      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
767      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      md->nomatch_mark = ecode + 2;
768        md->mark = NULL;    /* In case previously set by assertion */
769        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
770        eptrb, RM56);        eptrb, RM56);
771        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
772             md->mark == NULL) md->mark = ecode + 2;
773      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
     md->mark = ecode + 2;  
774      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
775    
776      /* SKIP overrides PRUNE and THEN */      /* SKIP overrides PRUNE and THEN */
777    
778      case OP_SKIP:      case OP_SKIP:
779      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
780        eptrb, RM53);        eptrb, RM53);
781      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
782        RRETURN(rrc);        RRETURN(rrc);
783      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
784      MRRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
785    
786        /* Note that, for Perl compatibility, SKIP with an argument does NOT set
787        nomatch_mark. There is a flag that disables this opcode when re-matching a
788        pattern that ended with a SKIP for which there was not a matching MARK. */
789    
790      case OP_SKIP_ARG:      case OP_SKIP_ARG:
791      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      if (md->ignore_skip_arg)
792          {
793          ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
794          break;
795          }
796        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
797        eptrb, RM57);        eptrb, RM57);
798      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
799        RRETURN(rrc);        RRETURN(rrc);
800    
801      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
802      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
803      caught by a matching MARK, or get to the top, where it is treated the same      caught by a matching MARK, or get to the top, where it causes a rematch
804      as PRUNE. */      with the md->ignore_skip_arg flag set. */
805    
806      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
807      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
808    
809      /* For THEN (and THEN_ARG) we pass back the address of the bracket or      /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
810      the alt that is at the start of the current branch. This makes it possible      the branch in which it occurs can be determined. Overload the start of
811      to skip back past alternatives that precede the THEN within the current      match pointer to do this. */
     branch. */  
812    
813      case OP_THEN:      case OP_THEN:
814      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
815        eptrb, RM54);        eptrb, RM54);
816      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
817      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
818      MRRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
819    
820      case OP_THEN_ARG:      case OP_THEN_ARG:
821      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],      md->nomatch_mark = ecode + 2;
822        offset_top, md, eptrb, RM58);      md->mark = NULL;    /* In case previously set by assertion */
823        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
824          md, eptrb, RM58);
825        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
826             md->mark == NULL) md->mark = ecode + 2;
827      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
828      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
     md->mark = ecode + LINK_SIZE + 2;  
829      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
830    
831        /* Handle an atomic group that does not contain any capturing parentheses.
832        This can be handled like an assertion. Prior to 8.13, all atomic groups
833        were handled this way. In 8.13, the code was changed as below for ONCE, so
834        that backups pass through the group and thereby reset captured values.
835        However, this uses a lot more stack, so in 8.20, atomic groups that do not
836        contain any captures generate OP_ONCE_NC, which can be handled in the old,
837        less stack intensive way.
838    
839        Check the alternative branches in turn - the matching won't pass the KET
840        for this kind of subpattern. If any one branch matches, we carry on as at
841        the end of a normal bracket, leaving the subject pointer, but resetting
842        the start-of-match value in case it was changed by \K. */
843    
844        case OP_ONCE_NC:
845        prev = ecode;
846        saved_eptr = eptr;
847        save_mark = md->mark;
848        do
849          {
850          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
851          if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
852            {
853            mstart = md->start_match_ptr;
854            break;
855            }
856          if (rrc == MATCH_THEN)
857            {
858            next = ecode + GET(ecode,1);
859            if (md->start_match_ptr < next &&
860                (*ecode == OP_ALT || *next == OP_ALT))
861              rrc = MATCH_NOMATCH;
862            }
863    
864          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
865          ecode += GET(ecode,1);
866          md->mark = save_mark;
867          }
868        while (*ecode == OP_ALT);
869    
870        /* If we hit the end of the group (which could be repeated), fail */
871    
872        if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
873    
874        /* Continue as from after the group, updating the offsets high water
875        mark, since extracts may have been taken. */
876    
877        do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
878    
879        offset_top = md->end_offset_top;
880        eptr = md->end_match_ptr;
881    
882        /* For a non-repeating ket, just continue at this level. This also
883        happens for a repeating ket if no characters were matched in the group.
884        This is the forcible breaking of infinite loops as implemented in Perl
885        5.005. */
886    
887        if (*ecode == OP_KET || eptr == saved_eptr)
888          {
889          ecode += 1+LINK_SIZE;
890          break;
891          }
892    
893        /* The repeating kets try the rest of the pattern or restart from the
894        preceding bracket, in the appropriate order. The second "call" of match()
895        uses tail recursion, to avoid using another stack frame. */
896    
897        if (*ecode == OP_KETRMIN)
898          {
899          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
900          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
901          ecode = prev;
902          goto TAIL_RECURSE;
903          }
904        else  /* OP_KETRMAX */
905          {
906          md->match_function_type = MATCH_CBEGROUP;
907          RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
908          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
909          ecode += 1 + LINK_SIZE;
910          goto TAIL_RECURSE;
911          }
912        /* Control never gets here */
913    
914      /* Handle a capturing bracket, other than those that are possessive with an      /* Handle a capturing bracket, other than those that are possessive with an
915      unlimited repeat. If there is space in the offset vector, save the current      unlimited repeat. If there is space in the offset vector, save the current
916      subject position in the working slot at the top of the vector. We mustn't      subject position in the working slot at the top of the vector. We mustn't
917      change the current values of the data slot, because they may be set from a      change the current values of the data slot, because they may be set from a
918      previous iteration of this group, and be referred to by a reference inside      previous iteration of this group, and be referred to by a reference inside
919      the group. A failure to match might occur after the group has succeeded,      the group. A failure to match might occur after the group has succeeded,
920      if something later on doesn't match. For this reason, we need to restore      if something later on doesn't match. For this reason, we need to restore
921      the working value and also the values of the final offsets, in case they      the working value and also the values of the final offsets, in case they
922      were set by a previous iteration of the same bracket.      were set by a previous iteration of the same bracket.
# Line 821  for (;;) Line 929  for (;;)
929      case OP_SCBRA:      case OP_SCBRA:
930      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
931      offset = number << 1;      offset = number << 1;
932    
933  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
934      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
935      printf("subject=");      printf("subject=");
# Line 835  for (;;) Line 943  for (;;)
943        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
944        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
945        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
946          save_mark = md->mark;
947    
948        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
949        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 842  for (;;) Line 951  for (;;)
951    
952        for (;;)        for (;;)
953          {          {
954          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
955          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
956            eptrb, RM1);            eptrb, RM1);
957          if (rrc != MATCH_NOMATCH &&          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
958              (rrc != MATCH_THEN || md->start_match_ptr != ecode))  
959            RRETURN(rrc);          /* If we backed up to a THEN, check whether it is within the current
960            branch by comparing the address of the THEN that is passed back with
961            the end of the branch. If it is within the current branch, and the
962            branch is one of two or more alternatives (it either starts or ends
963            with OP_ALT), we have reached the limit of THEN's action, so convert
964            the return code to NOMATCH, which will cause normal backtracking to
965            happen from now on. Otherwise, THEN is passed back to an outer
966            alternative. This implements Perl's treatment of parenthesized groups,
967            where a group not containing | does not affect the current alternative,
968            that is, (X) is NOT the same as (X|(*F)). */
969    
970            if (rrc == MATCH_THEN)
971              {
972              next = ecode + GET(ecode,1);
973              if (md->start_match_ptr < next &&
974                  (*ecode == OP_ALT || *next == OP_ALT))
975                rrc = MATCH_NOMATCH;
976              }
977    
978            /* Anything other than NOMATCH is passed back. */
979    
980            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
981          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
982          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
983          if (*ecode != OP_ALT) break;          md->mark = save_mark;
984            if (*ecode != OP_ALT) break;
985          }          }
986    
987        DPRINTF(("bracket %d failed\n", number));        DPRINTF(("bracket %d failed\n", number));
   
988        md->offset_vector[offset] = save_offset1;        md->offset_vector[offset] = save_offset1;
989        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
990        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
991    
992        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;        /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
993        RRETURN(MATCH_NOMATCH);  
994          RRETURN(rrc);
995        }        }
996    
997      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
# Line 874  for (;;) Line 1005  for (;;)
1005      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1006      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1007    
1008      /* Non-capturing bracket, except for possessive with unlimited repeat. Loop      /* Non-capturing or atomic group, except for possessive with unlimited
1009      for all the alternatives. When we get to the final alternative within the      repeat and ONCE group with no captures. Loop for all the alternatives.
     brackets, we used to return the result of a recursive call to match()  
     whatever happened so it was possible to reduce stack usage by turning this  
     into a tail recursion, except in the case of a possibly empty group.  
     However, now that there is the possibility of (*THEN) occurring in the final  
     alternative, this optimization is no longer possible. */  
1010    
1011        When we get to the final alternative within the brackets, we used to return
1012        the result of a recursive call to match() whatever happened so it was
1013        possible to reduce stack usage by turning this into a tail recursion,
1014        except in the case of a possibly empty group. However, now that there is
1015        the possibility of (*THEN) occurring in the final alternative, this
1016        optimization is no longer always possible.
1017    
1018        We can optimize if we know there are no (*THEN)s in the pattern; at present
1019        this is the best that can be done.
1020    
1021        MATCH_ONCE is returned when the end of an atomic group is successfully
1022        reached, but subsequent matching fails. It passes back up the tree (causing
1023        captured values to be reset) until the original atomic group level is
1024        reached. This is tested by comparing md->once_target with the start of the
1025        group. At this point, the return is converted into MATCH_NOMATCH so that
1026        previous backup points can be taken. */
1027    
1028        case OP_ONCE:
1029      case OP_BRA:      case OP_BRA:
1030      case OP_SBRA:      case OP_SBRA:
1031      DPRINTF(("start non-capturing bracket\n"));      DPRINTF(("start non-capturing bracket\n"));
1032    
1033      for (;;)      for (;;)
1034        {        {
1035        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
1036        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,  
1037          /* If this is not a possibly empty group, and there are no (*THEN)s in
1038          the pattern, and this is the final alternative, optimize as described
1039          above. */
1040    
1041          else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1042            {
1043            ecode += PRIV(OP_lengths)[*ecode];
1044            goto TAIL_RECURSE;
1045            }
1046    
1047          /* In all other cases, we have to make another call to match(). */
1048    
1049          save_mark = md->mark;
1050          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1051          RM2);          RM2);
1052        if (rrc != MATCH_NOMATCH &&  
1053            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1054          THEN. */
1055    
1056          if (rrc == MATCH_THEN)
1057            {
1058            next = ecode + GET(ecode,1);
1059            if (md->start_match_ptr < next &&
1060                (*ecode == OP_ALT || *next == OP_ALT))
1061              rrc = MATCH_NOMATCH;
1062            }
1063    
1064          if (rrc != MATCH_NOMATCH)
1065            {
1066            if (rrc == MATCH_ONCE)
1067              {
1068              const pcre_uchar *scode = ecode;
1069              if (*scode != OP_ONCE)           /* If not at start, find it */
1070                {
1071                while (*scode == OP_ALT) scode += GET(scode, 1);
1072                scode -= GET(scode, 1);
1073                }
1074              if (md->once_target == scode) rrc = MATCH_NOMATCH;
1075              }
1076          RRETURN(rrc);          RRETURN(rrc);
1077            }
1078        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1079        if (*ecode != OP_ALT) break;        md->mark = save_mark;
1080          if (*ecode != OP_ALT) break;
1081        }        }
1082    
     if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;  
1083      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
1084    
1085      /* Handle possessive capturing brackets with an unlimited repeat. We come      /* Handle possessive capturing brackets with an unlimited repeat. We come
1086      here from BRAZERO with allow_zero set TRUE. The offset_vector values are      here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1087      handled similarly to the normal case above. However, the matching is      handled similarly to the normal case above. However, the matching is
1088      different. The end of these brackets will always be OP_KETRPOS, which      different. The end of these brackets will always be OP_KETRPOS, which
1089      returns MATCH_KETRPOS without going further in the pattern. By this means      returns MATCH_KETRPOS without going further in the pattern. By this means
1090      we can handle the group by iteration rather than recursion, thereby      we can handle the group by iteration rather than recursion, thereby
1091      reducing the amount of stack needed. */      reducing the amount of stack needed. */
1092    
1093      case OP_CBRAPOS:      case OP_CBRAPOS:
1094      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1095      allow_zero = FALSE;      allow_zero = FALSE;
1096    
1097      POSSESSIVE_CAPTURE:      POSSESSIVE_CAPTURE:
1098      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
1099      offset = number << 1;      offset = number << 1;
# Line 926  for (;;) Line 1108  for (;;)
1108      if (offset < md->offset_max)      if (offset < md->offset_max)
1109        {        {
1110        matched_once = FALSE;        matched_once = FALSE;
1111        code_offset = ecode - md->start_code;        code_offset = (int)(ecode - md->start_code);
1112    
1113        save_offset1 = md->offset_vector[offset];        save_offset1 = md->offset_vector[offset];
1114        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
# Line 934  for (;;) Line 1116  for (;;)
1116        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
1117    
1118        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1119    
1120        /* Each time round the loop, save the current subject position for use        /* Each time round the loop, save the current subject position for use
1121        when the group matches. For MATCH_MATCH, the group has matched, so we        when the group matches. For MATCH_MATCH, the group has matched, so we
1122        restart it with a new subject starting position, remembering that we had        restart it with a new subject starting position, remembering that we had
1123        at least one match. For MATCH_NOMATCH, carry on with the alternatives, as        at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1124        usual. If we haven't matched any alternatives in any iteration, check to        usual. If we haven't matched any alternatives in any iteration, check to
1125        see if a previous iteration matched. If so, the group has matched;        see if a previous iteration matched. If so, the group has matched;
1126        continue from afterwards. Otherwise it has failed; restore the previous        continue from afterwards. Otherwise it has failed; restore the previous
1127        capture values before returning NOMATCH. */        capture values before returning NOMATCH. */
1128    
1129        for (;;)        for (;;)
1130          {          {
1131          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1132            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1133          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1134          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1135            eptrb, RM63);            eptrb, RM63);
1136          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1137            {            {
1138            offset_top = md->end_offset_top;            offset_top = md->end_offset_top;
1139            eptr = md->end_match_ptr;            eptr = md->end_match_ptr;
1140            ecode = md->start_code + code_offset;            ecode = md->start_code + code_offset;
1141            save_capture_last = md->capture_last;            save_capture_last = md->capture_last;
1142            matched_once = TRUE;            matched_once = TRUE;
1143            continue;            continue;
1144            }            }
1145          if (rrc != MATCH_NOMATCH &&  
1146              (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* See comment in the code for capturing groups above about handling
1147            RRETURN(rrc);          THEN. */
1148    
1149            if (rrc == MATCH_THEN)
1150              {
1151              next = ecode + GET(ecode,1);
1152              if (md->start_match_ptr < next &&
1153                  (*ecode == OP_ALT || *next == OP_ALT))
1154                rrc = MATCH_NOMATCH;
1155              }
1156    
1157            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1158          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
1159          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
1160          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
1161          }          }
1162    
1163        if (!matched_once)        if (!matched_once)
1164          {          {
1165          md->offset_vector[offset] = save_offset1;          md->offset_vector[offset] = save_offset1;
1166          md->offset_vector[offset+1] = save_offset2;          md->offset_vector[offset+1] = save_offset2;
1167          md->offset_vector[md->offset_end - number] = save_offset3;          md->offset_vector[md->offset_end - number] = save_offset3;
1168          }          }
1169    
       if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;  
1170        if (allow_zero || matched_once)        if (allow_zero || matched_once)
1171          {          {
1172          ecode += 1 + LINK_SIZE;          ecode += 1 + LINK_SIZE;
1173          break;          break;
1174          }          }
1175    
1176        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1177        }        }
1178    
1179      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1180      as a non-capturing bracket. */      as a non-capturing bracket. */
1181    
# Line 996  for (;;) Line 1187  for (;;)
1187      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1188      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1189    
1190      /* Non-capturing possessive bracket with unlimited repeat. We come here      /* Non-capturing possessive bracket with unlimited repeat. We come here
1191      from BRAZERO with allow_zero = TRUE. The code is similar to the above,      from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1192      without the capturing complication. It is written out separately for speed      without the capturing complication. It is written out separately for speed
1193      and cleanliness. */      and cleanliness. */
1194    
1195      case OP_BRAPOS:      case OP_BRAPOS:
1196      case OP_SBRAPOS:      case OP_SBRAPOS:
1197      allow_zero = FALSE;      allow_zero = FALSE;
1198    
1199      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1200      matched_once = FALSE;      matched_once = FALSE;
1201      code_offset = ecode - md->start_code;      code_offset = (int)(ecode - md->start_code);
1202    
1203      for (;;)      for (;;)
1204        {        {
1205        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1206        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1207          eptrb, RM48);          eptrb, RM48);
1208        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1209          {          {
1210          offset_top = md->end_offset_top;          offset_top = md->end_offset_top;
1211          eptr = md->end_match_ptr;          eptr = md->end_match_ptr;
1212          ecode = md->start_code + code_offset;          ecode = md->start_code + code_offset;
1213          matched_once = TRUE;          matched_once = TRUE;
1214          continue;          continue;
1215          }          }
1216        if (rrc != MATCH_NOMATCH &&  
1217            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1218          RRETURN(rrc);        THEN. */
1219    
1220          if (rrc == MATCH_THEN)
1221            {
1222            next = ecode + GET(ecode,1);
1223            if (md->start_match_ptr < next &&
1224                (*ecode == OP_ALT || *next == OP_ALT))
1225              rrc = MATCH_NOMATCH;
1226            }
1227    
1228          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1229        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1230        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1231        }        }
1232    
1233      if (matched_once || allow_zero)      if (matched_once || allow_zero)
1234        {        {
1235        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1236        break;        break;
1237        }        }
1238      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
1239    
1240      /* Control never reaches here. */      /* Control never reaches here. */
# Line 1052  for (;;) Line 1253  for (;;)
1253    
1254      if (ecode[LINK_SIZE+1] == OP_CALLOUT)      if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1255        {        {
1256        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
1257          {          {
1258          pcre_callout_block cb;          PUBL(callout_block) cb;
1259          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
1260          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1261          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1262    #ifdef COMPILE_PCRE8
1263          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1264    #else
1265            cb.subject          = (PCRE_SPTR16)md->start_subject;
1266    #endif
1267          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1268          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
1269          cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
# Line 1067  for (;;) Line 1272  for (;;)
1272          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1273          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1274          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1275          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          cb.mark             = md->nomatch_mark;
1276            if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1277          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1278          }          }
1279        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1280        }        }
1281    
1282      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1087  for (;;) Line 1293  for (;;)
1293        else        else
1294          {          {
1295          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1296          condition =  (recno == RREF_ANY || recno == md->recursive->group_num);          condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1297    
1298          /* If the test is for recursion into a specific subpattern, and it is          /* If the test is for recursion into a specific subpattern, and it is
1299          false, but the test was set up by name, scan the table to see if the          false, but the test was set up by name, scan the table to see if the
1300          name refers to any other numbers, and test them. The condition is true          name refers to any other numbers, and test them. The condition is true
1301          if any one is set. */          if any one is set. */
1302    
1303          if (!condition && condcode == OP_NRREF && recno != RREF_ANY)          if (!condition && condcode == OP_NRREF)
1304            {            {
1305            uschar *slotA = md->name_table;            pcre_uchar *slotA = md->name_table;
1306            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
1307              {              {
1308              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
# Line 1109  for (;;) Line 1315  for (;;)
1315    
1316            if (i < md->name_count)            if (i < md->name_count)
1317              {              {
1318              uschar *slotB = slotA;              pcre_uchar *slotB = slotA;
1319              while (slotB > md->name_table)              while (slotB > md->name_table)
1320                {                {
1321                slotB -= md->name_entry_size;                slotB -= md->name_entry_size;
1322                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1323                  {                  {
1324                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
1325                  if (condition) break;                  if (condition) break;
# Line 1129  for (;;) Line 1335  for (;;)
1335                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
1336                  {                  {
1337                  slotB += md->name_entry_size;                  slotB += md->name_entry_size;
1338                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                  if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1339                    {                    {
1340                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
1341                    if (condition) break;                    if (condition) break;
# Line 1142  for (;;) Line 1348  for (;;)
1348    
1349          /* Choose branch according to the condition */          /* Choose branch according to the condition */
1350    
1351          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1352          }          }
1353        }        }
1354    
# Line 1159  for (;;) Line 1365  for (;;)
1365        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1366          {          {
1367          int refno = offset >> 1;          int refno = offset >> 1;
1368          uschar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1369    
1370          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
1371            {            {
# Line 1173  for (;;) Line 1379  for (;;)
1379    
1380          if (i < md->name_count)          if (i < md->name_count)
1381            {            {
1382            uschar *slotB = slotA;            pcre_uchar *slotB = slotA;
1383            while (slotB > md->name_table)            while (slotB > md->name_table)
1384              {              {
1385              slotB -= md->name_entry_size;              slotB -= md->name_entry_size;
1386              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1387                {                {
1388                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
1389                condition = offset < offset_top &&                condition = offset < offset_top &&
# Line 1195  for (;;) Line 1401  for (;;)
1401              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
1402                {                {
1403                slotB += md->name_entry_size;                slotB += md->name_entry_size;
1404                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1405                  {                  {
1406                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
1407                  condition = offset < offset_top &&                  condition = offset < offset_top &&
# Line 1210  for (;;) Line 1416  for (;;)
1416    
1417        /* Choose branch according to the condition */        /* Choose branch according to the condition */
1418    
1419        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1420        }        }
1421    
1422      else if (condcode == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
# Line 1225  for (;;) Line 1431  for (;;)
1431    
1432      else      else
1433        {        {
1434        md->match_function_type = MATCH_CONDASSERT;        md->match_function_type = MATCH_CONDASSERT;
1435        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1436        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1437          {          {
1438            if (md->end_offset_top > offset_top)
1439              offset_top = md->end_offset_top;  /* Captures may have happened */
1440          condition = TRUE;          condition = TRUE;
1441          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1442          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1443          }          }
1444        else if (rrc != MATCH_NOMATCH &&  
1445                (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1446          assertion; it is therefore treated as NOMATCH. */
1447    
1448          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1449          {          {
1450          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1451          }          }
# Line 1245  for (;;) Line 1456  for (;;)
1456          }          }
1457        }        }
1458    
1459      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one, we can
1460      we used to use tail recursion to avoid using another stack frame, except      use tail recursion to avoid using another stack frame, except when there is
1461      when there was unlimited repeat of a possibly empty group. However, that      unlimited repeat of a possibly empty group. In the latter case, a recursive
1462      strategy no longer works because of the possibility of (*THEN) being      call to match() is always required, unless the second alternative doesn't
1463      encountered in the branch. A recursive call to match() is always required,      exist, in which case we can just plough on. Note that, for compatibility
1464      unless the second alternative doesn't exist, in which case we can just      with Perl, the | in a conditional group is NOT treated as creating two
1465      plough on. */      alternatives. If a THEN is encountered in the branch, it propagates out to
1466        the enclosing alternative (unless nested in a deeper set of alternatives,
1467        of course). */
1468    
1469      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1470        {        {
1471        if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;        if (op != OP_SCOND)
1472            {
1473            ecode += 1 + LINK_SIZE;
1474            goto TAIL_RECURSE;
1475            }
1476    
1477          md->match_function_type = MATCH_CBEGROUP;
1478        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
       if (rrc == MATCH_THEN && md->start_match_ptr == ecode)  
         rrc = MATCH_NOMATCH;  
1479        RRETURN(rrc);        RRETURN(rrc);
1480        }        }
1481      else                         /* Condition false & no alternative */  
1482         /* Condition false & no alternative; continue after the group. */
1483    
1484        else
1485        {        {
1486        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1487        }        }
# Line 1288  for (;;) Line 1508  for (;;)
1508        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1509        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1510        }        }
1511      ecode += 3;      ecode += 1 + IMM2_SIZE;
1512      break;      break;
1513    
1514    
1515      /* End of the pattern, either real or forced. If we are in a recursion, we      /* End of the pattern, either real or forced. */
     should restore the offsets appropriately, and if it's a top-level  
     recursion, continue from after the call. */  
1516    
     case OP_ACCEPT:  
     case OP_ASSERT_ACCEPT:  
1517      case OP_END:      case OP_END:
1518      if (md->recursive != NULL)      case OP_ACCEPT:
1519        {      case OP_ASSERT_ACCEPT:
       recursion_info *rec = md->recursive;  
       md->recursive = rec->prevrec;  
       memmove(md->offset_vector, rec->offset_save,  
         rec->saved_max * sizeof(int));  
       offset_top = rec->save_offset_top;  
       if (rec->group_num == 0)  
         {  
         ecode = rec->after_call;  
         break;  
         }  
       }  
1520    
1521      /* Otherwise, if we have matched an empty string, fail if not in an      /* If we have matched an empty string, fail if not in an assertion and not
1522      assertion and if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART      in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1523      is set and we have matched at the start of the subject. In both cases,      is set and we have matched at the start of the subject. In both cases,
1524      backtracking will then try other alternatives, if any. */      backtracking will then try other alternatives, if any. */
1525    
1526      else if (eptr == mstart && op != OP_ASSERT_ACCEPT &&      if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1527          (md->notempty ||           md->recursive == NULL &&
1528            (md->notempty_atstart &&           (md->notempty ||
1529              mstart == md->start_subject + md->start_offset)))             (md->notempty_atstart &&
1530        MRRETURN(MATCH_NOMATCH);               mstart == md->start_subject + md->start_offset)))
1531          RRETURN(MATCH_NOMATCH);
1532    
1533      /* Otherwise, we have a match. */      /* Otherwise, we have a match. */
1534    
1535      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1536      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1537      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1538    
1539      /* For some reason, the macros don't work properly if an expression is      /* For some reason, the macros don't work properly if an expression is
1540      given as the argument to MRRETURN when the heap is in use. */      given as the argument to RRETURN when the heap is in use. */
1541    
1542      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1543      MRRETURN(rrc);      RRETURN(rrc);
1544    
1545      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
1546      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
1547      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1548      start of each branch to move the current point backwards, so the code at      start of each branch to move the current point backwards, so the code at
1549      this level is identical to the lookahead case. When the assertion is part      this level is identical to the lookahead case. When the assertion is part
1550      of a condition, we want to return immediately afterwards. The caller of      of a condition, we want to return immediately afterwards. The caller of
1551      this incarnation of the match() function will have set MATCH_CONDASSERT in      this incarnation of the match() function will have set MATCH_CONDASSERT in
1552      md->match_function_type, and one of these opcodes will be the first opcode      md->match_function_type, and one of these opcodes will be the first opcode
1553      that is processed. We use a local variable that is preserved over calls to      that is processed. We use a local variable that is preserved over calls to
1554      match() to remember this case. */      match() to remember this case. */
1555    
1556      case OP_ASSERT:      case OP_ASSERT:
1557      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1558        save_mark = md->mark;
1559      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1560        {        {
1561        condassert = TRUE;        condassert = TRUE;
1562        md->match_function_type = 0;        md->match_function_type = 0;
1563        }        }
1564      else condassert = FALSE;      else condassert = FALSE;
1565    
1566      do      do
1567        {        {
1568        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
# Line 1364  for (;;) Line 1571  for (;;)
1571          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1572          break;          break;
1573          }          }
1574        if (rrc != MATCH_NOMATCH &&  
1575            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1576          RRETURN(rrc);        as NOMATCH. */
1577    
1578          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1579        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1580          md->mark = save_mark;
1581        }        }
1582      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1583    
1584      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1585    
1586      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1587    
# Line 1391  for (;;) Line 1601  for (;;)
1601    
1602      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1603      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1604        save_mark = md->mark;
1605      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1606        {        {
1607        condassert = TRUE;        condassert = TRUE;
1608        md->match_function_type = 0;        md->match_function_type = 0;
1609        }        }
1610      else condassert = FALSE;      else condassert = FALSE;
1611    
1612      do      do
1613        {        {
1614        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1615        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);        md->mark = save_mark;
1616          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1617        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1618          {          {
1619          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1620          break;          break;
1621          }          }
1622        if (rrc != MATCH_NOMATCH &&  
1623            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1624          RRETURN(rrc);        as NOMATCH. */
1625    
1626          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1627        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1628        }        }
1629      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1630    
1631      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1632    
1633      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1634      continue;      continue;
1635    
# Line 1425  for (;;) Line 1639  for (;;)
1639      back a number of characters, not bytes. */      back a number of characters, not bytes. */
1640    
1641      case OP_REVERSE:      case OP_REVERSE:
1642  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1643      if (utf8)      if (utf)
1644        {        {
1645        i = GET(ecode, 1);        i = GET(ecode, 1);
1646        while (i-- > 0)        while (i-- > 0)
1647          {          {
1648          eptr--;          eptr--;
1649          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1650          BACKCHAR(eptr);          BACKCHAR(eptr);
1651          }          }
1652        }        }
# Line 1443  for (;;) Line 1657  for (;;)
1657    
1658        {        {
1659        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1660        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1661        }        }
1662    
1663      /* Save the earliest consulted character, then skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
# Line 1457  for (;;) Line 1671  for (;;)
1671      function is able to force a failure. */      function is able to force a failure. */
1672    
1673      case OP_CALLOUT:      case OP_CALLOUT:
1674      if (pcre_callout != NULL)      if (PUBL(callout) != NULL)
1675        {        {
1676        pcre_callout_block cb;        PUBL(callout_block) cb;
1677        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 2;   /* Version 2 of the callout block */
1678        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1679        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1680    #ifdef COMPILE_PCRE8
1681        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1682    #else
1683          cb.subject          = (PCRE_SPTR16)md->start_subject;
1684    #endif
1685        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1686        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
1687        cb.current_position = (int)(eptr - md->start_subject);        cb.current_position = (int)(eptr - md->start_subject);
# Line 1472  for (;;) Line 1690  for (;;)
1690        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1691        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1692        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1693        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        cb.mark             = md->nomatch_mark;
1694          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1695        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1696        }        }
1697      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 1482  for (;;) Line 1701  for (;;)
1701      offset data is the offset to the starting bracket from the start of the      offset data is the offset to the starting bracket from the start of the
1702      whole pattern. (This is so that it works from duplicated subpatterns.)      whole pattern. (This is so that it works from duplicated subpatterns.)
1703    
1704      If there are any capturing brackets started but not finished, we have to      The state of the capturing groups is preserved over recursion, and
1705      save their starting points and reinstate them after the recursion. However,      re-instated afterwards. We don't know how many are started and not yet
1706      we don't know how many such there are (offset_top records the completed      finished (offset_top records the completed total) so we just have to save
1707      total) so we just have to save all the potential data. There may be up to      all the potential data. There may be up to 65535 such values, which is too
1708      65535 such values, which is too large to put on the stack, but using malloc      large to put on the stack, but using malloc for small numbers seems
1709      for small numbers seems expensive. As a compromise, the stack is used when      expensive. As a compromise, the stack is used when there are no more than
1710      there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc      REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
     is used.  
1711    
1712      There are also other values that have to be saved. We use a chained      There are also other values that have to be saved. We use a chained
1713      sequence of blocks that actually live on the stack. Thanks to Robin Houston      sequence of blocks that actually live on the stack. Thanks to Robin Houston
1714      for the original version of this logic. */      for the original version of this logic. It has, however, been hacked around
1715        a lot, so he is not to blame for the current way it works. */
1716    
1717      case OP_RECURSE:      case OP_RECURSE:
1718        {        {
1719          recursion_info *ri;
1720          int recno;
1721    
1722        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1723        new_recursive.group_num = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
1724          GET2(callpat, 1 + LINK_SIZE);          GET2(callpat, 1 + LINK_SIZE);
1725    
1726          /* Check for repeating a recursion without advancing the subject pointer.
1727          This should catch convoluted mutual recursions. (Some simple cases are
1728          caught at compile time.) */
1729    
1730          for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1731            if (recno == ri->group_num && eptr == ri->subject_position)
1732              RRETURN(PCRE_ERROR_RECURSELOOP);
1733    
1734        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1735    
1736          new_recursive.group_num = recno;
1737          new_recursive.subject_position = eptr;
1738        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1739        md->recursive = &new_recursive;        md->recursive = &new_recursive;
1740    
1741        /* Find where to continue from afterwards */        /* Where to continue from afterwards */
1742    
1743        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
       new_recursive.after_call = ecode;  
1744    
1745        /* Now save the offset data. */        /* Now save the offset data */
1746    
1747        new_recursive.saved_max = md->offset_end;        new_recursive.saved_max = md->offset_end;
1748        if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)        if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
# Line 1519  for (;;) Line 1750  for (;;)
1750        else        else
1751          {          {
1752          new_recursive.offset_save =          new_recursive.offset_save =
1753            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));            (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1754          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1755          }          }
   
1756        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1757              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1758        new_recursive.save_offset_top = offset_top;  
1759          /* OK, now we can do the recursion. After processing each alternative,
1760        /* OK, now we can do the recursion. For each top-level alternative we        restore the offset data. If there were nested recursions, md->recursive
1761        restore the offset and recursion data. */        might be changed, so reset it before looping. */
1762    
1763        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1764        cbegroup = (*callpat >= OP_SBRA);        cbegroup = (*callpat >= OP_SBRA);
1765        do        do
1766          {          {
1767          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1768          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1769            md, eptrb, RM6);            md, eptrb, RM6);
1770            memcpy(md->offset_vector, new_recursive.offset_save,
1771                new_recursive.saved_max * sizeof(int));
1772            md->recursive = new_recursive.prevrec;
1773          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1774            {            {
1775            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
           md->recursive = new_recursive.prevrec;  
1776            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1777              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1778            MRRETURN(MATCH_MATCH);  
1779              /* Set where we got to in the subject, and reset the start in case
1780              it was changed by \K. This *is* propagated back out of a recursion,
1781              for Perl compatibility. */
1782    
1783              eptr = md->end_match_ptr;
1784              mstart = md->start_match_ptr;
1785              goto RECURSION_MATCHED;        /* Exit loop; end processing */
1786            }            }
1787          else if (rrc != MATCH_NOMATCH &&  
1788                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* PCRE does not allow THEN to escape beyond a recursion; it is treated
1789            as NOMATCH. */
1790    
1791            else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1792            {            {
1793            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1794            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1795              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1796            RRETURN(rrc);            RRETURN(rrc);
1797            }            }
1798    
1799          md->recursive = &new_recursive;          md->recursive = &new_recursive;
         memcpy(md->offset_vector, new_recursive.offset_save,  
             new_recursive.saved_max * sizeof(int));  
1800          callpat += GET(callpat, 1);          callpat += GET(callpat, 1);
1801          }          }
1802        while (*callpat == OP_ALT);        while (*callpat == OP_ALT);
# Line 1564  for (;;) Line 1804  for (;;)
1804        DPRINTF(("Recursion didn't match\n"));        DPRINTF(("Recursion didn't match\n"));
1805        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1806        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1807          (pcre_free)(new_recursive.offset_save);          (PUBL(free))(new_recursive.offset_save);
1808        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
       }  
     /* Control never reaches here */  
   
     /* "Once" brackets are like assertion brackets except that after a match,  
     the point in the subject string is not moved back. Thus there can never be  
     a move back into the brackets. Friedl calls these "atomic" subpatterns.  
     Check the alternative branches in turn - the matching won't pass the KET  
     for this kind of subpattern. If any one branch matches, we carry on as at  
     the end of a normal bracket, leaving the subject pointer, but resetting  
     the start-of-match value in case it was changed by \K. */  
   
     case OP_ONCE:  
     prev = ecode;  
     saved_eptr = eptr;  
   
     do  
       {  
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);  
       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */  
         {  
         mstart = md->start_match_ptr;  
         break;  
         }  
       if (rrc != MATCH_NOMATCH &&  
           (rrc != MATCH_THEN || md->start_match_ptr != ecode))  
         RRETURN(rrc);  
       ecode += GET(ecode,1);  
       }  
     while (*ecode == OP_ALT);  
   
     /* If hit the end of the group (which could be repeated), fail */  
   
     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);  
   
     /* Continue after the group, updating the offsets high water mark, since  
     extracts may have been taken. */  
   
     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);  
   
     offset_top = md->end_offset_top;  
     eptr = md->end_match_ptr;  
   
     /* For a non-repeating ket, just continue at this level. This also  
     happens for a repeating ket if no characters were matched in the group.  
     This is the forcible breaking of infinite loops as implemented in Perl  
     5.005. */  
   
     if (*ecode == OP_KET || eptr == saved_eptr)  
       {  
       ecode += 1+LINK_SIZE;  
       break;  
       }  
   
     /* The repeating kets try the rest of the pattern or restart from the  
     preceding bracket, in the appropriate order. The second "call" of match()  
     uses tail recursion, to avoid using another stack frame. */  
   
     if (*ecode == OP_KETRMIN)  
       {  
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM8);  
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
       ecode = prev;  
       }  
     else  /* OP_KETRMAX */  
       {  
       md->match_function_type = MATCH_CBEGROUP;  
       RMATCH(eptr, prev, offset_top, md, eptrb, RM9);  
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
       ecode += 1 + LINK_SIZE;  
1809        }        }
     goto TAIL_RECURSE;  
1810    
1811      /* Control never gets here */      RECURSION_MATCHED:
1812        break;
1813    
1814      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1815      bracketed group and go to there. */      bracketed group and go to there. */
# Line 1652  for (;;) Line 1823  for (;;)
1823      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1824      with fixed upper repeat limits are compiled as a number of copies, with the      with fixed upper repeat limits are compiled as a number of copies, with the
1825      optional ones preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1826    
1827      case OP_BRAZERO:      case OP_BRAZERO:
1828      next = ecode + 1;      next = ecode + 1;
1829      RMATCH(eptr, next, offset_top, md, eptrb, RM10);      RMATCH(eptr, next, offset_top, md, eptrb, RM10);
# Line 1660  for (;;) Line 1831  for (;;)
1831      do next += GET(next, 1); while (*next == OP_ALT);      do next += GET(next, 1); while (*next == OP_ALT);
1832      ecode = next + 1 + LINK_SIZE;      ecode = next + 1 + LINK_SIZE;
1833      break;      break;
1834    
1835      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1836      next = ecode + 1;      next = ecode + 1;
1837      do next += GET(next, 1); while (*next == OP_ALT);      do next += GET(next, 1); while (*next == OP_ALT);
# Line 1674  for (;;) Line 1845  for (;;)
1845      do next += GET(next,1); while (*next == OP_ALT);      do next += GET(next,1); while (*next == OP_ALT);
1846      ecode = next + 1 + LINK_SIZE;      ecode = next + 1 + LINK_SIZE;
1847      break;      break;
1848    
1849      /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything      /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1850      here; just jump to the group, with allow_zero set TRUE. */      here; just jump to the group, with allow_zero set TRUE. */
1851    
1852      case OP_BRAPOSZERO:      case OP_BRAPOSZERO:
1853      op = *(++ecode);      op = *(++ecode);
1854      allow_zero = TRUE;      allow_zero = TRUE;
1855      if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;      if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1856        goto POSSESSIVE_NON_CAPTURE;        goto POSSESSIVE_NON_CAPTURE;
# Line 1689  for (;;) Line 1860  for (;;)
1860      case OP_KET:      case OP_KET:
1861      case OP_KETRMIN:      case OP_KETRMIN:
1862      case OP_KETRMAX:      case OP_KETRMAX:
1863      case OP_KETRPOS:      case OP_KETRPOS:
1864      prev = ecode - GET(ecode, 1);      prev = ecode - GET(ecode, 1);
1865    
1866      /* If this was a group that remembered the subject start, in order to break      /* If this was a group that remembered the subject start, in order to break
1867      infinite repeats of empty string matches, retrieve the subject start from      infinite repeats of empty string matches, retrieve the subject start from
1868      the chain. Otherwise, set it NULL. */      the chain. Otherwise, set it NULL. */
1869    
1870      if (*prev >= OP_SBRA)      if (*prev >= OP_SBRA || *prev == OP_ONCE)
1871        {        {
1872        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1873        eptrb = eptrb->epb_prev;              /* Backup to previous group */        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1874        }        }
1875      else saved_eptr = NULL;      else saved_eptr = NULL;
1876    
1877      /* If we are at the end of an assertion group or an atomic group, stop      /* If we are at the end of an assertion group or a non-capturing atomic
1878      matching and return MATCH_MATCH, but record the current high water mark for      group, stop matching and return MATCH_MATCH, but record the current high
1879      use by positive assertions. We also need to record the match start in case      water mark for use by positive assertions. We also need to record the match
1880      it was changed by \K. */      start in case it was changed by \K. */
1881    
1882      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1883          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||           *prev == OP_ONCE_NC)
         *prev == OP_ONCE)  
1884        {        {
1885        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE_NC */
1886        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1887        md->start_match_ptr = mstart;        md->start_match_ptr = mstart;
1888        MRRETURN(MATCH_MATCH);        RRETURN(MATCH_MATCH);         /* Sets md->mark */
1889        }        }
1890    
1891      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
1892      and if necessary complete handling an extraction by setting the offsets and      and if necessary complete handling an extraction by setting the offsets and
1893      bumping the high water mark. Note that whole-pattern recursion is coded as      bumping the high water mark. Whole-pattern recursion is coded as a recurse
1894      a recurse into group 0, so it won't be picked up here. Instead, we catch it      into group 0, so it won't be picked up here. Instead, we catch it when the
1895      when the OP_END is reached. Other recursion is handled here. */      OP_END is reached. Other recursion is handled here. We just have to record
1896        the current subject position and start match pointer and give a MATCH
1897        return. */
1898    
1899      if (*prev == OP_CBRA || *prev == OP_SCBRA ||      if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1900          *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)          *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
# Line 1735  for (;;) Line 1907  for (;;)
1907        printf("\n");        printf("\n");
1908  #endif  #endif
1909    
1910          /* Handle a recursively called group. */
1911    
1912          if (md->recursive != NULL && md->recursive->group_num == number)
1913            {
1914            md->end_match_ptr = eptr;
1915            md->start_match_ptr = mstart;
1916            RRETURN(MATCH_MATCH);
1917            }
1918    
1919          /* Deal with capturing */
1920    
1921        md->capture_last = number;        md->capture_last = number;
1922        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1923          {          {
1924          /* If offset is greater than offset_top, it means that we are          /* If offset is greater than offset_top, it means that we are
1925          "skipping" a capturing group, and that group's offsets must be marked          "skipping" a capturing group, and that group's offsets must be marked
1926          unset. In earlier versions of PCRE, all the offsets were unset at the          unset. In earlier versions of PCRE, all the offsets were unset at the
1927          start of matching, but this doesn't work because atomic groups and          start of matching, but this doesn't work because atomic groups and
1928          assertions can cause a value to be set that should later be unset.          assertions can cause a value to be set that should later be unset.
1929          Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as          Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1930          part of the atomic group, but this is not on the final matching path,          part of the atomic group, but this is not on the final matching path,
1931          so must be unset when 2 is set. (If there is no group 2, there is no          so must be unset when 2 is set. (If there is no group 2, there is no
1932          problem, because offset_top will then be 2, indicating no capture.) */          problem, because offset_top will then be 2, indicating no capture.) */
1933    
1934          if (offset > offset_top)          if (offset > offset_top)
1935            {            {
1936            register int *iptr = md->offset_vector + offset_top;            register int *iptr = md->offset_vector + offset_top;
1937            register int *iend = md->offset_vector + offset;            register int *iend = md->offset_vector + offset;
1938            while (iptr < iend) *iptr++ = -1;            while (iptr < iend) *iptr++ = -1;
1939            }            }
1940    
1941          /* Now make the extraction */          /* Now make the extraction */
1942    
1943          md->offset_vector[offset] =          md->offset_vector[offset] =
# Line 1762  for (;;) Line 1945  for (;;)
1945          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1946          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1947          }          }
   
       /* Handle a recursively called group. Restore the offsets  
       appropriately and continue from after the call. */  
   
       if (md->recursive != NULL && md->recursive->group_num == number)  
         {  
         recursion_info *rec = md->recursive;  
         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));  
         md->recursive = rec->prevrec;  
         memcpy(md->offset_vector, rec->offset_save,  
           rec->saved_max * sizeof(int));  
         offset_top = rec->save_offset_top;  
         ecode = rec->after_call;  
         break;  
         }  
1948        }        }
1949    
1950      /* For a non-repeating ket, just continue at this level. This also      /* For an ordinary non-repeating ket, just continue at this level. This
1951      happens for a repeating ket if no characters were matched in the group.      also happens for a repeating ket if no characters were matched in the
1952      This is the forcible breaking of infinite loops as implemented in Perl      group. This is the forcible breaking of infinite loops as implemented in
1953      5.005. If there is an options reset, it will get obeyed in the normal      Perl 5.005. For a non-repeating atomic group that includes captures,
1954      course of events. */      establish a backup point by processing the rest of the pattern at a lower
1955        level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1956        original OP_ONCE level, thereby bypassing intermediate backup points, but
1957        resetting any captures that happened along the way. */
1958    
1959      if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1960        {        {
1961        ecode += 1 + LINK_SIZE;        if (*prev == OP_ONCE)
1962            {
1963            RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1964            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1965            md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1966            RRETURN(MATCH_ONCE);
1967            }
1968          ecode += 1 + LINK_SIZE;    /* Carry on at this level */
1969        break;        break;
1970        }        }
1971    
1972      /* OP_KETRPOS is a possessive repeating ket. Remember the current position,      /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1973      and return the MATCH_KETRPOS. This makes it possible to do the repeats one      and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1974      at a time from the outer level, thus saving stack. */      at a time from the outer level, thus saving stack. */
1975    
1976      if (*ecode == OP_KETRPOS)      if (*ecode == OP_KETRPOS)
1977        {        {
1978        md->end_match_ptr = eptr;        md->end_match_ptr = eptr;
1979        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1980        RRETURN(MATCH_KETRPOS);        RRETURN(MATCH_KETRPOS);
1981        }        }
1982    
1983      /* The normal repeating kets try the rest of the pattern or restart from      /* The normal repeating kets try the rest of the pattern or restart from
1984      the preceding bracket, in the appropriate order. In the second case, we can      the preceding bracket, in the appropriate order. In the second case, we can
1985      use tail recursion to avoid using another stack frame, unless we have an      use tail recursion to avoid using another stack frame, unless we have an
1986      unlimited repeat of a group that can match an empty string. */      atomic group or an unlimited repeat of a group that can match an empty
1987        string. */
1988    
1989      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1990        {        {
1991        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1992        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1993          if (*prev == OP_ONCE)
1994            {
1995            RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
1996            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1997            md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1998            RRETURN(MATCH_ONCE);
1999            }
2000        if (*prev >= OP_SBRA)    /* Could match an empty string */        if (*prev >= OP_SBRA)    /* Could match an empty string */
2001          {          {
2002          md->match_function_type = MATCH_CBEGROUP;          md->match_function_type = MATCH_CBEGROUP;
2003          RMATCH(eptr, prev, offset_top, md, eptrb, RM50);          RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2004          RRETURN(rrc);          RRETURN(rrc);
2005          }          }
# Line 1822  for (;;) Line 2008  for (;;)
2008        }        }
2009      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
2010        {        {
2011        if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
2012        RMATCH(eptr, prev, offset_top, md, eptrb, RM13);        RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2013          if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2014        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2015          if (*prev == OP_ONCE)
2016            {
2017            RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2018            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2019            md->once_target = prev;
2020            RRETURN(MATCH_ONCE);
2021            }
2022        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
2023        goto TAIL_RECURSE;        goto TAIL_RECURSE;
2024        }        }
# Line 1833  for (;;) Line 2027  for (;;)
2027      /* Not multiline mode: start of subject assertion, unless notbol. */      /* Not multiline mode: start of subject assertion, unless notbol. */
2028    
2029      case OP_CIRC:      case OP_CIRC:
2030      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2031    
2032      /* Start of subject assertion */      /* Start of subject assertion */
2033    
2034      case OP_SOD:      case OP_SOD:
2035      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2036      ecode++;      ecode++;
2037      break;      break;
2038    
2039      /* Multiline mode: start of subject unless notbol, or after any newline. */      /* Multiline mode: start of subject unless notbol, or after any newline. */
2040    
2041      case OP_CIRCM:      case OP_CIRCM:
2042      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2043      if (eptr != md->start_subject &&      if (eptr != md->start_subject &&
2044          (eptr == md->end_subject || !WAS_NEWLINE(eptr)))          (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2045        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2046      ecode++;      ecode++;
2047      break;      break;
2048    
2049      /* Start of match assertion */      /* Start of match assertion */
2050    
2051      case OP_SOM:      case OP_SOM:
2052      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2053      ecode++;      ecode++;
2054      break;      break;
2055    
# Line 1871  for (;;) Line 2065  for (;;)
2065    
2066      case OP_DOLLM:      case OP_DOLLM:
2067      if (eptr < md->end_subject)      if (eptr < md->end_subject)
2068        { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }        {
2069          if (!IS_NEWLINE(eptr))
2070            {
2071            if (md->partial != 0 &&
2072                eptr + 1 >= md->end_subject &&
2073                NLBLOCK->nltype == NLTYPE_FIXED &&
2074                NLBLOCK->nllen == 2 &&
2075                *eptr == NLBLOCK->nl[0])
2076              {
2077              md->hitend = TRUE;
2078              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2079              }
2080            RRETURN(MATCH_NOMATCH);
2081            }
2082          }
2083      else      else
2084        {        {
2085        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
2086        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2087        }        }
2088      ecode++;      ecode++;
2089      break;      break;
2090    
2091      /* Not multiline mode: assert before a terminating newline or before end of      /* Not multiline mode: assert before a terminating newline or before end of
2092      subject unless noteol is set. */      subject unless noteol is set. */
2093    
2094      case OP_DOLL:      case OP_DOLL:
2095      if (md->noteol) MRRETURN(MATCH_NOMATCH);      if (md->noteol) RRETURN(MATCH_NOMATCH);
2096      if (!md->endonly) goto ASSERT_NL_OR_EOS;      if (!md->endonly) goto ASSERT_NL_OR_EOS;
2097    
2098      /* ... else fall through for endonly */      /* ... else fall through for endonly */
# Line 1892  for (;;) Line 2100  for (;;)
2100      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
2101    
2102      case OP_EOD:      case OP_EOD:
2103      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2104      SCHECK_PARTIAL();      SCHECK_PARTIAL();
2105      ecode++;      ecode++;
2106      break;      break;
# Line 1903  for (;;) Line 2111  for (;;)
2111      ASSERT_NL_OR_EOS:      ASSERT_NL_OR_EOS:
2112      if (eptr < md->end_subject &&      if (eptr < md->end_subject &&
2113          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2114        MRRETURN(MATCH_NOMATCH);        {
2115          if (md->partial != 0 &&
2116              eptr + 1 >= md->end_subject &&
2117              NLBLOCK->nltype == NLTYPE_FIXED &&
2118              NLBLOCK->nllen == 2 &&
2119              *eptr == NLBLOCK->nl[0])
2120            {
2121            md->hitend = TRUE;
2122            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2123            }
2124          RRETURN(MATCH_NOMATCH);
2125          }
2126    
2127      /* Either at end of string or \n before end. */      /* Either at end of string or \n before end. */
2128    
# Line 1922  for (;;) Line 2141  for (;;)
2141        be "non-word" characters. Remember the earliest consulted character for        be "non-word" characters. Remember the earliest consulted character for
2142        partial matching. */        partial matching. */
2143    
2144  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2145        if (utf8)        if (utf)
2146          {          {
2147          /* Get status of previous character */          /* Get status of previous character */
2148    
2149          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
2150            {            {
2151            USPTR lastptr = eptr - 1;            PCRE_PUCHAR lastptr = eptr - 1;
2152            while((*lastptr & 0xc0) == 0x80) lastptr--;            BACKCHAR(lastptr);
2153            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2154            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
2155  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 1995  for (;;) Line 2214  for (;;)
2214              }              }
2215            else            else
2216  #endif  #endif
2217            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = MAX_255(eptr[-1])
2218                && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2219            }            }
2220    
2221          /* Get status of next character */          /* Get status of next character */
# Line 2018  for (;;) Line 2238  for (;;)
2238            }            }
2239          else          else
2240  #endif  #endif
2241          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = MAX_255(*eptr)
2242              && ((md->ctypes[*eptr] & ctype_word) != 0);
2243          }          }
2244    
2245        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
2246    
2247        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
2248             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2249          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2250        }        }
2251      break;      break;
2252    
2253      /* Match a single character type; inline for speed */      /* Match any single character type except newline; have to take care with
2254        CRLF newlines and partial matching. */
2255    
2256      case OP_ANY:      case OP_ANY:
2257      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2258        if (md->partial != 0 &&
2259            eptr + 1 >= md->end_subject &&
2260            NLBLOCK->nltype == NLTYPE_FIXED &&
2261            NLBLOCK->nllen == 2 &&
2262            *eptr == NLBLOCK->nl[0])
2263          {
2264          md->hitend = TRUE;
2265          if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2266          }
2267    
2268      /* Fall through */      /* Fall through */
2269    
2270        /* Match any single character whatsoever. */
2271    
2272      case OP_ALLANY:      case OP_ALLANY:
2273      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2274        {        {                            /* not be updated before SCHECK_PARTIAL. */
2275        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2276        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2277        }        }
2278      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      eptr++;
2279    #ifdef SUPPORT_UTF
2280        if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2281    #endif
2282      ecode++;      ecode++;
2283      break;      break;
2284    
# Line 2049  for (;;) Line 2286  for (;;)
2286      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
2287    
2288      case OP_ANYBYTE:      case OP_ANYBYTE:
2289      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2290        {        {                            /* not be updated before SCHECK_PARTIAL. */
2291        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2292        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2293        }        }
2294        eptr++;
2295      ecode++;      ecode++;
2296      break;      break;
2297    
# Line 2061  for (;;) Line 2299  for (;;)
2299      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2300        {        {
2301        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2302        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2303        }        }
2304      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2305      if (      if (
2306  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2307         c < 256 &&         c < 256 &&
2308  #endif  #endif
2309         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
2310         )         )
2311        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2312      ecode++;      ecode++;
2313      break;      break;
2314    
# Line 2078  for (;;) Line 2316  for (;;)
2316      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2317        {        {
2318        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2319        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2320        }        }
2321      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2322      if (      if (
2323  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2324         c >= 256 ||         c > 255 ||
2325  #endif  #endif
2326         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
2327         )         )
2328        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2329      ecode++;      ecode++;
2330      break;      break;
2331    
# Line 2095  for (;;) Line 2333  for (;;)
2333      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2334        {        {
2335        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2336        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2337        }        }
2338      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2339      if (      if (
2340  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2341         c < 256 &&         c < 256 &&
2342  #endif  #endif
2343         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
2344         )         )
2345        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2346      ecode++;      ecode++;
2347      break;      break;
2348    
# Line 2112  for (;;) Line 2350  for (;;)
2350      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2351        {        {
2352        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2353        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2354        }        }
2355      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2356      if (      if (
2357  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2358         c >= 256 ||         c > 255 ||
2359  #endif  #endif
2360         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
2361         )         )
2362        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2363      ecode++;      ecode++;
2364      break;      break;
2365    
# Line 2129  for (;;) Line 2367  for (;;)
2367      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2368        {        {
2369        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2370        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2371        }        }
2372      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2373      if (      if (
2374  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2375         c < 256 &&         c < 256 &&
2376  #endif  #endif
2377         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
2378         )         )
2379        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2380      ecode++;      ecode++;
2381      break;      break;
2382    
# Line 2146  for (;;) Line 2384  for (;;)
2384      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2385        {        {
2386        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2387        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2388        }        }
2389      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2390      if (      if (
2391  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2392         c >= 256 ||         c > 255 ||
2393  #endif  #endif
2394         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2395         )         )
2396        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2397      ecode++;      ecode++;
2398      break;      break;
2399    
# Line 2163  for (;;) Line 2401  for (;;)
2401      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2402        {        {
2403        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2404        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2405        }        }
2406      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2407      switch(c)      switch(c)
2408        {        {
2409        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2410    
2411        case 0x000d:        case 0x000d:
2412        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr >= md->end_subject)
2413            {
2414            SCHECK_PARTIAL();
2415            }
2416          else if (*eptr == 0x0a) eptr++;
2417        break;        break;
2418    
2419        case 0x000a:        case 0x000a:
# Line 2182  for (;;) Line 2424  for (;;)
2424        case 0x0085:        case 0x0085:
2425        case 0x2028:        case 0x2028:
2426        case 0x2029:        case 0x2029:
2427        if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);        if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2428        break;        break;
2429        }        }
2430      ecode++;      ecode++;
# Line 2192  for (;;) Line 2434  for (;;)
2434      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2435        {        {
2436        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2437        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2438        }        }
2439      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2440      switch(c)      switch(c)
# Line 2217  for (;;) Line 2459  for (;;)
2459        case 0x202f:    /* NARROW NO-BREAK SPACE */        case 0x202f:    /* NARROW NO-BREAK SPACE */
2460        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2461        case 0x3000:    /* IDEOGRAPHIC SPACE */        case 0x3000:    /* IDEOGRAPHIC SPACE */
2462        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2463        }        }
2464      ecode++;      ecode++;
2465      break;      break;
# Line 2226  for (;;) Line 2468  for (;;)
2468      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2469        {        {
2470        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2471        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2472        }        }
2473      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2474      switch(c)      switch(c)
2475        {        {
2476        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2477        case 0x09:      /* HT */        case 0x09:      /* HT */
2478        case 0x20:      /* SPACE */        case 0x20:      /* SPACE */
2479        case 0xa0:      /* NBSP */        case 0xa0:      /* NBSP */
# Line 2260  for (;;) Line 2502  for (;;)
2502      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2503        {        {
2504        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2505        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2506        }        }
2507      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2508      switch(c)      switch(c)
# Line 2273  for (;;) Line 2515  for (;;)
2515        case 0x85:      /* NEL */        case 0x85:      /* NEL */
2516        case 0x2028:    /* LINE SEPARATOR */        case 0x2028:    /* LINE SEPARATOR */
2517        case 0x2029:    /* PARAGRAPH SEPARATOR */        case 0x2029:    /* PARAGRAPH SEPARATOR */
2518        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2519        }        }
2520      ecode++;      ecode++;
2521      break;      break;
# Line 2282  for (;;) Line 2524  for (;;)
2524      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2525        {        {
2526        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2527        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2528        }        }
2529      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2530      switch(c)      switch(c)
2531        {        {
2532        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2533        case 0x0a:      /* LF */        case 0x0a:      /* LF */
2534        case 0x0b:      /* VT */        case 0x0b:      /* VT */
2535        case 0x0c:      /* FF */        case 0x0c:      /* FF */
# Line 2309  for (;;) Line 2551  for (;;)
2551      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2552        {        {
2553        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2554        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2555        }        }
2556      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2557        {        {
# Line 2318  for (;;) Line 2560  for (;;)
2560        switch(ecode[1])        switch(ecode[1])
2561          {          {
2562          case PT_ANY:          case PT_ANY:
2563          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2564          break;          break;
2565    
2566          case PT_LAMP:          case PT_LAMP:
2567          if ((prop->chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2568               prop->chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2569               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2570            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2571          break;          break;
2572    
2573          case PT_GC:          case PT_GC:
2574          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2575            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2576          break;          break;
2577    
2578          case PT_PC:          case PT_PC:
2579          if ((ecode[2] != prop->chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2580            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2581          break;          break;
2582    
2583          case PT_SC:          case PT_SC:
2584          if ((ecode[2] != prop->script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2585            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2586          break;          break;
2587    
2588          /* These are specials */          /* These are specials */
2589    
2590          case PT_ALNUM:          case PT_ALNUM:
2591          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2592               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2593            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2594          break;          break;
2595    
2596          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2597          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2598               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2599                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2600            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2601          break;          break;
2602    
2603          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2604          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2605               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2606               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2607                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2608            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2609          break;          break;
2610    
2611          case PT_WORD:          case PT_WORD:
2612          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2613               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2614               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2615            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2616          break;          break;
2617    
2618          /* This should never occur */          /* This should never occur */
# Line 2390  for (;;) Line 2632  for (;;)
2632      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2633        {        {
2634        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2635        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2636        }        }
2637      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2638        if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
2639        while (eptr < md->end_subject)
2640        {        {
2641        int category = UCD_CATEGORY(c);        int len = 1;
2642        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);        if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2643        while (eptr < md->end_subject)        if (UCD_CATEGORY(c) != ucp_M) break;
2644          {        eptr += len;
         int len = 1;  
         if (!utf8) c = *eptr; else  
           {  
           GETCHARLEN(c, eptr, len);  
           }  
         category = UCD_CATEGORY(c);  
         if (category != ucp_M) break;  
         eptr += len;  
         }  
2645        }        }
2646        CHECK_PARTIAL();
2647      ecode++;      ecode++;
2648      break;      break;
2649  #endif  #endif
# Line 2422  for (;;) Line 2658  for (;;)
2658      loops). */      loops). */
2659    
2660      case OP_REF:      case OP_REF:
2661      case OP_REFI:      case OP_REFI:
2662      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2663      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2664      ecode += 3;      ecode += 1 + IMM2_SIZE;
2665    
2666      /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2667    
# Line 2465  for (;;) Line 2701  for (;;)
2701        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2702        minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2703        min = GET2(ecode, 1);        min = GET2(ecode, 1);
2704        max = GET2(ecode, 3);        max = GET2(ecode, 1 + IMM2_SIZE);
2705        if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2706        ecode += 5;        ecode += 1 + 2 * IMM2_SIZE;
2707        break;        break;
2708    
2709        default:               /* No repeat follows */        default:               /* No repeat follows */
2710        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2711          {          {
2712            if (length == -2) eptr = md->end_subject;   /* Partial match */
2713          CHECK_PARTIAL();          CHECK_PARTIAL();
2714          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2715          }          }
2716        eptr += length;        eptr += length;
2717        continue;              /* With the main loop */        continue;              /* With the main loop */
2718        }        }
2719    
2720      /* Handle repeated back references. If the length of the reference is      /* Handle repeated back references. If the length of the reference is
2721      zero, just continue with the main loop. */      zero, just continue with the main loop. If the length is negative, it
2722        means the reference is unset in non-Java-compatible mode. If the minimum is
2723        zero, we can continue at the same level without recursion. For any other
2724        minimum, carrying on will result in NOMATCH. */
2725    
2726      if (length == 0) continue;      if (length == 0) continue;
2727        if (length < 0 && min == 0) continue;
2728    
2729      /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2730      the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
# Line 2491  for (;;) Line 2732  for (;;)
2732    
2733      for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2734        {        {
2735        int slength;        int slength;
2736        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2737          {          {
2738            if (slength == -2) eptr = md->end_subject;   /* Partial match */
2739          CHECK_PARTIAL();          CHECK_PARTIAL();
2740          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2741          }          }
2742        eptr += slength;        eptr += slength;
2743        }        }
# Line 2511  for (;;) Line 2753  for (;;)
2753        {        {
2754        for (fi = min;; fi++)        for (fi = min;; fi++)
2755          {          {
2756          int slength;          int slength;
2757          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2758          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2759          if (fi >= max) MRRETURN(MATCH_NOMATCH);          if (fi >= max) RRETURN(MATCH_NOMATCH);
2760          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2761            {            {
2762              if (slength == -2) eptr = md->end_subject;   /* Partial match */
2763            CHECK_PARTIAL();            CHECK_PARTIAL();
2764            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2765            }            }
2766          eptr += slength;          eptr += slength;
2767          }          }
# Line 2532  for (;;) Line 2775  for (;;)
2775        pp = eptr;        pp = eptr;
2776        for (i = min; i < max; i++)        for (i = min; i < max; i++)
2777          {          {
2778          int slength;          int slength;
2779          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2780            {            {
2781            CHECK_PARTIAL();            /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2782              the soft partial matching case. */
2783    
2784              if (slength == -2 && md->partial != 0 &&
2785                  md->end_subject > md->start_used_ptr)
2786                {
2787                md->hitend = TRUE;
2788                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2789                }
2790            break;            break;
2791            }            }
2792          eptr += slength;          eptr += slength;
2793          }          }
2794    
2795        while (eptr >= pp)        while (eptr >= pp)
2796          {          {
2797          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2798          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2799          eptr -= length;          eptr -= length;
2800          }          }
2801        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2802        }        }
2803      /* Control never gets here */      /* Control never gets here */
2804    
# Line 2564  for (;;) Line 2816  for (;;)
2816      case OP_NCLASS:      case OP_NCLASS:
2817      case OP_CLASS:      case OP_CLASS:
2818        {        {
2819          /* The data variable is saved across frames, so the byte map needs to
2820          be stored there. */
2821    #define BYTE_MAP ((pcre_uint8 *)data)
2822        data = ecode + 1;                /* Save for matching */        data = ecode + 1;                /* Save for matching */
2823        ecode += 33;                     /* Advance past the item */        ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2824    
2825        switch (*ecode)        switch (*ecode)
2826          {          {
# Line 2586  for (;;) Line 2841  for (;;)
2841          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2842          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2843          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2844          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2845          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2846          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2847          break;          break;
2848    
2849          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2598  for (;;) Line 2853  for (;;)
2853    
2854        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
2855    
2856  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2857        /* UTF-8 mode */        if (utf)
       if (utf8)  
2858          {          {
2859          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2860            {            {
2861            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2862              {              {
2863              SCHECK_PARTIAL();              SCHECK_PARTIAL();
2864              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2865              }              }
2866            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2867            if (c > 255)            if (c > 255)
2868              {              {
2869              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2870              }              }
2871            else            else
2872              {              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
             }  
2873            }            }
2874          }          }
2875        else        else
2876  #endif  #endif
2877        /* Not UTF-8 mode */        /* Not UTF mode */
2878          {          {
2879          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2880            {            {
2881            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2882              {              {
2883              SCHECK_PARTIAL();              SCHECK_PARTIAL();
2884              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2885              }              }
2886            c = *eptr++;            c = *eptr++;
2887            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2888              if (c > 255)
2889                {
2890                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2891                }
2892              else
2893    #endif
2894                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2895            }            }
2896          }          }
2897    
# Line 2646  for (;;) Line 2905  for (;;)
2905    
2906        if (minimize)        if (minimize)
2907          {          {
2908  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2909          /* UTF-8 mode */          if (utf)
         if (utf8)  
2910            {            {
2911            for (fi = min;; fi++)            for (fi = min;; fi++)
2912              {              {
2913              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2914              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2915              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2916              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2917                {                {
2918                SCHECK_PARTIAL();                SCHECK_PARTIAL();
2919                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2920                }                }
2921              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2922              if (c > 255)              if (c > 255)
2923                {                {
2924                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2925                }                }
2926              else              else
2927                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
               }  
2928              }              }
2929            }            }
2930          else          else
2931  #endif  #endif
2932          /* Not UTF-8 mode */          /* Not UTF mode */
2933            {            {
2934            for (fi = min;; fi++)            for (fi = min;; fi++)
2935              {              {
2936              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2937              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2938              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2939              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2940                {                {
2941                SCHECK_PARTIAL();                SCHECK_PARTIAL();
2942                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2943                }                }
2944              c = *eptr++;              c = *eptr++;
2945              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2946                if (c > 255)
2947                  {
2948                  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2949                  }
2950                else
2951    #endif
2952                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2953              }              }
2954            }            }
2955          /* Control never gets here */          /* Control never gets here */
# Line 2698  for (;;) Line 2961  for (;;)
2961          {          {
2962          pp = eptr;          pp = eptr;
2963    
2964  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2965          /* UTF-8 mode */          if (utf)
         if (utf8)  
2966            {            {
2967            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2968              {              {
# Line 2716  for (;;) Line 2978  for (;;)
2978                if (op == OP_CLASS) break;                if (op == OP_CLASS) break;
2979                }                }
2980              else              else
2981                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
               if ((data[c/8] & (1 << (c&7))) == 0) break;  
               }  
2982              eptr += len;              eptr += len;
2983              }              }
2984            for (;;)            for (;;)
# Line 2731  for (;;) Line 2991  for (;;)
2991            }            }
2992          else          else
2993  #endif  #endif
2994            /* Not UTF-8 mode */            /* Not UTF mode */
2995            {            {
2996            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2997              {              {
# Line 2741  for (;;) Line 3001  for (;;)
3001                break;                break;
3002                }                }
3003              c = *eptr;              c = *eptr;
3004              if ((data[c/8] & (1 << (c&7))) == 0) break;  #ifndef COMPILE_PCRE8
3005                if (c > 255)
3006                  {
3007                  if (op == OP_CLASS) break;
3008                  }
3009                else
3010    #endif
3011                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3012              eptr++;              eptr++;
3013              }              }
3014            while (eptr >= pp)            while (eptr >= pp)
# Line 2752  for (;;) Line 3019  for (;;)
3019              }              }
3020            }            }
3021    
3022          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3023          }          }
3024    #undef BYTE_MAP
3025        }        }
3026      /* Control never gets here */      /* Control never gets here */
3027    
# Line 2762  for (;;) Line 3030  for (;;)
3030      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3031      mode, because Unicode properties are supported in non-UTF-8 mode. */      mode, because Unicode properties are supported in non-UTF-8 mode. */
3032    
3033  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3034      case OP_XCLASS:      case OP_XCLASS:
3035        {        {
3036        data = ecode + 1 + LINK_SIZE;                /* Save for matching */        data = ecode + 1 + LINK_SIZE;                /* Save for matching */
# Line 2787  for (;;) Line 3055  for (;;)
3055          case OP_CRMINRANGE:          case OP_CRMINRANGE:
3056          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
3057          min = GET2(ecode, 1);          min = GET2(ecode, 1);
3058          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
3059          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
3060          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
3061          break;          break;
3062    
3063          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2804  for (;;) Line 3072  for (;;)
3072          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3073            {            {
3074            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3075            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3076            }            }
3077          GETCHARINCTEST(c, eptr);          GETCHARINCTEST(c, eptr);
3078          if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3079          }          }
3080    
3081        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2824  for (;;) Line 3092  for (;;)
3092            {            {
3093            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3094            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3095            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3096            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3097              {              {
3098              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3099              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3100              }              }
3101            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3102            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);            if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3103            }            }
3104          /* Control never gets here */          /* Control never gets here */
3105          }          }
# Line 2849  for (;;) Line 3117  for (;;)
3117              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3118              break;              break;
3119              }              }
3120    #ifdef SUPPORT_UTF
3121            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
3122            if (!_pcre_xclass(c, data)) break;  #else
3123              c = *eptr;
3124    #endif
3125              if (!PRIV(xclass)(c, data, utf)) break;
3126            eptr += len;            eptr += len;
3127            }            }
3128          for(;;)          for(;;)
# Line 2858  for (;;) Line 3130  for (;;)
3130            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3131            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3132            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3133            if (utf8) BACKCHAR(eptr);  #ifdef SUPPORT_UTF
3134              if (utf) BACKCHAR(eptr);
3135    #endif
3136            }            }
3137          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3138          }          }
3139    
3140        /* Control never gets here */        /* Control never gets here */
# Line 2870  for (;;) Line 3144  for (;;)
3144      /* Match a single character, casefully */      /* Match a single character, casefully */
3145    
3146      case OP_CHAR:      case OP_CHAR:
3147  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3148      if (utf8)      if (utf)
3149        {        {
3150        length = 1;        length = 1;
3151        ecode++;        ecode++;
# Line 2879  for (;;) Line 3153  for (;;)
3153        if (length > md->end_subject - eptr)        if (length > md->end_subject - eptr)
3154          {          {
3155          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3156          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3157          }          }
3158        while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
3159        }        }
3160      else      else
3161  #endif  #endif
3162        /* Not UTF mode */
     /* Non-UTF-8 mode */  
3163        {        {
3164        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3165          {          {
3166          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3167          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3168          }          }
3169        if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3170        ecode += 2;        ecode += 2;
3171        }        }
3172      break;      break;
3173    
3174      /* Match a single character, caselessly */      /* Match a single character, caselessly. If we are at the end of the
3175        subject, give up immediately. */
3176    
3177      case OP_CHARI:      case OP_CHARI:
3178  #ifdef SUPPORT_UTF8      if (eptr >= md->end_subject)
3179      if (utf8)        {
3180          SCHECK_PARTIAL();
3181          RRETURN(MATCH_NOMATCH);
3182          }
3183    
3184    #ifdef SUPPORT_UTF
3185        if (utf)
3186        {        {
3187        length = 1;        length = 1;
3188        ecode++;        ecode++;
3189        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
3190    
       if (length > md->end_subject - eptr)  
         {  
         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */  
         MRRETURN(MATCH_NOMATCH);  
         }  
   
3191        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
3192        can use the fast lookup table. */        we know that its other case must also be one byte long, so we can use the
3193          fast lookup table. We know that there is at least one byte left in the
3194          subject. */
3195    
3196        if (fc < 128)        if (fc < 128)
3197          {          {
3198          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          if (md->lcc[fc]
3199                != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3200            ecode++;
3201            eptr++;
3202          }          }
3203    
3204        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character. Note that we cannot
3205          use the value of "length" to check for sufficient bytes left, because the
3206          other case of the character may have more or fewer bytes.  */
3207    
3208        else        else
3209          {          {
# Line 2938  for (;;) Line 3219  for (;;)
3219  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3220            if (dc != UCD_OTHERCASE(fc))            if (dc != UCD_OTHERCASE(fc))
3221  #endif  #endif
3222              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3223            }            }
3224          }          }
3225        }        }
3226      else      else
3227  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF */
3228    
3229      /* Non-UTF-8 mode */      /* Not UTF mode */
3230        {        {
3231        if (md->end_subject - eptr < 1)        if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3232          {            != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3233          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */        eptr++;
         MRRETURN(MATCH_NOMATCH);  
         }  
       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);  
3234        ecode += 2;        ecode += 2;
3235        }        }
3236      break;      break;
# Line 2962  for (;;) Line 3240  for (;;)
3240      case OP_EXACT:      case OP_EXACT:
3241      case OP_EXACTI:      case OP_EXACTI:
3242      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3243      ecode += 3;      ecode += 1 + IMM2_SIZE;
3244      goto REPEATCHAR;      goto REPEATCHAR;
3245    
3246      case OP_POSUPTO:      case OP_POSUPTO:
# Line 2977  for (;;) Line 3255  for (;;)
3255      min = 0;      min = 0;
3256      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3257      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3258      ecode += 3;      ecode += 1 + IMM2_SIZE;
3259      goto REPEATCHAR;      goto REPEATCHAR;
3260    
3261      case OP_POSSTAR:      case OP_POSSTAR:
# Line 3025  for (;;) Line 3303  for (;;)
3303      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. */
3304    
3305      REPEATCHAR:      REPEATCHAR:
3306  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3307      if (utf8)      if (utf)
3308        {        {
3309        length = 1;        length = 1;
3310        charptr = ecode;        charptr = ecode;
# Line 3042  for (;;) Line 3320  for (;;)
3320          unsigned int othercase;          unsigned int othercase;
3321          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3322              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3323            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
3324          else oclength = 0;          else oclength = 0;
3325  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3326    
3327          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3328            {            {
3329            if (eptr <= md->end_subject - length &&            if (eptr <= md->end_subject - length &&
3330              memcmp(eptr, charptr, length) == 0) eptr += length;              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3331  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3332            else if (oclength > 0 &&            else if (oclength > 0 &&
3333                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
3334                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3335  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3336            else            else
3337              {              {
3338              CHECK_PARTIAL();              CHECK_PARTIAL();
3339              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3340              }              }
3341            }            }
3342    
# Line 3070  for (;;) Line 3348  for (;;)
3348              {              {
3349              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3350              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3351              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3352              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3353                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3354  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3355              else if (oclength > 0 &&              else if (oclength > 0 &&
3356                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3357                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3358  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3359              else              else
3360                {                {
3361                CHECK_PARTIAL();                CHECK_PARTIAL();
3362                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3363                }                }
3364              }              }
3365            /* Control never gets here */            /* Control never gets here */
# Line 3093  for (;;) Line 3371  for (;;)
3371            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3372              {              {
3373              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3374                  memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3375  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3376              else if (oclength > 0 &&              else if (oclength > 0 &&
3377                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3378                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3379  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3380              else              else
3381                {                {
# Line 3112  for (;;) Line 3390  for (;;)
3390              {              {
3391              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3392              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3393              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }              if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3394  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3395              eptr--;              eptr--;
3396              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3129  for (;;) Line 3407  for (;;)
3407        value of fc will always be < 128. */        value of fc will always be < 128. */
3408        }        }
3409      else      else
3410  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3411          /* When not in UTF-8 mode, load a single-byte character. */
3412      /* When not in UTF-8 mode, load a single-byte character. */        fc = *ecode++;
3413    
3414      fc = *ecode++;      /* The value of fc at this point is always one character, though we may
3415        or may not be in UTF mode. The code is duplicated for the caseless and
     /* The value of fc at this point is always less than 256, though we may or  
     may not be in UTF-8 mode. The code is duplicated for the caseless and  
3416      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
3417      common. First, ensure the minimum number of matches are present. If min =      common. First, ensure the minimum number of matches are present. If min =
3418      max, continue at the same level without recursing. Otherwise, if      max, continue at the same level without recursing. Otherwise, if
# Line 3149  for (;;) Line 3425  for (;;)
3425    
3426      if (op >= OP_STARI)  /* Caseless */      if (op >= OP_STARI)  /* Caseless */
3427        {        {
3428        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3429          /* fc must be < 128 if UTF is enabled. */
3430          foc = md->fcc[fc];
3431    #else
3432    #ifdef SUPPORT_UTF
3433    #ifdef SUPPORT_UCP
3434          if (utf && fc > 127)
3435            foc = UCD_OTHERCASE(fc);
3436    #else
3437          if (utf && fc > 127)
3438            foc = fc;
3439    #endif /* SUPPORT_UCP */
3440          else
3441    #endif /* SUPPORT_UTF */
3442            foc = TABLE_GET(fc, md->fcc, fc);
3443    #endif /* COMPILE_PCRE8 */
3444    
3445        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3446          {          {
3447          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3448            {            {
3449            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3450            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3451            }            }
3452          if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3453            eptr++;
3454          }          }
3455        if (min == max) continue;        if (min == max) continue;
3456        if (minimize)        if (minimize)
# Line 3166  for (;;) Line 3459  for (;;)
3459            {            {
3460            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3461            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3462            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3463            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3464              {              {
3465              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3466              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3467              }              }
3468            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3469              eptr++;
3470            }            }
3471          /* Control never gets here */          /* Control never gets here */
3472          }          }
# Line 3186  for (;;) Line 3480  for (;;)
3480              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3481              break;              break;
3482              }              }
3483            if (fc != md->lcc[*eptr]) break;            if (fc != *eptr && foc != *eptr) break;
3484            eptr++;            eptr++;
3485            }            }
3486    
# Line 3198  for (;;) Line 3492  for (;;)
3492            eptr--;            eptr--;
3493            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3494            }            }
3495          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3496          }          }
3497        /* Control never gets here */        /* Control never gets here */
3498        }        }
# Line 3212  for (;;) Line 3506  for (;;)
3506          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3507            {            {
3508            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3509            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3510            }            }
3511          if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3512          }          }
3513    
3514        if (min == max) continue;        if (min == max) continue;
# Line 3225  for (;;) Line 3519  for (;;)
3519            {            {
3520            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3521            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3522            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3523            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3524              {              {
3525              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3526              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3527              }              }
3528            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3529            }            }
3530          /* Control never gets here */          /* Control never gets here */
3531          }          }
# Line 3256  for (;;) Line 3550  for (;;)
3550            eptr--;            eptr--;
3551            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3552            }            }
3553          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3554          }          }
3555        }        }
3556      /* Control never gets here */      /* Control never gets here */
# Line 3265  for (;;) Line 3559  for (;;)
3559      checking can be multibyte. */      checking can be multibyte. */
3560    
3561      case OP_NOT:      case OP_NOT:
3562      case OP_NOTI:      case OP_NOTI:
3563      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3564        {        {
3565        SCHECK_PARTIAL();        SCHECK_PARTIAL();
3566        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3567        }        }
3568      ecode++;  #ifdef SUPPORT_UTF
3569      GETCHARINCTEST(c, eptr);      if (utf)
     if (op == OP_NOTI)         /* The caseless case */  
3570        {        {
3571  #ifdef SUPPORT_UTF8        register unsigned int ch, och;
3572        if (c < 256)  
3573  #endif        ecode++;
3574        c = md->lcc[c];        GETCHARINC(ch, ecode);
3575        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);        GETCHARINC(c, eptr);
3576    
3577          if (op == OP_NOT)
3578            {
3579            if (ch == c) RRETURN(MATCH_NOMATCH);
3580            }
3581          else
3582            {
3583    #ifdef SUPPORT_UCP
3584            if (ch > 127)
3585              och = UCD_OTHERCASE(ch);
3586    #else
3587            if (ch > 127)
3588              och = ch;
3589    #endif /* SUPPORT_UCP */
3590            else
3591              och = TABLE_GET(ch, md->fcc, ch);
3592            if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3593            }
3594        }        }
3595      else    /* Caseful */      else
3596    #endif
3597        {        {
3598        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);        register unsigned int ch = ecode[1];
3599          c = *eptr++;
3600          if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3601            RRETURN(MATCH_NOMATCH);
3602          ecode += 2;
3603        }        }
3604      break;      break;
3605    
# Line 3297  for (;;) Line 3613  for (;;)
3613      case OP_NOTEXACT:      case OP_NOTEXACT:
3614      case OP_NOTEXACTI:      case OP_NOTEXACTI:
3615      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3616      ecode += 3;      ecode += 1 + IMM2_SIZE;
3617      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3618    
3619      case OP_NOTUPTO:      case OP_NOTUPTO:
# Line 3307  for (;;) Line 3623  for (;;)
3623      min = 0;      min = 0;
3624      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3625      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3626      ecode += 3;      ecode += 1 + IMM2_SIZE;
3627      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3628    
3629      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
# Line 3339  for (;;) Line 3655  for (;;)
3655      possessive = TRUE;      possessive = TRUE;
3656      min = 0;      min = 0;
3657      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3658      ecode += 3;      ecode += 1 + IMM2_SIZE;
3659      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3660    
3661      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 3363  for (;;) Line 3679  for (;;)
3679      /* Common code for all repeated single-byte matches. */      /* Common code for all repeated single-byte matches. */
3680    
3681      REPEATNOTCHAR:      REPEATNOTCHAR:
3682      fc = *ecode++;      GETCHARINCTEST(fc, ecode);
3683    
3684      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
3685      since matching characters is likely to be quite common. First, ensure the      since matching characters is likely to be quite common. First, ensure the
# Line 3378  for (;;) Line 3694  for (;;)
3694    
3695      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3696        {        {
3697        fc = md->lcc[fc];  #ifdef SUPPORT_UTF
3698    #ifdef SUPPORT_UCP
3699          if (utf && fc > 127)
3700            foc = UCD_OTHERCASE(fc);
3701    #else
3702          if (utf && fc > 127)
3703            foc = fc;
3704    #endif /* SUPPORT_UCP */
3705          else
3706    #endif /* SUPPORT_UTF */
3707            foc = TABLE_GET(fc, md->fcc, fc);
3708    
3709  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3710        /* UTF-8 mode */        if (utf)
       if (utf8)  
3711          {          {
3712          register unsigned int d;          register unsigned int d;
3713          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3390  for (;;) Line 3715  for (;;)
3715            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3716              {              {
3717              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3718              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3719              }              }
3720            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3721            if (d < 256) d = md->lcc[d];            if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
           if (fc == d) MRRETURN(MATCH_NOMATCH);  
3722            }            }
3723          }          }
3724        else        else
3725  #endif  #endif
3726          /* Not UTF mode */
       /* Not UTF-8 mode */  
3727          {          {
3728          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3729            {            {
3730            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3731              {              {
3732              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3733              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3734              }              }
3735            if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3736              eptr++;
3737            }            }
3738          }          }
3739    
# Line 3417  for (;;) Line 3741  for (;;)
3741    
3742        if (minimize)        if (minimize)
3743          {          {
3744  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3745          /* UTF-8 mode */          if (utf)
         if (utf8)  
3746            {            {
3747            register unsigned int d;            register unsigned int d;
3748            for (fi = min;; fi++)            for (fi = min;; fi++)
3749              {              {
3750              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3751              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3752              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3753              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3754                {                {
3755                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3756                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3757                }                }
3758              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3759              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
             if (fc == d) MRRETURN(MATCH_NOMATCH);  
3760              }              }
3761            }            }
3762          else          else
3763  #endif  #endif
3764          /* Not UTF-8 mode */          /* Not UTF mode */
3765            {            {
3766            for (fi = min;; fi++)            for (fi = min;; fi++)
3767              {              {
3768              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3769              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3770              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3771              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3772                {                {
3773                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3774                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3775                }                }
3776              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);              if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3777                eptr++;
3778              }              }
3779            }            }
3780          /* Control never gets here */          /* Control never gets here */
# Line 3463  for (;;) Line 3786  for (;;)
3786          {          {
3787          pp = eptr;          pp = eptr;
3788    
3789  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3790          /* UTF-8 mode */          if (utf)
         if (utf8)  
3791            {            {
3792            register unsigned int d;            register unsigned int d;
3793            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3477  for (;;) Line 3799  for (;;)
3799                break;                break;
3800                }                }
3801              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3802              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) break;
             if (fc == d) break;  
3803              eptr += len;              eptr += len;
3804              }              }
3805          if (possessive) continue;            if (possessive) continue;
3806          for(;;)            for(;;)
3807              {              {
3808              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3809              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 3492  for (;;) Line 3813  for (;;)
3813            }            }
3814          else          else
3815  #endif  #endif
3816          /* Not UTF-8 mode */          /* Not UTF mode */
3817            {            {
3818            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3819              {              {
# Line 3501  for (;;) Line 3822  for (;;)
3822                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3823                break;                break;
3824                }                }
3825              if (fc == md->lcc[*eptr]) break;              if (fc == *eptr || foc == *eptr) break;
3826              eptr++;              eptr++;
3827              }              }
3828            if (possessive) continue;            if (possessive) continue;
# Line 3513  for (;;) Line 3834  for (;;)
3834              }              }
3835            }            }
3836    
3837          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3838          }          }
3839        /* Control never gets here */        /* Control never gets here */
3840        }        }
# Line 3522  for (;;) Line 3843  for (;;)
3843    
3844      else      else
3845        {        {
3846  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3847        /* UTF-8 mode */        if (utf)
       if (utf8)  
3848          {          {
3849          register unsigned int d;          register unsigned int d;
3850          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3532  for (;;) Line 3852  for (;;)
3852            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3853              {              {
3854              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3855              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3856              }              }
3857            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3858            if (fc == d) MRRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
3859            }            }
3860          }          }
3861        else        else
3862  #endif  #endif
3863        /* Not UTF-8 mode */        /* Not UTF mode */
3864          {          {
3865          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3866            {            {
3867            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3868              {              {
3869              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3870              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3871              }              }
3872            if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3873            }            }
3874          }          }
3875    
# Line 3557  for (;;) Line 3877  for (;;)
3877    
3878        if (minimize)        if (minimize)
3879          {          {
3880  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3881          /* UTF-8 mode */          if (utf)
         if (utf8)  
3882            {            {
3883            register unsigned int d;            register unsigned int d;
3884            for (fi = min;; fi++)            for (fi = min;; fi++)
3885              {              {
3886              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3887              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3888              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3889              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3890                {                {
3891                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3892                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3893                }                }
3894              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3895              if (fc == d) MRRETURN(MATCH_NOMATCH);              if (fc == d) RRETURN(MATCH_NOMATCH);
3896              }              }
3897            }            }
3898          else          else
3899  #endif  #endif
3900          /* Not UTF-8 mode */          /* Not UTF mode */
3901            {            {
3902            for (fi = min;; fi++)            for (fi = min;; fi++)
3903              {              {
3904              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3905              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3906              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3907              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3908                {                {
3909                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3910                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3911                }                }
3912              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);              if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3913              }              }
3914            }            }
3915          /* Control never gets here */          /* Control never gets here */
# Line 3602  for (;;) Line 3921  for (;;)
3921          {          {
3922          pp = eptr;          pp = eptr;
3923    
3924  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3925          /* UTF-8 mode */          if (utf)
         if (utf8)  
3926            {            {
3927            register unsigned int d;            register unsigned int d;
3928            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3630  for (;;) Line 3948  for (;;)
3948            }            }
3949          else          else
3950  #endif  #endif
3951          /* Not UTF-8 mode */          /* Not UTF mode */
3952            {            {
3953            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3954              {              {
# Line 3651  for (;;) Line 3969  for (;;)
3969              }              }
3970            }            }
3971    
3972          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3973          }          }
3974        }        }
3975      /* Control never gets here */      /* Control never gets here */
# Line 3663  for (;;) Line 3981  for (;;)
3981      case OP_TYPEEXACT:      case OP_TYPEEXACT:
3982      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3983      minimize = TRUE;      minimize = TRUE;
3984      ecode += 3;      ecode += 1 + IMM2_SIZE;
3985      goto REPEATTYPE;      goto REPEATTYPE;
3986    
3987      case OP_TYPEUPTO:      case OP_TYPEUPTO:
# Line 3671  for (;;) Line 3989  for (;;)
3989      min = 0;      min = 0;
3990      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3991      minimize = *ecode == OP_TYPEMINUPTO;      minimize = *ecode == OP_TYPEMINUPTO;
3992      ecode += 3;      ecode += 1 + IMM2_SIZE;
3993      goto REPEATTYPE;      goto REPEATTYPE;
3994    
3995      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
# Line 3699  for (;;) Line 4017  for (;;)
4017      possessive = TRUE;      possessive = TRUE;
4018      min = 0;      min = 0;
4019      max = GET2(ecode, 1);      max = GET2(ecode, 1);
4020      ecode += 3;      ecode += 1 + IMM2_SIZE;
4021      goto REPEATTYPE;      goto REPEATTYPE;
4022    
4023      case OP_TYPESTAR:      case OP_TYPESTAR:
# Line 3745  for (;;) Line 4063  for (;;)
4063          switch(prop_type)          switch(prop_type)
4064            {            {
4065            case PT_ANY:            case PT_ANY:
4066            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4067            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4068              {              {
4069              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4070                {                {
4071                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4072                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4073                }                }
4074              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4075              }              }
# Line 3760  for (;;) Line 4078  for (;;)
4078            case PT_LAMP:            case PT_LAMP:
4079            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4080              {              {
4081                int chartype;
4082              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4083                {                {
4084                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4085                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4086                }                }
4087              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4088              prop_chartype = UCD_CHARTYPE(c);              chartype = UCD_CHARTYPE(c);
4089              if ((prop_chartype == ucp_Lu ||              if ((chartype == ucp_Lu ||
4090                   prop_chartype == ucp_Ll ||                   chartype == ucp_Ll ||
4091                   prop_chartype == ucp_Lt) == prop_fail_result)                   chartype == ucp_Lt) == prop_fail_result)
4092                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4093              }              }
4094            break;            break;
4095    
# Line 3780  for (;;) Line 4099  for (;;)
4099              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4100                {                {
4101                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4102                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4103                }                }
4104              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4105              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4106              if ((prop_category == prop_value) == prop_fail_result)                RRETURN(MATCH_NOMATCH);
               MRRETURN(MATCH_NOMATCH);  
4107              }              }
4108            break;            break;
4109    
# Line 3795  for (;;) Line 4113  for (;;)
4113              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4114                {                {
4115                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4116                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4117                }                }
4118              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4119              prop_chartype = UCD_CHARTYPE(c);              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4120              if ((prop_chartype == prop_value) == prop_fail_result)                RRETURN(MATCH_NOMATCH);
               MRRETURN(MATCH_NOMATCH);  
4121              }              }
4122            break;            break;
4123    
# Line 3810  for (;;) Line 4127  for (;;)
4127              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4128                {                {
4129                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4130                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4131                }                }
4132              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4133              prop_script = UCD_SCRIPT(c);              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4134              if ((prop_script == prop_value) == prop_fail_result)                RRETURN(MATCH_NOMATCH);
               MRRETURN(MATCH_NOMATCH);  
4135              }              }
4136            break;            break;
4137    
4138            case PT_ALNUM:            case PT_ALNUM:
4139            for (i = 1; i <= min; i++)            for (i = 1