/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 667 by ph10, Mon Aug 22 14:57:32 2011 UTC revision 1189 by ph10, Tue Oct 30 16:34:17 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
   
40  /* This module contains pcre_exec(), the externally visible function that does  /* This module contains pcre_exec(), the externally visible function that does
41  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
# Line 82  negative to avoid the external error cod Line 81  negative to avoid the external error cod
81  #define MATCH_SKIP_ARG     (-993)  #define MATCH_SKIP_ARG     (-993)
82  #define MATCH_THEN         (-992)  #define MATCH_THEN         (-992)
83    
 /* This is a convenience macro for code that occurs many times. */  
   
 #define MRRETURN(ra) \  
   { \  
   md->mark = markptr; \  
   RRETURN(ra); \  
   }  
   
84  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
85  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
86  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 101  because the offset vector is always a mu Line 92  because the offset vector is always a mu
92  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
93  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
94    
   
   
95  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
96  /*************************************************  /*************************************************
97  *        Debugging function to print chars       *  *        Debugging function to print chars       *
# Line 121  Returns:     nothing Line 110  Returns:     nothing
110  */  */
111    
112  static void  static void
113  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
114  {  {
115  unsigned int c;  pcre_uint32 c;
116    BOOL utf = md->utf;
117  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
118  while (length-- > 0)  while (length-- > 0)
119    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = RAWUCHARINCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
120  }  }
121  #endif  #endif
122    
# Line 148  Arguments: Line 138  Arguments:
138    md          points to match data block    md          points to match data block
139    caseless    TRUE if caseless    caseless    TRUE if caseless
140    
141  Returns:      < 0 if not matched, otherwise the number of subject bytes matched  Returns:      >= 0 the number of subject bytes matched
142                  -1 no match
143                  -2 partial match; always given if at end subject
144  */  */
145    
146  static int  static int
147  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
148    BOOL caseless)    BOOL caseless)
149  {  {
150  USPTR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
151  register USPTR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
152    #ifdef SUPPORT_UTF
153    BOOL utf = md->utf;
154    #endif
155    
156  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
157  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 171  pchars(p, length, FALSE, md); Line 166  pchars(p, length, FALSE, md);
166  printf("\n");  printf("\n");
167  #endif  #endif
168    
169  /* Always fail if reference not set (and not JavaScript compatible). */  /* Always fail if reference not set (and not JavaScript compatible - in that
170    case the length is passed as zero). */
171    
172  if (length < 0) return -1;  if (length < 0) return -1;
173    
# Line 181  ASCII characters. */ Line 177  ASCII characters. */
177    
178  if (caseless)  if (caseless)
179    {    {
180  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
181  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
182    if (md->utf8)    if (utf)
183      {      {
184      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
185      bytes matched may differ, because there are some characters whose upper and      data units matched may differ, because in UTF-8 there are some characters
186      lower case versions code as different numbers of bytes. For example, U+023A      whose upper and lower case versions have different numbers of bytes.
187      (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);      For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
188      a sequence of 3 of the former uses 6 bytes, as does a sequence of two of      (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
189      the latter. It is important, therefore, to check the length along the      sequence of two of the latter. It is important, therefore, to check the
190      reference, not along the subject (earlier code did this wrong). */      length along the reference, not along the subject (earlier code did this
191        wrong). */
192    
193      USPTR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
194      while (p < endptr)      while (p < endptr)
195        {        {
196        int c, d;        pcre_uint32 c, d;
197        if (eptr >= md->end_subject) return -1;        const ucd_record *ur;
198          if (eptr >= md->end_subject) return -2;   /* Partial match */
199        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
200        GETCHARINC(d, p);        GETCHARINC(d, p);
201        if (c != d && c != UCD_OTHERCASE(d)) return -1;        ur = GET_UCD(d);
202          if (c != d && c != d + ur->other_case)
203            {
204            const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
205            for (;;)
206              {
207              if (c < *pp) return -1;
208              if (c == *pp++) break;
209              }
210            }
211        }        }
212      }      }
213    else    else
# Line 210  if (caseless) Line 217  if (caseless)
217    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
218    is no UCP support. */    is no UCP support. */
219      {      {
     if (eptr + length > md->end_subject) return -1;  
220      while (length-- > 0)      while (length-- > 0)
221        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }        {
222          pcre_uchar cc, cp;
223          if (eptr >= md->end_subject) return -2;   /* Partial match */
224          cc = RAWUCHARTEST(eptr);
225          cp = RAWUCHARTEST(p);
226          if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
227          p++;
228          eptr++;
229          }
230      }      }
231    }    }
232    
# Line 221  are in UTF-8 mode. */ Line 235  are in UTF-8 mode. */
235    
236  else  else
237    {    {
238    if (eptr + length > md->end_subject) return -1;    while (length-- > 0)
239    while (length-- > 0) if (*p++ != *eptr++) return -1;      {
240        if (eptr >= md->end_subject) return -2;   /* Partial match */
241        if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1;
242        }
243    }    }
244    
245  return eptr - eptr_start;  return (int)(eptr - eptr_start);
246  }  }
247    
248    
# Line 277  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 294  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
294         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
295         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
296         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
297         RM61,  RM62, RM63 };         RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };
298    
299  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
300  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 290  actually used in this definition. */ Line 307  actually used in this definition. */
307  #define RMATCH(ra,rb,rc,rd,re,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
308    { \    { \
309    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
310    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
311    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
312    }    }
313  #define RRETURN(ra) \  #define RRETURN(ra) \
314    { \    { \
315    printf("match() returned %d from line %d ", ra, __LINE__); \    printf("match() returned %d from line %d\n", ra, __LINE__); \
316    return ra; \    return ra; \
317    }    }
318  #else  #else
319  #define RMATCH(ra,rb,rc,rd,re,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
320    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
321  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
322  #endif  #endif
323    
# Line 315  argument of match(), which never changes Line 332  argument of match(), which never changes
332    
333  #define RMATCH(ra,rb,rc,rd,re,rw)\  #define RMATCH(ra,rb,rc,rd,re,rw)\
334    {\    {\
335    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = frame->Xnextframe;\
336    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL)\
337    frame->Xwhere = rw; \      {\
338        newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
339        if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
340        newframe->Xnextframe = NULL;\
341        frame->Xnextframe = newframe;\
342        }\
343      frame->Xwhere = rw;\
344    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
345    newframe->Xecode = rb;\    newframe->Xecode = rb;\
346    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
   newframe->Xmarkptr = markptr;\  
347    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
348    newframe->Xeptrb = re;\    newframe->Xeptrb = re;\
349    newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
# Line 337  argument of match(), which never changes Line 359  argument of match(), which never changes
359    {\    {\
360    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
361    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
   (pcre_stack_free)(oldframe);\  
362    if (frame != NULL)\    if (frame != NULL)\
363      {\      {\
364      rrc = ra;\      rrc = ra;\
# Line 351  argument of match(), which never changes Line 372  argument of match(), which never changes
372    
373  typedef struct heapframe {  typedef struct heapframe {
374    struct heapframe *Xprevframe;    struct heapframe *Xprevframe;
375      struct heapframe *Xnextframe;
376    
377    /* Function arguments that may change */    /* Function arguments that may change */
378    
379    USPTR Xeptr;    PCRE_PUCHAR Xeptr;
380    const uschar *Xecode;    const pcre_uchar *Xecode;
381    USPTR Xmstart;    PCRE_PUCHAR Xmstart;
   USPTR Xmarkptr;  
382    int Xoffset_top;    int Xoffset_top;
383    eptrblock *Xeptrb;    eptrblock *Xeptrb;
384    unsigned int Xrdepth;    unsigned int Xrdepth;
385    
386    /* Function local variables */    /* Function local variables */
387    
388    USPTR Xcallpat;    PCRE_PUCHAR Xcallpat;
389  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
390    USPTR Xcharptr;    PCRE_PUCHAR Xcharptr;
391  #endif  #endif
392    USPTR Xdata;    PCRE_PUCHAR Xdata;
393    USPTR Xnext;    PCRE_PUCHAR Xnext;
394    USPTR Xpp;    PCRE_PUCHAR Xpp;
395    USPTR Xprev;    PCRE_PUCHAR Xprev;
396    USPTR Xsaved_eptr;    PCRE_PUCHAR Xsaved_eptr;
397    
398    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
399    
# Line 382  typedef struct heapframe { Line 403  typedef struct heapframe {
403    
404  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
405    int Xprop_type;    int Xprop_type;
406    int Xprop_value;    unsigned int Xprop_value;
407    int Xprop_fail_result;    int Xprop_fail_result;
408    int Xoclength;    int Xoclength;
409    uschar Xocchars[8];    pcre_uchar Xocchars[6];
410  #endif  #endif
411    
412    int Xcodelink;    int Xcodelink;
# Line 427  returns a negative (error) response, the Line 448  returns a negative (error) response, the
448  same response. */  same response. */
449    
450  /* These macros pack up tests that are used for partial matching, and which  /* These macros pack up tests that are used for partial matching, and which
451  appears several times in the code. We set the "hit end" flag if the pointer is  appear several times in the code. We set the "hit end" flag if the pointer is
452  at the end of the subject and also past the start of the subject (i.e.  at the end of the subject and also past the start of the subject (i.e.
453  something has been matched). For hard partial matching, we then return  something has been matched). For hard partial matching, we then return
454  immediately. The second one is used when we already know we are past the end of  immediately. The second one is used when we already know we are past the end of
# Line 438  the subject. */ Line 459  the subject. */
459        eptr > md->start_used_ptr) \        eptr > md->start_used_ptr) \
460      { \      { \
461      md->hitend = TRUE; \      md->hitend = TRUE; \
462      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
463      }      }
464    
465  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
466    if (md->partial != 0 && eptr > md->start_used_ptr) \    if (md->partial != 0 && eptr > md->start_used_ptr) \
467      { \      { \
468      md->hitend = TRUE; \      md->hitend = TRUE; \
469      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
470      }      }
471    
472    
473  /* Performance note: It might be tempting to extract commonly used fields from  /* Performance note: It might be tempting to extract commonly used fields from
474  the md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf, end_subject) into individual variables to improve
475  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
476  made performance worse.  made performance worse.
477    
# Line 459  Arguments: Line 480  Arguments:
480     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
481     mstart      pointer to the current match start position (can be modified     mstart      pointer to the current match start position (can be modified
482                   by encountering \K)                   by encountering \K)
    markptr     pointer to the most recent MARK name, or NULL  
483     offset_top  current top pointer     offset_top  current top pointer
484     md          pointer to "static" info for the match     md          pointer to "static" info for the match
485     eptrb       pointer to chain of blocks containing eptr at start of     eptrb       pointer to chain of blocks containing eptr at start of
# Line 474  Returns:       MATCH_MATCH if matched Line 494  Returns:       MATCH_MATCH if matched
494  */  */
495    
496  static int  static int
497  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
498    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,    PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
499    unsigned int rdepth)    unsigned int rdepth)
500  {  {
501  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
# Line 484  so they can be ordinary variables in all Line 504  so they can be ordinary variables in all
504    
505  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
506  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
507  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
508  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
509    
510  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
511  BOOL caseless;  BOOL caseless;
512  int condcode;  int condcode;
513    
514  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
515  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
516  heap storage. Set up the top-level frame here; others are obtained from the  frame on the stack here; subsequent instantiations are obtained from the heap
517  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
518    the top-level frame on the stack rather than malloc-ing them all gives a performance
519    boost in many cases where there is not much "recursion". */
520    
521  #ifdef NO_RECURSE  #ifdef NO_RECURSE
522  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)md->match_frames_base;
 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  
 frame->Xprevframe = NULL;            /* Marks the top level */  
523    
524  /* Copy in the original argument variables */  /* Copy in the original argument variables */
525    
526  frame->Xeptr = eptr;  frame->Xeptr = eptr;
527  frame->Xecode = ecode;  frame->Xecode = ecode;
528  frame->Xmstart = mstart;  frame->Xmstart = mstart;
 frame->Xmarkptr = markptr;  
529  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
530  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
531  frame->Xrdepth = rdepth;  frame->Xrdepth = rdepth;
# Line 520  HEAP_RECURSE: Line 539  HEAP_RECURSE:
539  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
540  #define ecode              frame->Xecode  #define ecode              frame->Xecode
541  #define mstart             frame->Xmstart  #define mstart             frame->Xmstart
 #define markptr            frame->Xmarkptr  
542  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
543  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
544  #define rdepth             frame->Xrdepth  #define rdepth             frame->Xrdepth
545    
546  /* Ditto for the local variables */  /* Ditto for the local variables */
547    
548  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
549  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
550  #endif  #endif
551  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
# Line 585  declarations can be cut out in a block. Line 603  declarations can be cut out in a block.
603  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
604  to RMATCH(). */  to RMATCH(). */
605    
606  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
607  const uschar *charptr;  const pcre_uchar *charptr;
608  #endif  #endif
609  const uschar *callpat;  const pcre_uchar *callpat;
610  const uschar *data;  const pcre_uchar *data;
611  const uschar *next;  const pcre_uchar *next;
612  USPTR         pp;  PCRE_PUCHAR       pp;
613  const uschar *prev;  const pcre_uchar *prev;
614  USPTR         saved_eptr;  PCRE_PUCHAR       saved_eptr;
615    
616  recursion_info new_recursive;  recursion_info new_recursive;
617    
# Line 603  BOOL prev_is_word; Line 621  BOOL prev_is_word;
621    
622  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
623  int prop_type;  int prop_type;
624  int prop_value;  unsigned int prop_value;
625  int prop_fail_result;  int prop_fail_result;
626  int oclength;  int oclength;
627  uschar occhars[8];  pcre_uchar occhars[6];
628  #endif  #endif
629    
630  int codelink;  int codelink;
# Line 614  int ctype; Line 632  int ctype;
632  int length;  int length;
633  int max;  int max;
634  int min;  int min;
635  int number;  unsigned int number;
636  int offset;  int offset;
637  int op;  pcre_uchar op;
638  int save_capture_last;  int save_capture_last;
639  int save_offset1, save_offset2, save_offset3;  int save_offset1, save_offset2, save_offset3;
640  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
641    
642  eptrblock newptrb;  eptrblock newptrb;
643    
644    /* There is a special fudge for calling match() in a way that causes it to
645    measure the size of its basic stack frame when the stack is being used for
646    recursion. The second argument (ecode) being NULL triggers this behaviour. It
647    cannot normally ever be NULL. The return is the negated value of the frame
648    size. */
649    
650    if (ecode == NULL)
651      {
652      if (rdepth == 0)
653        return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
654      else
655        {
656        int len = (char *)&rdepth - (char *)eptr;
657        return (len > 0)? -len : len;
658        }
659      }
660  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
661    
662  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
# Line 634  the alternative names that are used. */ Line 669  the alternative names that are used. */
669  #define code_offset   codelink  #define code_offset   codelink
670  #define condassert    condition  #define condassert    condition
671  #define matched_once  prev_is_word  #define matched_once  prev_is_word
672    #define foc           number
673    #define save_mark     data
674    
675  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
676  variables. */  variables. */
# Line 659  defined). However, RMATCH isn't like a f Line 696  defined). However, RMATCH isn't like a f
696  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
697  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
698    
699  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
700  utf8 = md->utf8;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
701  #else  #else
702  utf8 = FALSE;  utf = FALSE;
703  #endif  #endif
704    
705  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
# Line 701  for (;;) Line 738  for (;;)
738    switch(op)    switch(op)
739      {      {
740      case OP_MARK:      case OP_MARK:
741      markptr = ecode + 2;      md->nomatch_mark = ecode + 2;
742      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      md->mark = NULL;    /* In case previously set by assertion */
743        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
744        eptrb, RM55);        eptrb, RM55);
745        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
746             md->mark == NULL) md->mark = ecode + 2;
747    
748      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
749      argument, and we must check whether that argument matches this MARK's      argument, and we must check whether that argument matches this MARK's
# Line 712  for (;;) Line 752  for (;;)
752      position and return MATCH_SKIP. Otherwise, pass back the return code      position and return MATCH_SKIP. Otherwise, pass back the return code
753      unaltered. */      unaltered. */
754    
755      if (rrc == MATCH_SKIP_ARG &&      else if (rrc == MATCH_SKIP_ARG &&
756          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)          STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
757        {        {
758        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
759        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
760        }        }
   
     if (md->mark == NULL) md->mark = markptr;  
761      RRETURN(rrc);      RRETURN(rrc);
762    
763      case OP_FAIL:      case OP_FAIL:
764      MRRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
765    
766      /* COMMIT overrides PRUNE, SKIP, and THEN */      /* COMMIT overrides PRUNE, SKIP, and THEN */
767    
768      case OP_COMMIT:      case OP_COMMIT:
769      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
770        eptrb, RM52);        eptrb, RM52);
771      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
772          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
773          rrc != MATCH_THEN)          rrc != MATCH_THEN)
774        RRETURN(rrc);        RRETURN(rrc);
775      MRRETURN(MATCH_COMMIT);      RRETURN(MATCH_COMMIT);
776    
777      /* PRUNE overrides THEN */      /* PRUNE overrides THEN */
778    
779      case OP_PRUNE:      case OP_PRUNE:
780      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
781        eptrb, RM51);        eptrb, RM51);
782      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
783      MRRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
784    
785      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
786      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      md->nomatch_mark = ecode + 2;
787        md->mark = NULL;    /* In case previously set by assertion */
788        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
789        eptrb, RM56);        eptrb, RM56);
790        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
791             md->mark == NULL) md->mark = ecode + 2;
792      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
     md->mark = ecode + 2;  
793      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
794    
795      /* SKIP overrides PRUNE and THEN */      /* SKIP overrides PRUNE and THEN */
796    
797      case OP_SKIP:      case OP_SKIP:
798      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
799        eptrb, RM53);        eptrb, RM53);
800      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
801        RRETURN(rrc);        RRETURN(rrc);
802      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
803      MRRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
804    
805        /* Note that, for Perl compatibility, SKIP with an argument does NOT set
806        nomatch_mark. There is a flag that disables this opcode when re-matching a
807        pattern that ended with a SKIP for which there was not a matching MARK. */
808    
809      case OP_SKIP_ARG:      case OP_SKIP_ARG:
810      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      if (md->ignore_skip_arg)
811          {
812          ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
813          break;
814          }
815        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
816        eptrb, RM57);        eptrb, RM57);
817      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
818        RRETURN(rrc);        RRETURN(rrc);
819    
820      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
821      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
822      caught by a matching MARK, or get to the top, where it is treated the same      caught by a matching MARK, or get to the top, where it causes a rematch
823      as PRUNE. */      with the md->ignore_skip_arg flag set. */
824    
825      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
826      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
827    
828      /* For THEN (and THEN_ARG) we pass back the address of the bracket or      /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
829      the alt that is at the start of the current branch. This makes it possible      the branch in which it occurs can be determined. Overload the start of
830      to skip back past alternatives that precede the THEN within the current      match pointer to do this. */
     branch. */  
831    
832      case OP_THEN:      case OP_THEN:
833      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
834        eptrb, RM54);        eptrb, RM54);
835      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
836      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
837      MRRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
838    
839      case OP_THEN_ARG:      case OP_THEN_ARG:
840      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],      md->nomatch_mark = ecode + 2;
841        offset_top, md, eptrb, RM58);      md->mark = NULL;    /* In case previously set by assertion */
842        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
843          md, eptrb, RM58);
844        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
845             md->mark == NULL) md->mark = ecode + 2;
846      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
847      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
     md->mark = ecode + LINK_SIZE + 2;  
848      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
849    
850        /* Handle an atomic group that does not contain any capturing parentheses.
851        This can be handled like an assertion. Prior to 8.13, all atomic groups
852        were handled this way. In 8.13, the code was changed as below for ONCE, so
853        that backups pass through the group and thereby reset captured values.
854        However, this uses a lot more stack, so in 8.20, atomic groups that do not
855        contain any captures generate OP_ONCE_NC, which can be handled in the old,
856        less stack intensive way.
857    
858        Check the alternative branches in turn - the matching won't pass the KET
859        for this kind of subpattern. If any one branch matches, we carry on as at
860        the end of a normal bracket, leaving the subject pointer, but resetting
861        the start-of-match value in case it was changed by \K. */
862    
863        case OP_ONCE_NC:
864        prev = ecode;
865        saved_eptr = eptr;
866        save_mark = md->mark;
867        do
868          {
869          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
870          if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
871            {
872            mstart = md->start_match_ptr;
873            break;
874            }
875          if (rrc == MATCH_THEN)
876            {
877            next = ecode + GET(ecode,1);
878            if (md->start_match_ptr < next &&
879                (*ecode == OP_ALT || *next == OP_ALT))
880              rrc = MATCH_NOMATCH;
881            }
882    
883          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
884          ecode += GET(ecode,1);
885          md->mark = save_mark;
886          }
887        while (*ecode == OP_ALT);
888    
889        /* If hit the end of the group (which could be repeated), fail */
890    
891        if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
892    
893        /* Continue as from after the group, updating the offsets high water
894        mark, since extracts may have been taken. */
895    
896        do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
897    
898        offset_top = md->end_offset_top;
899        eptr = md->end_match_ptr;
900    
901        /* For a non-repeating ket, just continue at this level. This also
902        happens for a repeating ket if no characters were matched in the group.
903        This is the forcible breaking of infinite loops as implemented in Perl
904        5.005. */
905    
906        if (*ecode == OP_KET || eptr == saved_eptr)
907          {
908          ecode += 1+LINK_SIZE;
909          break;
910          }
911    
912        /* The repeating kets try the rest of the pattern or restart from the
913        preceding bracket, in the appropriate order. The second "call" of match()
914        uses tail recursion, to avoid using another stack frame. */
915    
916        if (*ecode == OP_KETRMIN)
917          {
918          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
919          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
920          ecode = prev;
921          goto TAIL_RECURSE;
922          }
923        else  /* OP_KETRMAX */
924          {
925          RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
926          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
927          ecode += 1 + LINK_SIZE;
928          goto TAIL_RECURSE;
929          }
930        /* Control never gets here */
931    
932      /* Handle a capturing bracket, other than those that are possessive with an      /* Handle a capturing bracket, other than those that are possessive with an
933      unlimited repeat. If there is space in the offset vector, save the current      unlimited repeat. If there is space in the offset vector, save the current
934      subject position in the working slot at the top of the vector. We mustn't      subject position in the working slot at the top of the vector. We mustn't
# Line 827  for (;;) Line 961  for (;;)
961        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
962        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
963        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
964          save_mark = md->mark;
965    
966        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
967        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 835  for (;;) Line 970  for (;;)
970        for (;;)        for (;;)
971          {          {
972          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
973          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
974            eptrb, RM1);            eptrb, RM1);
975          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
976          if (rrc != MATCH_NOMATCH &&  
977              (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* If we backed up to a THEN, check whether it is within the current
978            RRETURN(rrc);          branch by comparing the address of the THEN that is passed back with
979            the end of the branch. If it is within the current branch, and the
980            branch is one of two or more alternatives (it either starts or ends
981            with OP_ALT), we have reached the limit of THEN's action, so convert
982            the return code to NOMATCH, which will cause normal backtracking to
983            happen from now on. Otherwise, THEN is passed back to an outer
984            alternative. This implements Perl's treatment of parenthesized groups,
985            where a group not containing | does not affect the current alternative,
986            that is, (X) is NOT the same as (X|(*F)). */
987    
988            if (rrc == MATCH_THEN)
989              {
990              next = ecode + GET(ecode,1);
991              if (md->start_match_ptr < next &&
992                  (*ecode == OP_ALT || *next == OP_ALT))
993                rrc = MATCH_NOMATCH;
994              }
995    
996            /* Anything other than NOMATCH is passed back. */
997    
998            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
999          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
1000          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
1001            md->mark = save_mark;
1002          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
1003          }          }
1004    
# Line 851  for (;;) Line 1007  for (;;)
1007        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
1008        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
1009    
1010        /* At this point, rrc will be one of MATCH_ONCE, MATCH_NOMATCH, or        /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
       MATCH_THEN. */  
1011    
1012        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;        RRETURN(rrc);
       RRETURN(((rrc == MATCH_ONCE)? MATCH_ONCE:MATCH_NOMATCH));  
1013        }        }
1014    
1015      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
# Line 870  for (;;) Line 1024  for (;;)
1024      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1025    
1026      /* Non-capturing or atomic group, except for possessive with unlimited      /* Non-capturing or atomic group, except for possessive with unlimited
1027      repeat. Loop for all the alternatives. When we get to the final alternative      repeat and ONCE group with no captures. Loop for all the alternatives.
1028      within the brackets, we used to return the result of a recursive call to  
1029      match() whatever happened so it was possible to reduce stack usage by      When we get to the final alternative within the brackets, we used to return
1030      turning this into a tail recursion, except in the case of a possibly empty      the result of a recursive call to match() whatever happened so it was
1031      group. However, now that there is the possibility of (*THEN) occurring in      possible to reduce stack usage by turning this into a tail recursion,
1032      the final alternative, this optimization is no longer possible.      except in the case of a possibly empty group. However, now that there is
1033        the possibility of (*THEN) occurring in the final alternative, this
1034        optimization is no longer always possible.
1035    
1036        We can optimize if we know there are no (*THEN)s in the pattern; at present
1037        this is the best that can be done.
1038    
1039      MATCH_ONCE is returned when the end of an atomic group is successfully      MATCH_ONCE is returned when the end of an atomic group is successfully
1040      reached, but subsequent matching fails. It passes back up the tree (causing      reached, but subsequent matching fails. It passes back up the tree (causing
# Line 891  for (;;) Line 1050  for (;;)
1050    
1051      for (;;)      for (;;)
1052        {        {
1053        if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA || op == OP_ONCE)
1054        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,          md->match_function_type = MATCH_CBEGROUP;
1055    
1056          /* If this is not a possibly empty group, and there are no (*THEN)s in
1057          the pattern, and this is the final alternative, optimize as described
1058          above. */
1059    
1060          else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1061            {
1062            ecode += PRIV(OP_lengths)[*ecode];
1063            goto TAIL_RECURSE;
1064            }
1065    
1066          /* In all other cases, we have to make another call to match(). */
1067    
1068          save_mark = md->mark;
1069          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1070          RM2);          RM2);
1071        if (rrc != MATCH_NOMATCH &&  
1072            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1073          THEN. */
1074    
1075          if (rrc == MATCH_THEN)
1076            {
1077            next = ecode + GET(ecode,1);
1078            if (md->start_match_ptr < next &&
1079                (*ecode == OP_ALT || *next == OP_ALT))
1080              rrc = MATCH_NOMATCH;
1081            }
1082    
1083          if (rrc != MATCH_NOMATCH)
1084          {          {
1085          if (rrc == MATCH_ONCE)          if (rrc == MATCH_ONCE)
1086            {            {
1087            const uschar *scode = ecode;            const pcre_uchar *scode = ecode;
1088            if (*scode != OP_ONCE)           /* If not at start, find it */            if (*scode != OP_ONCE)           /* If not at start, find it */
1089              {              {
1090              while (*scode == OP_ALT) scode += GET(scode, 1);              while (*scode == OP_ALT) scode += GET(scode, 1);
# Line 910  for (;;) Line 1095  for (;;)
1095          RRETURN(rrc);          RRETURN(rrc);
1096          }          }
1097        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1098          md->mark = save_mark;
1099        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1100        }        }
1101      if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;  
1102      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
1103    
1104      /* Handle possessive capturing brackets with an unlimited repeat. We come      /* Handle possessive capturing brackets with an unlimited repeat. We come
# Line 941  for (;;) Line 1127  for (;;)
1127      if (offset < md->offset_max)      if (offset < md->offset_max)
1128        {        {
1129        matched_once = FALSE;        matched_once = FALSE;
1130        code_offset = ecode - md->start_code;        code_offset = (int)(ecode - md->start_code);
1131    
1132        save_offset1 = md->offset_vector[offset];        save_offset1 = md->offset_vector[offset];
1133        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
# Line 964  for (;;) Line 1150  for (;;)
1150          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1151            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1152          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1153          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1154            eptrb, RM63);            eptrb, RM63);
1155          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1156            {            {
# Line 975  for (;;) Line 1161  for (;;)
1161            matched_once = TRUE;            matched_once = TRUE;
1162            continue;            continue;
1163            }            }
1164          if (rrc != MATCH_NOMATCH &&  
1165              (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* See comment in the code for capturing groups above about handling
1166            RRETURN(rrc);          THEN. */
1167    
1168            if (rrc == MATCH_THEN)
1169              {
1170              next = ecode + GET(ecode,1);
1171              if (md->start_match_ptr < next &&
1172                  (*ecode == OP_ALT || *next == OP_ALT))
1173                rrc = MATCH_NOMATCH;
1174              }
1175    
1176            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1177          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
1178          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
1179          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
# Line 990  for (;;) Line 1186  for (;;)
1186          md->offset_vector[md->offset_end - number] = save_offset3;          md->offset_vector[md->offset_end - number] = save_offset3;
1187          }          }
1188    
       if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;  
1189        if (allow_zero || matched_once)        if (allow_zero || matched_once)
1190          {          {
1191          ecode += 1 + LINK_SIZE;          ecode += 1 + LINK_SIZE;
# Line 1022  for (;;) Line 1217  for (;;)
1217    
1218      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1219      matched_once = FALSE;      matched_once = FALSE;
1220      code_offset = ecode - md->start_code;      code_offset = (int)(ecode - md->start_code);
1221    
1222      for (;;)      for (;;)
1223        {        {
1224        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1225        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1226          eptrb, RM48);          eptrb, RM48);
1227        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1228          {          {
# Line 1037  for (;;) Line 1232  for (;;)
1232          matched_once = TRUE;          matched_once = TRUE;
1233          continue;          continue;
1234          }          }
1235        if (rrc != MATCH_NOMATCH &&  
1236            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1237          RRETURN(rrc);        THEN. */
1238    
1239          if (rrc == MATCH_THEN)
1240            {
1241            next = ecode + GET(ecode,1);
1242            if (md->start_match_ptr < next &&
1243                (*ecode == OP_ALT || *next == OP_ALT))
1244              rrc = MATCH_NOMATCH;
1245            }
1246    
1247          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1248        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1249        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1250        }        }
# Line 1067  for (;;) Line 1272  for (;;)
1272    
1273      if (ecode[LINK_SIZE+1] == OP_CALLOUT)      if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1274        {        {
1275        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
1276          {          {
1277          pcre_callout_block cb;          PUBL(callout_block) cb;
1278          cb.version          = 2;   /* Version 2 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
1279          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1280          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1281    #if defined COMPILE_PCRE8
1282          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1283    #elif defined COMPILE_PCRE16
1284            cb.subject          = (PCRE_SPTR16)md->start_subject;
1285    #elif defined COMPILE_PCRE32
1286            cb.subject          = (PCRE_SPTR32)md->start_subject;
1287    #endif
1288          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1289          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
1290          cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
# Line 1082  for (;;) Line 1293  for (;;)
1293          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1294          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1295          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1296          cb.mark             = markptr;          cb.mark             = md->nomatch_mark;
1297          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1298          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1299          }          }
1300        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1301        }        }
1302    
1303      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1102  for (;;) Line 1313  for (;;)
1313          }          }
1314        else        else
1315          {          {
1316          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/          unsigned int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1317          condition =  (recno == RREF_ANY || recno == md->recursive->group_num);          condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1318    
1319          /* If the test is for recursion into a specific subpattern, and it is          /* If the test is for recursion into a specific subpattern, and it is
1320          false, but the test was set up by name, scan the table to see if the          false, but the test was set up by name, scan the table to see if the
1321          name refers to any other numbers, and test them. The condition is true          name refers to any other numbers, and test them. The condition is true
1322          if any one is set. */          if any one is set. */
1323    
1324          if (!condition && condcode == OP_NRREF && recno != RREF_ANY)          if (!condition && condcode == OP_NRREF)
1325            {            {
1326            uschar *slotA = md->name_table;            pcre_uchar *slotA = md->name_table;
1327            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
1328              {              {
1329              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
# Line 1125  for (;;) Line 1336  for (;;)
1336    
1337            if (i < md->name_count)            if (i < md->name_count)
1338              {              {
1339              uschar *slotB = slotA;              pcre_uchar *slotB = slotA;
1340              while (slotB > md->name_table)              while (slotB > md->name_table)
1341                {                {
1342                slotB -= md->name_entry_size;                slotB -= md->name_entry_size;
1343                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1344                  {                  {
1345                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
1346                  if (condition) break;                  if (condition) break;
# Line 1145  for (;;) Line 1356  for (;;)
1356                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
1357                  {                  {
1358                  slotB += md->name_entry_size;                  slotB += md->name_entry_size;
1359                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                  if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1360                    {                    {
1361                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
1362                    if (condition) break;                    if (condition) break;
# Line 1158  for (;;) Line 1369  for (;;)
1369    
1370          /* Choose branch according to the condition */          /* Choose branch according to the condition */
1371    
1372          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1373          }          }
1374        }        }
1375    
# Line 1174  for (;;) Line 1385  for (;;)
1385    
1386        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1387          {          {
1388          int refno = offset >> 1;          unsigned int refno = offset >> 1;
1389          uschar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1390    
1391          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
1392            {            {
# Line 1189  for (;;) Line 1400  for (;;)
1400    
1401          if (i < md->name_count)          if (i < md->name_count)
1402            {            {
1403            uschar *slotB = slotA;            pcre_uchar *slotB = slotA;
1404            while (slotB > md->name_table)            while (slotB > md->name_table)
1405              {              {
1406              slotB -= md->name_entry_size;              slotB -= md->name_entry_size;
1407              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1408                {                {
1409                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
1410                condition = offset < offset_top &&                condition = offset < offset_top &&
# Line 1211  for (;;) Line 1422  for (;;)
1422              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
1423                {                {
1424                slotB += md->name_entry_size;                slotB += md->name_entry_size;
1425                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1426                  {                  {
1427                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
1428                  condition = offset < offset_top &&                  condition = offset < offset_top &&
# Line 1226  for (;;) Line 1437  for (;;)
1437    
1438        /* Choose branch according to the condition */        /* Choose branch according to the condition */
1439    
1440        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1441        }        }
1442    
1443      else if (condcode == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
# Line 1251  for (;;) Line 1462  for (;;)
1462          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1463          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1464          }          }
1465        else if (rrc != MATCH_NOMATCH &&  
1466                (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1467          assertion; it is therefore treated as NOMATCH. */
1468    
1469          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1470          {          {
1471          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1472          }          }
# Line 1263  for (;;) Line 1477  for (;;)
1477          }          }
1478        }        }
1479    
1480      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one, can
1481      we used to use tail recursion to avoid using another stack frame, except      use tail recursion to avoid using another stack frame, except when there is
1482      when there was unlimited repeat of a possibly empty group. However, that      unlimited repeat of a possibly empty group. In the latter case, a recursive
1483      strategy no longer works because of the possibility of (*THEN) being      call to match() is always required, unless the second alternative doesn't
1484      encountered in the branch. A recursive call to match() is always required,      exist, in which case we can just plough on. Note that, for compatibility
1485      unless the second alternative doesn't exist, in which case we can just      with Perl, the | in a conditional group is NOT treated as creating two
1486      plough on. */      alternatives. If a THEN is encountered in the branch, it propagates out to
1487        the enclosing alternative (unless nested in a deeper set of alternatives,
1488        of course). */
1489    
1490      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1491        {        {
1492        if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;        if (op != OP_SCOND)
1493            {
1494            ecode += 1 + LINK_SIZE;
1495            goto TAIL_RECURSE;
1496            }
1497    
1498          md->match_function_type = MATCH_CBEGROUP;
1499        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
       if (rrc == MATCH_THEN && md->start_match_ptr == ecode)  
         rrc = MATCH_NOMATCH;  
1500        RRETURN(rrc);        RRETURN(rrc);
1501        }        }
1502      else                         /* Condition false & no alternative */  
1503         /* Condition false & no alternative; continue after the group. */
1504    
1505        else
1506        {        {
1507        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1508        }        }
# Line 1306  for (;;) Line 1529  for (;;)
1529        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1530        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1531        }        }
1532      ecode += 3;      ecode += 1 + IMM2_SIZE;
1533      break;      break;
1534    
1535    
# Line 1326  for (;;) Line 1549  for (;;)
1549           (md->notempty ||           (md->notempty ||
1550             (md->notempty_atstart &&             (md->notempty_atstart &&
1551               mstart == md->start_subject + md->start_offset)))               mstart == md->start_subject + md->start_offset)))
1552        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1553    
1554      /* Otherwise, we have a match. */      /* Otherwise, we have a match. */
1555    
# Line 1335  for (;;) Line 1558  for (;;)
1558      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1559    
1560      /* For some reason, the macros don't work properly if an expression is      /* For some reason, the macros don't work properly if an expression is
1561      given as the argument to MRRETURN when the heap is in use. */      given as the argument to RRETURN when the heap is in use. */
1562    
1563      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1564      MRRETURN(rrc);      RRETURN(rrc);
1565    
1566      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
1567      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
# Line 1353  for (;;) Line 1576  for (;;)
1576    
1577      case OP_ASSERT:      case OP_ASSERT:
1578      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1579        save_mark = md->mark;
1580      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1581        {        {
1582        condassert = TRUE;        condassert = TRUE;
# Line 1366  for (;;) Line 1590  for (;;)
1590        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1591          {          {
1592          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
         markptr = md->mark;  
1593          break;          break;
1594          }          }
1595        if (rrc != MATCH_NOMATCH &&        md->mark = save_mark;
1596            (rrc != MATCH_THEN || md->start_match_ptr != ecode))  
1597          RRETURN(rrc);        /* A COMMIT failure must fail the entire assertion, without trying any
1598          subsequent branches. */
1599    
1600          if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
1601    
1602          /* PCRE does not allow THEN to escape beyond an assertion; it
1603          is treated as NOMATCH. */
1604    
1605          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1606        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1607        }        }
1608      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1609    
1610      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1611    
1612      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1613    
# Line 1396  for (;;) Line 1627  for (;;)
1627    
1628      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1629      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1630        save_mark = md->mark;
1631      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1632        {        {
1633        condassert = TRUE;        condassert = TRUE;
# Line 1406  for (;;) Line 1638  for (;;)
1638      do      do
1639        {        {
1640        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1641        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);        md->mark = save_mark;
1642          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1643        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1644          {          {
1645          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1646          break;          break;
1647          }          }
1648        if (rrc != MATCH_NOMATCH &&  
1649            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1650          RRETURN(rrc);        as NOMATCH. */
1651    
1652          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1653        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1654        }        }
1655      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1430  for (;;) Line 1665  for (;;)
1665      back a number of characters, not bytes. */      back a number of characters, not bytes. */
1666    
1667      case OP_REVERSE:      case OP_REVERSE:
1668  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1669      if (utf8)      if (utf)
1670        {        {
1671        i = GET(ecode, 1);        i = GET(ecode, 1);
1672        while (i-- > 0)        while (i-- > 0)
1673          {          {
1674          eptr--;          eptr--;
1675          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1676          BACKCHAR(eptr);          BACKCHAR(eptr);
1677          }          }
1678        }        }
# Line 1448  for (;;) Line 1683  for (;;)
1683    
1684        {        {
1685        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1686        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1687        }        }
1688    
1689      /* Save the earliest consulted character, then skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
# Line 1462  for (;;) Line 1697  for (;;)
1697      function is able to force a failure. */      function is able to force a failure. */
1698    
1699      case OP_CALLOUT:      case OP_CALLOUT:
1700      if (pcre_callout != NULL)      if (PUBL(callout) != NULL)
1701        {        {
1702        pcre_callout_block cb;        PUBL(callout_block) cb;
1703        cb.version          = 2;   /* Version 2 of the callout block */        cb.version          = 2;   /* Version 2 of the callout block */
1704        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1705        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1706    #if defined COMPILE_PCRE8
1707        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1708    #elif defined COMPILE_PCRE16
1709          cb.subject          = (PCRE_SPTR16)md->start_subject;
1710    #elif defined COMPILE_PCRE32
1711          cb.subject          = (PCRE_SPTR32)md->start_subject;
1712    #endif
1713        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1714        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
1715        cb.current_position = (int)(eptr - md->start_subject);        cb.current_position = (int)(eptr - md->start_subject);
# Line 1477  for (;;) Line 1718  for (;;)
1718        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1719        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1720        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1721        cb.mark             = markptr;        cb.mark             = md->nomatch_mark;
1722        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1723        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1724        }        }
1725      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 1504  for (;;) Line 1745  for (;;)
1745      case OP_RECURSE:      case OP_RECURSE:
1746        {        {
1747        recursion_info *ri;        recursion_info *ri;
1748        int recno;        unsigned int recno;
1749    
1750        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1751        recno = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
# Line 1537  for (;;) Line 1778  for (;;)
1778        else        else
1779          {          {
1780          new_recursive.offset_save =          new_recursive.offset_save =
1781            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));            (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1782          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1783          }          }
1784        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
# Line 1552  for (;;) Line 1793  for (;;)
1793        do        do
1794          {          {
1795          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1796          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1797            md, eptrb, RM6);            md, eptrb, RM6);
1798          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1799              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1800            md->recursive = new_recursive.prevrec;
1801          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1802            {            {
1803            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
           md->recursive = new_recursive.prevrec;  
1804            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1805              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1806    
1807            /* Set where we got to in the subject, and reset the start in case            /* Set where we got to in the subject, and reset the start in case
1808            it was changed by \K. This *is* propagated back out of a recursion,            it was changed by \K. This *is* propagated back out of a recursion,
# Line 1571  for (;;) Line 1812  for (;;)
1812            mstart = md->start_match_ptr;            mstart = md->start_match_ptr;
1813            goto RECURSION_MATCHED;        /* Exit loop; end processing */            goto RECURSION_MATCHED;        /* Exit loop; end processing */
1814            }            }
1815          else if (rrc != MATCH_NOMATCH &&  
1816                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
1817            is treated as NOMATCH. */
1818    
1819            else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
1820                     rrc != MATCH_COMMIT)
1821            {            {
1822            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1823            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1824              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1825            RRETURN(rrc);            RRETURN(rrc);
1826            }            }
1827    
# Line 1588  for (;;) Line 1833  for (;;)
1833        DPRINTF(("Recursion didn't match\n"));        DPRINTF(("Recursion didn't match\n"));
1834        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1835        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1836          (pcre_free)(new_recursive.offset_save);          (PUBL(free))(new_recursive.offset_save);
1837        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1838        }        }
1839    
1840      RECURSION_MATCHED:      RECURSION_MATCHED:
# Line 1658  for (;;) Line 1903  for (;;)
1903        }        }
1904      else saved_eptr = NULL;      else saved_eptr = NULL;
1905    
1906      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or a non-capturing atomic
1907      MATCH_MATCH, but record the current high water mark for use by positive      group, stop matching and return MATCH_MATCH, but record the current high
1908      assertions. We also need to record the match start in case it was changed      water mark for use by positive assertions. We also need to record the match
1909      by \K. */      start in case it was changed by \K. */
1910    
1911      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1912          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT)           *prev == OP_ONCE_NC)
1913        {        {
1914        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE_NC */
1915        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1916        md->start_match_ptr = mstart;        md->start_match_ptr = mstart;
1917        MRRETURN(MATCH_MATCH);         /* Sets md->mark */        RRETURN(MATCH_MATCH);         /* Sets md->mark */
1918        }        }
1919    
1920      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1734  for (;;) Line 1979  for (;;)
1979      /* For an ordinary non-repeating ket, just continue at this level. This      /* For an ordinary non-repeating ket, just continue at this level. This
1980      also happens for a repeating ket if no characters were matched in the      also happens for a repeating ket if no characters were matched in the
1981      group. This is the forcible breaking of infinite loops as implemented in      group. This is the forcible breaking of infinite loops as implemented in
1982      Perl 5.005. For a non-repeating atomic group, establish a backup point by      Perl 5.005. For a non-repeating atomic group that includes captures,
1983      processing the rest of the pattern at a lower level. If this results in a      establish a backup point by processing the rest of the pattern at a lower
1984      NOMATCH return, pass MATCH_ONCE back to the original OP_ONCE level, thereby      level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1985      bypassing intermediate backup points, but resetting any captures that      original OP_ONCE level, thereby bypassing intermediate backup points, but
1986      happened along the way. */      resetting any captures that happened along the way. */
1987    
1988      if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1989        {        {
# Line 1783  for (;;) Line 2028  for (;;)
2028          }          }
2029        if (*prev >= OP_SBRA)    /* Could match an empty string */        if (*prev >= OP_SBRA)    /* Could match an empty string */
2030          {          {
         md->match_function_type = MATCH_CBEGROUP;  
2031          RMATCH(eptr, prev, offset_top, md, eptrb, RM50);          RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2032          RRETURN(rrc);          RRETURN(rrc);
2033          }          }
# Line 1792  for (;;) Line 2036  for (;;)
2036        }        }
2037      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
2038        {        {
       if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;  
2039        RMATCH(eptr, prev, offset_top, md, eptrb, RM13);        RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2040        if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;        if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2041        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 1811  for (;;) Line 2054  for (;;)
2054      /* Not multiline mode: start of subject assertion, unless notbol. */      /* Not multiline mode: start of subject assertion, unless notbol. */
2055    
2056      case OP_CIRC:      case OP_CIRC:
2057      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2058    
2059      /* Start of subject assertion */      /* Start of subject assertion */
2060    
2061      case OP_SOD:      case OP_SOD:
2062      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2063      ecode++;      ecode++;
2064      break;      break;
2065    
2066      /* Multiline mode: start of subject unless notbol, or after any newline. */      /* Multiline mode: start of subject unless notbol, or after any newline. */
2067    
2068      case OP_CIRCM:      case OP_CIRCM:
2069      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2070      if (eptr != md->start_subject &&      if (eptr != md->start_subject &&
2071          (eptr == md->end_subject || !WAS_NEWLINE(eptr)))          (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2072        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2073      ecode++;      ecode++;
2074      break;      break;
2075    
2076      /* Start of match assertion */      /* Start of match assertion */
2077    
2078      case OP_SOM:      case OP_SOM:
2079      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2080      ecode++;      ecode++;
2081      break;      break;
2082    
# Line 1849  for (;;) Line 2092  for (;;)
2092    
2093      case OP_DOLLM:      case OP_DOLLM:
2094      if (eptr < md->end_subject)      if (eptr < md->end_subject)
2095        { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }        {
2096          if (!IS_NEWLINE(eptr))
2097            {
2098            if (md->partial != 0 &&
2099                eptr + 1 >= md->end_subject &&
2100                NLBLOCK->nltype == NLTYPE_FIXED &&
2101                NLBLOCK->nllen == 2 &&
2102                RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2103              {
2104              md->hitend = TRUE;
2105              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2106              }
2107            RRETURN(MATCH_NOMATCH);
2108            }
2109          }
2110      else      else
2111        {        {
2112        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
2113        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2114        }        }
2115      ecode++;      ecode++;
# Line 1862  for (;;) Line 2119  for (;;)
2119      subject unless noteol is set. */      subject unless noteol is set. */
2120    
2121      case OP_DOLL:      case OP_DOLL:
2122      if (md->noteol) MRRETURN(MATCH_NOMATCH);      if (md->noteol) RRETURN(MATCH_NOMATCH);
2123      if (!md->endonly) goto ASSERT_NL_OR_EOS;      if (!md->endonly) goto ASSERT_NL_OR_EOS;
2124    
2125      /* ... else fall through for endonly */      /* ... else fall through for endonly */
# Line 1870  for (;;) Line 2127  for (;;)
2127      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
2128    
2129      case OP_EOD:      case OP_EOD:
2130      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2131      SCHECK_PARTIAL();      SCHECK_PARTIAL();
2132      ecode++;      ecode++;
2133      break;      break;
# Line 1881  for (;;) Line 2138  for (;;)
2138      ASSERT_NL_OR_EOS:      ASSERT_NL_OR_EOS:
2139      if (eptr < md->end_subject &&      if (eptr < md->end_subject &&
2140          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2141        MRRETURN(MATCH_NOMATCH);        {
2142          if (md->partial != 0 &&
2143              eptr + 1 >= md->end_subject &&
2144              NLBLOCK->nltype == NLTYPE_FIXED &&
2145              NLBLOCK->nllen == 2 &&
2146              RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2147            {
2148            md->hitend = TRUE;
2149            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2150            }
2151          RRETURN(MATCH_NOMATCH);
2152          }
2153    
2154      /* Either at end of string or \n before end. */      /* Either at end of string or \n before end. */
2155    
# Line 1900  for (;;) Line 2168  for (;;)
2168        be "non-word" characters. Remember the earliest consulted character for        be "non-word" characters. Remember the earliest consulted character for
2169        partial matching. */        partial matching. */
2170    
2171  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2172        if (utf8)        if (utf)
2173          {          {
2174          /* Get status of previous character */          /* Get status of previous character */
2175    
2176          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
2177            {            {
2178            USPTR lastptr = eptr - 1;            PCRE_PUCHAR lastptr = eptr - 1;
2179            while((*lastptr & 0xc0) == 0x80) lastptr--;            BACKCHAR(lastptr);
2180            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2181            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
2182  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 1973  for (;;) Line 2241  for (;;)
2241              }              }
2242            else            else
2243  #endif  #endif
2244            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = MAX_255(eptr[-1])
2245                && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2246            }            }
2247    
2248          /* Get status of next character */          /* Get status of next character */
# Line 1996  for (;;) Line 2265  for (;;)
2265            }            }
2266          else          else
2267  #endif  #endif
2268          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = MAX_255(*eptr)
2269              && ((md->ctypes[*eptr] & ctype_word) != 0);
2270          }          }
2271    
2272        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
2273    
2274        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
2275             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2276          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2277        }        }
2278      break;      break;
2279    
2280      /* Match a single character type; inline for speed */      /* Match any single character type except newline; have to take care with
2281        CRLF newlines and partial matching. */
2282    
2283      case OP_ANY:      case OP_ANY:
2284      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2285        if (md->partial != 0 &&
2286            eptr + 1 >= md->end_subject &&
2287            NLBLOCK->nltype == NLTYPE_FIXED &&
2288            NLBLOCK->nllen == 2 &&
2289            RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2290          {
2291          md->hitend = TRUE;
2292          if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2293          }
2294    
2295      /* Fall through */      /* Fall through */
2296    
2297        /* Match any single character whatsoever. */
2298    
2299      case OP_ALLANY:      case OP_ALLANY:
2300      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2301        {                            /* not be updated before SCHECK_PARTIAL. */        {                            /* not be updated before SCHECK_PARTIAL. */
2302        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2303        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2304        }        }
2305      eptr++;      eptr++;
2306      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  #ifdef SUPPORT_UTF
2307        if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2308    #endif
2309      ecode++;      ecode++;
2310      break;      break;
2311    
# Line 2031  for (;;) Line 2316  for (;;)
2316      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2317        {                            /* not be updated before SCHECK_PARTIAL. */        {                            /* not be updated before SCHECK_PARTIAL. */
2318        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2319        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2320        }        }
2321      eptr++;      eptr++;
2322      ecode++;      ecode++;
# Line 2041  for (;;) Line 2326  for (;;)
2326      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2327        {        {
2328        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2329        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2330        }        }
2331      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2332      if (      if (
2333  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2334         c < 256 &&         c < 256 &&
2335  #endif  #endif
2336         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
2337         )         )
2338        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2339      ecode++;      ecode++;
2340      break;      break;
2341    
# Line 2058  for (;;) Line 2343  for (;;)
2343      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2344        {        {
2345        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2346        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2347        }        }
2348      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2349      if (      if (
2350  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2351         c >= 256 ||         c > 255 ||
2352  #endif  #endif
2353         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
2354         )         )
2355        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2356      ecode++;      ecode++;
2357      break;      break;
2358    
# Line 2075  for (;;) Line 2360  for (;;)
2360      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2361        {        {
2362        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2363        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2364        }        }
2365      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2366      if (      if (
2367  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2368         c < 256 &&         c < 256 &&
2369  #endif  #endif
2370         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
2371         )         )
2372        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2373      ecode++;      ecode++;
2374      break;      break;
2375    
# Line 2092  for (;;) Line 2377  for (;;)
2377      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2378        {        {
2379        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2380        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2381        }        }
2382      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2383      if (      if (
2384  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2385         c >= 256 ||         c > 255 ||
2386  #endif  #endif
2387         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
2388         )         )
2389        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2390      ecode++;      ecode++;
2391      break;      break;
2392    
# Line 2109  for (;;) Line 2394  for (;;)
2394      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2395        {        {
2396        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2397        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2398        }        }
2399      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2400      if (      if (
2401  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2402         c < 256 &&         c < 256 &&
2403  #endif  #endif
2404         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
2405         )         )
2406        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2407      ecode++;      ecode++;
2408      break;      break;
2409    
# Line 2126  for (;;) Line 2411  for (;;)
2411      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2412        {        {
2413        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2414        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2415        }        }
2416      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2417      if (      if (
2418  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2419         c >= 256 ||         c > 255 ||
2420  #endif  #endif
2421         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2422         )         )
2423        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2424      ecode++;      ecode++;
2425      break;      break;
2426    
# Line 2143  for (;;) Line 2428  for (;;)
2428      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2429        {        {
2430        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2431        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2432        }        }
2433      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2434      switch(c)      switch(c)
2435        {        {
2436        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2437    
2438        case 0x000d:        case CHAR_CR:
2439        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr >= md->end_subject)
2440            {
2441            SCHECK_PARTIAL();
2442            }
2443          else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++;
2444        break;        break;
2445    
2446        case 0x000a:        case CHAR_LF:
2447        break;        break;
2448    
2449        case 0x000b:        case CHAR_VT:
2450        case 0x000c:        case CHAR_FF:
2451        case 0x0085:        case CHAR_NEL:
2452    #ifndef EBCDIC
2453        case 0x2028:        case 0x2028:
2454        case 0x2029:        case 0x2029:
2455        if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);  #endif  /* Not EBCDIC */
2456          if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2457        break;        break;
2458        }        }
2459      ecode++;      ecode++;
# Line 2172  for (;;) Line 2463  for (;;)
2463      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2464        {        {
2465        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2466        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2467        }        }
2468      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2469      switch(c)      switch(c)
2470        {        {
2471          HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2472        default: break;        default: break;
       case 0x09:      /* HT */  
       case 0x20:      /* SPACE */  
       case 0xa0:      /* NBSP */  
       case 0x1680:    /* OGHAM SPACE MARK */  
       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
       case 0x2000:    /* EN QUAD */  
       case 0x2001:    /* EM QUAD */  
       case 0x2002:    /* EN SPACE */  
       case 0x2003:    /* EM SPACE */  
       case 0x2004:    /* THREE-PER-EM SPACE */  
       case 0x2005:    /* FOUR-PER-EM SPACE */  
       case 0x2006:    /* SIX-PER-EM SPACE */  
       case 0x2007:    /* FIGURE SPACE */  
       case 0x2008:    /* PUNCTUATION SPACE */  
       case 0x2009:    /* THIN SPACE */  
       case 0x200A:    /* HAIR SPACE */  
       case 0x202f:    /* NARROW NO-BREAK SPACE */  
       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
       case 0x3000:    /* IDEOGRAPHIC SPACE */  
       MRRETURN(MATCH_NOMATCH);  
2473        }        }
2474      ecode++;      ecode++;
2475      break;      break;
# Line 2206  for (;;) Line 2478  for (;;)
2478      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2479        {        {
2480        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2481        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2482        }        }
2483      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2484      switch(c)      switch(c)
2485        {        {
2486        default: MRRETURN(MATCH_NOMATCH);        HSPACE_CASES: break;  /* Byte and multibyte cases */
2487        case 0x09:      /* HT */        default: RRETURN(MATCH_NOMATCH);
       case 0x20:      /* SPACE */  
       case 0xa0:      /* NBSP */  
       case 0x1680:    /* OGHAM SPACE MARK */  
       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
       case 0x2000:    /* EN QUAD */  
       case 0x2001:    /* EM QUAD */  
       case 0x2002:    /* EN SPACE */  
       case 0x2003:    /* EM SPACE */  
       case 0x2004:    /* THREE-PER-EM SPACE */  
       case 0x2005:    /* FOUR-PER-EM SPACE */  
       case 0x2006:    /* SIX-PER-EM SPACE */  
       case 0x2007:    /* FIGURE SPACE */  
       case 0x2008:    /* PUNCTUATION SPACE */  
       case 0x2009:    /* THIN SPACE */  
       case 0x200A:    /* HAIR SPACE */  
       case 0x202f:    /* NARROW NO-BREAK SPACE */  
       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
       case 0x3000:    /* IDEOGRAPHIC SPACE */  
       break;  
2488        }        }
2489      ecode++;      ecode++;
2490      break;      break;
# Line 2240  for (;;) Line 2493  for (;;)
2493      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2494        {        {
2495        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2496        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2497        }        }
2498      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2499      switch(c)      switch(c)
2500        {        {
2501          VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2502        default: break;        default: break;
       case 0x0a:      /* LF */  
       case 0x0b:      /* VT */  
       case 0x0c:      /* FF */  
       case 0x0d:      /* CR */  
       case 0x85:      /* NEL */  
       case 0x2028:    /* LINE SEPARATOR */  
       case 0x2029:    /* PARAGRAPH SEPARATOR */  
       MRRETURN(MATCH_NOMATCH);  
2503        }        }
2504      ecode++;      ecode++;
2505      break;      break;
# Line 2262  for (;;) Line 2508  for (;;)
2508      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2509        {        {
2510        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2511        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2512        }        }
2513      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2514      switch(c)      switch(c)
2515        {        {
2516        default: MRRETURN(MATCH_NOMATCH);        VSPACE_CASES: break;
2517        case 0x0a:      /* LF */        default: RRETURN(MATCH_NOMATCH);
       case 0x0b:      /* VT */  
       case 0x0c:      /* FF */  
       case 0x0d:      /* CR */  
       case 0x85:      /* NEL */  
       case 0x2028:    /* LINE SEPARATOR */  
       case 0x2029:    /* PARAGRAPH SEPARATOR */  
       break;  
2518        }        }
2519      ecode++;      ecode++;
2520      break;      break;
# Line 2289  for (;;) Line 2528  for (;;)
2528      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2529        {        {
2530        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2531        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2532        }        }
2533      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2534        {        {
2535          const pcre_uint32 *cp;
2536        const ucd_record *prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
2537    
2538        switch(ecode[1])        switch(ecode[1])
2539          {          {
2540          case PT_ANY:          case PT_ANY:
2541          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2542          break;          break;
2543    
2544          case PT_LAMP:          case PT_LAMP:
2545          if ((prop->chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2546               prop->chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2547               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2548            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2549          break;          break;
2550    
2551          case PT_GC:          case PT_GC:
2552          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2553            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2554          break;          break;
2555    
2556          case PT_PC:          case PT_PC:
2557          if ((ecode[2] != prop->chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2558            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2559          break;          break;
2560    
2561          case PT_SC:          case PT_SC:
2562          if ((ecode[2] != prop->script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2563            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2564          break;          break;
2565    
2566          /* These are specials */          /* These are specials */
2567    
2568          case PT_ALNUM:          case PT_ALNUM:
2569          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2570               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2571            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2572          break;          break;
2573    
2574          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2575          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2576               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2577                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2578            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2579          break;          break;
2580    
2581          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2582          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2583               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2584               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2585                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2586            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2587          break;          break;
2588    
2589          case PT_WORD:          case PT_WORD:
2590          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2591               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2592               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2593            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2594            break;
2595    
2596            case PT_CLIST:
2597            cp = PRIV(ucd_caseless_sets) + prop->caseset;
2598            for (;;)
2599              {
2600              if (c < *cp)
2601                { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2602              if (c == *cp++)
2603                { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2604              }
2605          break;          break;
2606    
2607          /* This should never occur */          /* This should never occur */
# Line 2370  for (;;) Line 2621  for (;;)
2621      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2622        {        {
2623        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2624        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2625        }        }
2626      GETCHARINCTEST(c, eptr);      else
     if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);  
     while (eptr < md->end_subject)  
2627        {        {
2628        int len = 1;        int lgb, rgb;
2629        if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }        GETCHARINCTEST(c, eptr);
2630        if (UCD_CATEGORY(c) != ucp_M) break;        lgb = UCD_GRAPHBREAK(c);
2631        eptr += len;        while (eptr < md->end_subject)
2632            {
2633            int len = 1;
2634            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2635            rgb = UCD_GRAPHBREAK(c);
2636            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2637            lgb = rgb;
2638            eptr += len;
2639            }
2640        }        }
2641        CHECK_PARTIAL();
2642      ecode++;      ecode++;
2643      break;      break;
2644  #endif  #endif  /* SUPPORT_UCP */
2645    
2646    
2647      /* Match a back reference, possibly repeatedly. Look past the end of the      /* Match a back reference, possibly repeatedly. Look past the end of the
# Line 2398  for (;;) Line 2656  for (;;)
2656      case OP_REFI:      case OP_REFI:
2657      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2658      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2659      ecode += 3;      ecode += 1 + IMM2_SIZE;
2660    
2661      /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2662    
# Line 2438  for (;;) Line 2696  for (;;)
2696        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2697        minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2698        min = GET2(ecode, 1);        min = GET2(ecode, 1);
2699        max = GET2(ecode, 3);        max = GET2(ecode, 1 + IMM2_SIZE);
2700        if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2701        ecode += 5;        ecode += 1 + 2 * IMM2_SIZE;
2702        break;        break;
2703    
2704        default:               /* No repeat follows */        default:               /* No repeat follows */
2705        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2706          {          {
2707            if (length == -2) eptr = md->end_subject;   /* Partial match */
2708          CHECK_PARTIAL();          CHECK_PARTIAL();
2709          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2710          }          }
2711        eptr += length;        eptr += length;
2712        continue;              /* With the main loop */        continue;              /* With the main loop */
2713        }        }
2714    
2715      /* Handle repeated back references. If the length of the reference is      /* Handle repeated back references. If the length of the reference is
2716      zero, just continue with the main loop. */      zero, just continue with the main loop. If the length is negative, it
2717        means the reference is unset in non-Java-compatible mode. If the minimum is
2718        zero, we can continue at the same level without recursion. For any other
2719        minimum, carrying on will result in NOMATCH. */
2720    
2721      if (length == 0) continue;      if (length == 0) continue;
2722        if (length < 0 && min == 0) continue;
2723    
2724      /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2725      the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
# Line 2467  for (;;) Line 2730  for (;;)
2730        int slength;        int slength;
2731        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2732          {          {
2733            if (slength == -2) eptr = md->end_subject;   /* Partial match */
2734          CHECK_PARTIAL();          CHECK_PARTIAL();
2735          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2736          }          }
2737        eptr += slength;        eptr += slength;
2738        }        }
# Line 2487  for (;;) Line 2751  for (;;)
2751          int slength;          int slength;
2752          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2753          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2754          if (fi >= max) MRRETURN(MATCH_NOMATCH);          if (fi >= max) RRETURN(MATCH_NOMATCH);
2755          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2756            {            {
2757              if (slength == -2) eptr = md->end_subject;   /* Partial match */
2758            CHECK_PARTIAL();            CHECK_PARTIAL();
2759            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2760            }            }
2761          eptr += slength;          eptr += slength;
2762          }          }
# Line 2508  for (;;) Line 2773  for (;;)
2773          int slength;          int slength;
2774          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2775            {            {
2776            CHECK_PARTIAL();            /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2777              the soft partial matching case. */
2778    
2779              if (slength == -2 && md->partial != 0 &&
2780                  md->end_subject > md->start_used_ptr)
2781                {
2782                md->hitend = TRUE;
2783                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2784                }
2785            break;            break;
2786            }            }
2787          eptr += slength;          eptr += slength;
2788          }          }
2789    
2790        while (eptr >= pp)        while (eptr >= pp)
2791          {          {
2792          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2793          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2794          eptr -= length;          eptr -= length;
2795          }          }
2796        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2797        }        }
2798      /* Control never gets here */      /* Control never gets here */
2799    
# Line 2537  for (;;) Line 2811  for (;;)
2811      case OP_NCLASS:      case OP_NCLASS:
2812      case OP_CLASS:      case OP_CLASS:
2813        {        {
2814          /* The data variable is saved across frames, so the byte map needs to
2815          be stored there. */
2816    #define BYTE_MAP ((pcre_uint8 *)data)
2817        data = ecode + 1;                /* Save for matching */        data = ecode + 1;                /* Save for matching */
2818        ecode += 33;                     /* Advance past the item */        ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2819    
2820        switch (*ecode)        switch (*ecode)
2821          {          {
# Line 2559  for (;;) Line 2836  for (;;)
2836          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2837          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2838          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2839          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2840          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2841          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2842          break;          break;
2843    
2844          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2571  for (;;) Line 2848  for (;;)
2848    
2849        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
2850    
2851  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2852        /* UTF-8 mode */        if (utf)
       if (utf8)  
2853          {          {
2854          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2855            {            {
2856            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2857              {              {
2858              SCHECK_PARTIAL();              SCHECK_PARTIAL();
2859              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2860              }              }
2861            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2862            if (c > 255)            if (c > 255)
2863              {              {
2864              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2865              }              }
2866            else            else
2867              {              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
             }  
2868            }            }
2869          }          }
2870        else        else
2871  #endif  #endif
2872        /* Not UTF-8 mode */        /* Not UTF mode */
2873          {          {
2874          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2875            {            {
2876            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2877              {              {
2878              SCHECK_PARTIAL();              SCHECK_PARTIAL();
2879              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2880              }              }
2881            c = *eptr++;            c = *eptr++;
2882            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2883              if (c > 255)
2884                {
2885                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2886                }
2887              else
2888    #endif
2889                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2890            }            }
2891          }          }
2892    
# Line 2619  for (;;) Line 2900  for (;;)
2900    
2901        if (minimize)        if (minimize)
2902          {          {
2903  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2904          /* UTF-8 mode */          if (utf)
         if (utf8)  
2905            {            {
2906            for (fi = min;; fi++)            for (fi = min;; fi++)
2907              {              {
2908              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2909              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2910              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2911              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2912                {                {
2913                SCHECK_PARTIAL();                SCHECK_PARTIAL();
2914                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2915                }                }
2916              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2917              if (c > 255)              if (c > 255)
2918                {                {
2919                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2920                }                }
2921              else              else
2922                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
               }  
2923              }              }
2924            }            }
2925          else          else
2926  #endif  #endif
2927          /* Not UTF-8 mode */          /* Not UTF mode */
2928            {            {
2929            for (fi = min;; fi++)            for (fi = min;; fi++)
2930              {              {
2931              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2932              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2933              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2934              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2935                {                {
2936                SCHECK_PARTIAL();                SCHECK_PARTIAL();
2937                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2938                }                }
2939              c = *eptr++;              c = *eptr++;
2940              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2941                if (c > 255)
2942                  {
2943                  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2944                  }
2945                else
2946    #endif
2947                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2948              }              }
2949            }            }
2950          /* Control never gets here */          /* Control never gets here */
# Line 2671  for (;;) Line 2956  for (;;)
2956          {          {
2957          pp = eptr;          pp = eptr;
2958    
2959  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2960          /* UTF-8 mode */          if (utf)
         if (utf8)  
2961            {            {
2962            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2963              {              {
# Line 2689  for (;;) Line 2973  for (;;)
2973                if (op == OP_CLASS) break;                if (op == OP_CLASS) break;
2974                }                }
2975              else              else
2976                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
               if ((data[c/8] & (1 << (c&7))) == 0) break;  
               }  
2977              eptr += len;              eptr += len;
2978              }              }
2979            for (;;)            for (;;)
# Line 2704  for (;;) Line 2986  for (;;)
2986            }            }
2987          else          else
2988  #endif  #endif
2989            /* Not UTF-8 mode */            /* Not UTF mode */
2990            {            {
2991            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2992              {              {
# Line 2714  for (;;) Line 2996  for (;;)
2996                break;                break;
2997                }                }
2998              c = *eptr;              c = *eptr;
2999              if ((data[c/8] & (1 << (c&7))) == 0) break;  #ifndef COMPILE_PCRE8
3000                if (c > 255)
3001                  {
3002                  if (op == OP_CLASS) break;
3003                  }
3004                else
3005    #endif
3006                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3007              eptr++;              eptr++;
3008              }              }
3009            while (eptr >= pp)            while (eptr >= pp)
# Line 2725  for (;;) Line 3014  for (;;)
3014              }              }
3015            }            }
3016    
3017          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3018          }          }
3019    #undef BYTE_MAP
3020        }        }
3021      /* Control never gets here */      /* Control never gets here */
3022    
# Line 2735  for (;;) Line 3025  for (;;)
3025      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3026      mode, because Unicode properties are supported in non-UTF-8 mode. */      mode, because Unicode properties are supported in non-UTF-8 mode. */
3027    
3028  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3029      case OP_XCLASS:      case OP_XCLASS:
3030        {        {
3031        data = ecode + 1 + LINK_SIZE;                /* Save for matching */        data = ecode + 1 + LINK_SIZE;                /* Save for matching */
# Line 2760  for (;;) Line 3050  for (;;)
3050          case OP_CRMINRANGE:          case OP_CRMINRANGE:
3051          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
3052          min = GET2(ecode, 1);          min = GET2(ecode, 1);
3053          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
3054          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
3055          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
3056          break;          break;
3057    
3058          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2777  for (;;) Line 3067  for (;;)
3067          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3068            {            {
3069            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3070            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3071            }            }
3072          GETCHARINCTEST(c, eptr);          GETCHARINCTEST(c, eptr);
3073          if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3074          }          }
3075    
3076        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2797  for (;;) Line 3087  for (;;)
3087            {            {
3088            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3089            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3090            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3091            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3092              {              {
3093              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3094              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3095              }              }
3096            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3097            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);            if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3098            }            }
3099          /* Control never gets here */          /* Control never gets here */
3100          }          }
# Line 2822  for (;;) Line 3112  for (;;)
3112              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3113              break;              break;
3114              }              }
3115    #ifdef SUPPORT_UTF
3116            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
3117            if (!_pcre_xclass(c, data)) break;  #else
3118              c = *eptr;
3119    #endif
3120              if (!PRIV(xclass)(c, data, utf)) break;
3121            eptr += len;            eptr += len;
3122            }            }
3123          for(;;)          for(;;)
# Line 2831  for (;;) Line 3125  for (;;)
3125            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3126            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3127            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3128            if (utf8) BACKCHAR(eptr);  #ifdef SUPPORT_UTF
3129              if (utf) BACKCHAR(eptr);
3130    #endif
3131            }            }
3132          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3133          }          }
3134    
3135        /* Control never gets here */        /* Control never gets here */
# Line 2843  for (;;) Line 3139  for (;;)
3139      /* Match a single character, casefully */      /* Match a single character, casefully */
3140    
3141      case OP_CHAR:      case OP_CHAR:
3142  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3143      if (utf8)      if (utf)
3144        {        {
3145        length = 1;        length = 1;
3146        ecode++;        ecode++;
# Line 2852  for (;;) Line 3148  for (;;)
3148        if (length > md->end_subject - eptr)        if (length > md->end_subject - eptr)
3149          {          {
3150          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3151          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3152          }          }
3153        while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH);
3154        }        }
3155      else      else
3156  #endif  #endif
3157        /* Not UTF mode */
     /* Non-UTF-8 mode */  
3158        {        {
3159        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3160          {          {
3161          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3162          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3163          }          }
3164        if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3165        ecode += 2;        ecode += 2;
3166        }        }
3167      break;      break;
3168    
3169      /* Match a single character, caselessly */      /* Match a single character, caselessly. If we are at the end of the
3170        subject, give up immediately. */
3171    
3172      case OP_CHARI:      case OP_CHARI:
3173  #ifdef SUPPORT_UTF8      if (eptr >= md->end_subject)
3174      if (utf8)        {
3175          SCHECK_PARTIAL();
3176          RRETURN(MATCH_NOMATCH);
3177          }
3178    
3179    #ifdef SUPPORT_UTF
3180        if (utf)
3181        {        {
3182        length = 1;        length = 1;
3183        ecode++;        ecode++;
3184        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
3185    
       if (length > md->end_subject - eptr)  
         {  
         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */  
         MRRETURN(MATCH_NOMATCH);  
         }  
   
3186        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
3187        can use the fast lookup table. */        we know that its other case must also be one byte long, so we can use the
3188          fast lookup table. We know that there is at least one byte left in the
3189          subject. */
3190    
3191        if (fc < 128)        if (fc < 128)
3192          {          {
3193          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          pcre_uchar cc = RAWUCHAR(eptr);
3194            if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3195            ecode++;
3196            eptr++;
3197          }          }
3198    
3199        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character. Note that we cannot
3200          use the value of "length" to check for sufficient bytes left, because the
3201          other case of the character may have more or fewer bytes.  */
3202    
3203        else        else
3204          {          {
3205          unsigned int dc;          pcre_uint32 dc;
3206          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
3207          ecode += length;          ecode += length;
3208    
# Line 2911  for (;;) Line 3214  for (;;)
3214  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3215            if (dc != UCD_OTHERCASE(fc))            if (dc != UCD_OTHERCASE(fc))
3216  #endif  #endif
3217              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3218            }            }
3219          }          }
3220        }        }
3221      else      else
3222  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF */
3223    
3224      /* Non-UTF-8 mode */      /* Not UTF mode */
3225        {        {
3226        if (md->end_subject - eptr < 1)        if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3227          {            != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3228          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */        eptr++;
         MRRETURN(MATCH_NOMATCH);  
         }  
       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);  
3229        ecode += 2;        ecode += 2;
3230        }        }
3231      break;      break;
# Line 2935  for (;;) Line 3235  for (;;)
3235      case OP_EXACT:      case OP_EXACT:
3236      case OP_EXACTI:      case OP_EXACTI:
3237      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3238      ecode += 3;      ecode += 1 + IMM2_SIZE;
3239      goto REPEATCHAR;      goto REPEATCHAR;
3240    
3241      case OP_POSUPTO:      case OP_POSUPTO:
# Line 2950  for (;;) Line 3250  for (;;)
3250      min = 0;      min = 0;
3251      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3252      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3253      ecode += 3;      ecode += 1 + IMM2_SIZE;
3254      goto REPEATCHAR;      goto REPEATCHAR;
3255    
3256      case OP_POSSTAR:      case OP_POSSTAR:
# Line 2998  for (;;) Line 3298  for (;;)
3298      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. */
3299    
3300      REPEATCHAR:      REPEATCHAR:
3301  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3302      if (utf8)      if (utf)
3303        {        {
3304        length = 1;        length = 1;
3305        charptr = ecode;        charptr = ecode;
# Line 3012  for (;;) Line 3312  for (;;)
3312        if (length > 1)        if (length > 1)
3313          {          {
3314  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3315          unsigned int othercase;          pcre_uint32 othercase;
3316          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3317              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3318            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
3319          else oclength = 0;          else oclength = 0;
3320  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3321    
3322          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3323            {            {
3324            if (eptr <= md->end_subject - length &&            if (eptr <= md->end_subject - length &&
3325              memcmp(eptr, charptr, length) == 0) eptr += length;              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3326  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3327            else if (oclength > 0 &&            else if (oclength > 0 &&
3328                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
3329                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3330  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3331            else            else
3332              {              {
3333              CHECK_PARTIAL();              CHECK_PARTIAL();
3334              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3335              }              }
3336            }            }
3337    
# Line 3043  for (;;) Line 3343  for (;;)
3343              {              {
3344              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3345              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3346              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3347              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3348                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3349  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3350              else if (oclength > 0 &&              else if (oclength > 0 &&
3351                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3352                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3353  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3354              else              else
3355                {                {
3356                CHECK_PARTIAL();                CHECK_PARTIAL();
3357                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3358                }                }
3359              }              }
3360            /* Control never gets here */            /* Control never gets here */
# Line 3066  for (;;) Line 3366  for (;;)
3366            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3367              {              {
3368              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3369                  memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3370  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3371              else if (oclength > 0 &&              else if (oclength > 0 &&
3372                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3373                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3374  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3375              else              else
3376                {                {
# Line 3085  for (;;) Line 3385  for (;;)
3385              {              {
3386              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3387              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3388              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }              if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3389  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3390              eptr--;              eptr--;
3391              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3102  for (;;) Line 3402  for (;;)
3402        value of fc will always be < 128. */        value of fc will always be < 128. */
3403        }        }
3404      else      else
3405  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3406          /* When not in UTF-8 mode, load a single-byte character. */
3407          fc = *ecode++;
3408    
3409      /* When not in UTF-8 mode, load a single-byte character. */      /* The value of fc at this point is always one character, though we may
3410        or may not be in UTF mode. The code is duplicated for the caseless and
     fc = *ecode++;  
   
     /* The value of fc at this point is always less than 256, though we may or  
     may not be in UTF-8 mode. The code is duplicated for the caseless and  
3411      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
3412      common. First, ensure the minimum number of matches are present. If min =      common. First, ensure the minimum number of matches are present. If min =
3413      max, continue at the same level without recursing. Otherwise, if      max, continue at the same level without recursing. Otherwise, if
# Line 3118  for (;;) Line 3416  for (;;)
3416      maximizing, find the maximum number of characters and work backwards. */      maximizing, find the maximum number of characters and work backwards. */
3417    
3418      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3419        max, eptr));        max, (char *)eptr));
3420    
3421      if (op >= OP_STARI)  /* Caseless */      if (op >= OP_STARI)  /* Caseless */
3422        {        {
3423        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3424          /* fc must be < 128 if UTF is enabled. */
3425          foc = md->fcc[fc];
3426    #else
3427    #ifdef SUPPORT_UTF
3428    #ifdef SUPPORT_UCP
3429          if (utf && fc > 127)
3430            foc = UCD_OTHERCASE(fc);
3431    #else
3432          if (utf && fc > 127)
3433            foc = fc;
3434    #endif /* SUPPORT_UCP */
3435          else
3436    #endif /* SUPPORT_UTF */
3437            foc = TABLE_GET(fc, md->fcc, fc);
3438    #endif /* COMPILE_PCRE8 */
3439    
3440        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3441          {          {
3442            pcre_uchar cc;
3443    
3444          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3445            {            {
3446            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3447            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3448            }            }
3449          if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          cc = RAWUCHARTEST(eptr);
3450            if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3451            eptr++;
3452          }          }
3453        if (min == max) continue;        if (min == max) continue;
3454        if (minimize)        if (minimize)
3455          {          {
3456          for (fi = min;; fi++)          for (fi = min;; fi++)
3457            {            {
3458              pcre_uchar cc;
3459    
3460            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3461            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3462            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3463            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3464              {              {
3465              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3466              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3467              }              }
3468            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            cc = RAWUCHARTEST(eptr);
3469              if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3470              eptr++;
3471            }            }
3472          /* Control never gets here */          /* Control never gets here */
3473          }          }
# Line 3154  for (;;) Line 3476  for (;;)
3476          pp = eptr;          pp = eptr;
3477          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3478            {            {
3479              pcre_uchar cc;
3480    
3481            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3482              {              {
3483              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3484              break;              break;
3485              }              }
3486            if (fc != md->lcc[*eptr]) break;            cc = RAWUCHARTEST(eptr);
3487              if (fc != cc && foc != cc) break;
3488            eptr++;            eptr++;
3489            }            }
3490    
# Line 3171  for (;;) Line 3496  for (;;)
3496            eptr--;            eptr--;
3497            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3498            }            }
3499          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3500          }          }
3501        /* Control never gets here */        /* Control never gets here */
3502        }        }
# Line 3185  for (;;) Line 3510  for (;;)
3510          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3511            {            {
3512            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3513            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3514            }            }
3515          if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);          if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3516          }          }
3517    
3518        if (min == max) continue;        if (min == max) continue;
# Line 3198  for (;;) Line 3523  for (;;)
3523            {            {
3524            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3525            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3526            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3527            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3528              {              {
3529              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3530              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3531              }              }
3532            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);            if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3533            }            }
3534          /* Control never gets here */          /* Control never gets here */
3535          }          }
# Line 3218  for (;;) Line 3543  for (;;)
3543              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3544              break;              break;
3545              }              }
3546            if (fc != *eptr) break;            if (fc != RAWUCHARTEST(eptr)) break;
3547            eptr++;            eptr++;
3548            }            }
3549          if (possessive) continue;          if (possessive) continue;
# Line 3229  for (;;) Line 3554  for (;;)
3554            eptr--;            eptr--;
3555            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3556            }            }
3557          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3558          }          }
3559        }        }
3560      /* Control never gets here */      /* Control never gets here */
# Line 3242  for (;;) Line 3567  for (;;)
3567      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3568        {        {
3569        SCHECK_PARTIAL();        SCHECK_PARTIAL();
3570        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3571        }        }
3572      ecode++;  #ifdef SUPPORT_UTF
3573      GETCHARINCTEST(c, eptr);      if (utf)
     if (op == OP_NOTI)         /* The caseless case */  
3574        {        {
3575  #ifdef SUPPORT_UTF8        register pcre_uint32 ch, och;
3576        if (c < 256)  
3577  #endif        ecode++;
3578        c = md->lcc[c];        GETCHARINC(ch, ecode);
3579        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);        GETCHARINC(c, eptr);
3580    
3581          if (op == OP_NOT)
3582            {
3583            if (ch == c) RRETURN(MATCH_NOMATCH);
3584            }
3585          else
3586            {
3587    #ifdef SUPPORT_UCP
3588            if (ch > 127)
3589              och = UCD_OTHERCASE(ch);
3590    #else
3591            if (ch > 127)
3592              och = ch;
3593    #endif /* SUPPORT_UCP */
3594            else
3595              och = TABLE_GET(ch, md->fcc, ch);
3596            if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3597            }
3598        }        }
3599      else    /* Caseful */      else
3600    #endif
3601        {        {
3602        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);        register pcre_uint32 ch = ecode[1];
3603          c = *eptr++;
3604          if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3605            RRETURN(MATCH_NOMATCH);
3606          ecode += 2;
3607        }        }
3608      break;      break;
3609    
# Line 3270  for (;;) Line 3617  for (;;)
3617      case OP_NOTEXACT:      case OP_NOTEXACT:
3618      case OP_NOTEXACTI:      case OP_NOTEXACTI:
3619      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3620      ecode += 3;      ecode += 1 + IMM2_SIZE;
3621      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3622    
3623      case OP_NOTUPTO:      case OP_NOTUPTO:
# Line 3280  for (;;) Line 3627  for (;;)
3627      min = 0;      min = 0;
3628      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3629      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3630      ecode += 3;      ecode += 1 + IMM2_SIZE;
3631      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3632    
3633      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
# Line 3312  for (;;) Line 3659  for (;;)
3659      possessive = TRUE;      possessive = TRUE;
3660      min = 0;      min = 0;
3661      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3662      ecode += 3;      ecode += 1 + IMM2_SIZE;
3663      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3664    
3665      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 3336  for (;;) Line 3683  for (;;)
3683      /* Common code for all repeated single-byte matches. */      /* Common code for all repeated single-byte matches. */
3684    
3685      REPEATNOTCHAR:      REPEATNOTCHAR:
3686      fc = *ecode++;      GETCHARINCTEST(fc, ecode);
3687    
3688      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
3689      since matching characters is likely to be quite common. First, ensure the      since matching characters is likely to be quite common. First, ensure the
# Line 3347  for (;;) Line 3694  for (;;)
3694      characters and work backwards. */      characters and work backwards. */
3695    
3696      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3697        max, eptr));        max, (char *)eptr));
3698    
3699      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3700        {        {
3701        fc = md->lcc[fc];  #ifdef SUPPORT_UTF
3702    #ifdef SUPPORT_UCP
3703          if (utf && fc > 127)
3704            foc = UCD_OTHERCASE(fc);
3705    #else
3706          if (utf && fc > 127)
3707            foc = fc;
3708    #endif /* SUPPORT_UCP */
3709          else
3710    #endif /* SUPPORT_UTF */
3711            foc = TABLE_GET(fc, md->fcc, fc);
3712    
3713  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3714        /* UTF-8 mode */        if (utf)
       if (utf8)  
3715          {          {
3716          register unsigned int d;          register pcre_uint32 d;
3717          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3718            {            {
3719            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3720              {              {
3721              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3722              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3723              }              }
3724            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3725            if (d < 256) d = md->lcc[d];            if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
           if (fc == d) MRRETURN(MATCH_NOMATCH);  
3726            }            }
3727          }          }
3728        else        else
3729  #endif  #endif
3730          /* Not UTF mode */
       /* Not UTF-8 mode */  
3731          {          {
3732          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3733            {            {
3734            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3735              {              {
3736              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3737              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3738              }              }
3739            if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3740              eptr++;
3741            }            }
3742          }          }
3743    
# Line 3390  for (;;) Line 3745  for (;;)
3745    
3746        if (minimize)        if (minimize)
3747          {          {
3748  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3749          /* UTF-8 mode */          if (utf)
         if (utf8)  
3750            {            {
3751            register unsigned int d;            register pcre_uint32 d;
3752            for (fi = min;; fi++)            for (fi = min;; fi++)
3753              {              {
3754              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3755              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3756              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3757              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3758                {                {
3759                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3760                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3761                }                }
3762              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3763              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
             if (fc == d) MRRETURN(MATCH_NOMATCH);  
3764              }              }
3765            }            }
3766          else          else
3767  #endif  #endif
3768          /* Not UTF-8 mode */          /* Not UTF mode */
3769            {            {
3770            for (fi = min;; fi++)            for (fi = min;; fi++)
3771              {              {
3772              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3773              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3774              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3775              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3776                {                {
3777                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3778                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3779                }                }
3780              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);              if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3781                eptr++;
3782              }              }
3783            }            }
3784          /* Control never gets here */          /* Control never gets here */
# Line 3436  for (;;) Line 3790  for (;;)
3790          {          {
3791          pp = eptr;          pp = eptr;
3792    
3793  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3794          /* UTF-8 mode */          if (utf)
         if (utf8)  
3795            {            {
3796            register unsigned int d;            register pcre_uint32 d;
3797            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3798              {              {
3799              int len = 1;              int len = 1;
# Line 3450  for (;;) Line 3803  for (;;)
3803                break;                break;
3804                }                }
3805              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3806              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) break;
             if (fc == d) break;  
3807              eptr += len;              eptr += len;
3808              }              }
3809          if (possessive) continue;            if (possessive) continue;
3810          for(;;)            for(;;)
3811              {              {
3812              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3813              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 3465  for (;;) Line 3817  for (;;)
3817            }            }
3818          else          else
3819  #endif  #endif
3820          /* Not UTF-8 mode */          /* Not UTF mode */
3821            {            {
3822            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3823              {              {
# Line 3474  for (;;) Line 3826  for (;;)
3826                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3827                break;                break;
3828                }                }
3829              if (fc == md->lcc[*eptr]) break;              if (fc == *eptr || foc == *eptr) break;
3830              eptr++;              eptr++;
3831              }              }
3832            if (possessive) continue;            if (possessive) continue;
# Line 3486  for (;;) Line 3838  for (;;)
3838              }              }
3839            }            }
3840    
3841          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3842          }          }
3843        /* Control never gets here */        /* Control never gets here */
3844        }        }
# Line 3495  for (;;) Line 3847  for (;;)
3847    
3848      else      else
3849        {        {
3850  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3851        /* UTF-8 mode */        if (utf)
       if (utf8)  
3852          {          {
3853          register unsigned int d;          register pcre_uint32 d;
3854          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3855            {            {
3856            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3857              {              {
3858              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3859              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3860              }              }
3861            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3862            if (fc == d) MRRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
3863            }            }
3864          }          }
3865        else        else
3866  #endif  #endif
3867        /* Not UTF-8 mode */        /* Not UTF mode */
3868          {          {
3869          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3870            {            {
3871            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3872              {              {
3873              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3874              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3875              }              }
3876            if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3877            }            }
3878          }          }
3879    
# Line 3530  for (;;) Line 3881  for (;;)
3881    
3882        if (minimize)        if (minimize)
3883          {          {
3884  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3885          /* UTF-8 mode */          if (utf)
         if (utf8)  
3886            {            {
3887            register unsigned int d;            register pcre_uint32 d;
3888            for (fi = min;; fi++)            for (fi = min;; fi++)
3889              {              {
3890              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3891              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3892              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3893              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3894                {                {
3895                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3896                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3897                }                }
3898              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3899              if (fc == d) MRRETURN(MATCH_NOMATCH);              if (fc == d) RRETURN(MATCH_NOMATCH);
3900              }              }
3901            }            }
3902          else          else
3903  #endif  #endif
3904          /* Not UTF-8 mode */          /* Not UTF mode */
3905            {            {
3906            for (fi = min;; fi++)            for (fi = min;; fi++)
3907              {              {
3908              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3909              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3910              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3911              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3912                {                {
3913                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3914                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3915                }                }
3916              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);              if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3917              }              }
3918            }            }
3919          /* Control never gets here */          /* Control never gets here */
# Line 3575  for (;;) Line 3925  for (;;)
3925          {          {
3926          pp = eptr;          pp = eptr;
3927    
3928  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3929          /* UTF-8 mode */          if (utf)
         if (utf8)  
3930            {            {
3931            register unsigned int d;            register pcre_uint32 d;
3932            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3933              {              {
3934              int len = 1;              int len = 1;
# Line 3603  for (;;) Line 3952  for (;;)
3952            }            }
3953          else          else
3954  #endif  #endif
3955          /* Not UTF-8 mode */          /* Not UTF mode */
3956            {            {
3957            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3958              {              {
# Line 3624  for (;;) Line 3973  for (;;)
3973              }              }
3974            }            }
3975    
3976          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3977          }          }
3978        }        }
3979      /* Control never gets here */      /* Control never gets here */
# Line 3636  for (;;) Line 3985  for (;;)
3985      case OP_TYPEEXACT:      case OP_TYPEEXACT:
3986      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3987      minimize = TRUE;      minimize = TRUE;
3988      ecode += 3;      ecode += 1 + IMM2_SIZE;
3989      goto REPEATTYPE;      goto REPEATTYPE;
3990    
3991      case OP_TYPEUPTO:      case OP_TYPEUPTO:
# Line 3644  for (;;) Line 3993  for (;;)
3993      min = 0;      min = 0;
3994      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3995      minimize = *ecode == OP_TYPEMINUPTO;      minimize = *ecode == OP_TYPEMINUPTO;
3996      ecode += 3;      ecode += 1 + IMM2_SIZE;
3997      goto REPEATTYPE;      goto REPEATTYPE;
3998    
3999      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
# Line 3672  for (;;) Line 4021  for (;;)
4021      possessive = TRUE;      possessive = TRUE;
4022      min = 0;      min = 0;
4023      max = GET2(ecode, 1);      max = GET2(ecode, 1);
4024      ecode += 3;      ecode += 1 + IMM2_SIZE;
4025      goto REPEATTYPE;      goto REPEATTYPE;
4026    
4027      case OP_TYPESTAR:      case OP_TYPESTAR:
# Line 3718  for (;;) Line 4067  for (;;)
4067          switch(prop_type)          switch(prop_type)
4068            {            {
4069            case PT_ANY:            case PT_ANY:
4070            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4071            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4072              {              {
4073              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4074                {                {
4075                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4076                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4077                }                }
4078              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4079              }              }
# Line 3737  for (;;) Line 4086  for (;;)
4086              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4087                {                {
4088                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4089                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4090                }                }
4091              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4092              chartype = UCD_CHARTYPE(c);              chartype = UCD_CHARTYPE(c);
4093              if ((chartype == ucp_Lu ||              if ((chartype == ucp_Lu ||
4094                   chartype == ucp_Ll ||                   chartype == ucp_Ll ||
4095                   chartype == ucp_Lt) == prop_fail_result)                   chartype == ucp_Lt) == prop_fail_result)
4096                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4097              }              }
4098            break;            break;
4099    
# Line 3754  for (;;) Line 4103  for (;;)
4103              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4104                {                {
4105                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4106                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4107                }                }
4108              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4109              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4110                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4111              }              }
4112            break;            break;
4113    
# Line 3768  for (;;) Line 4117  for (;;)
4117              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4118                {                {
4119                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4120                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4121                }                }
4122              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4123              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4124                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4125              }              }
4126            break;            break;
4127    
# Line 3782  for (;;) Line 4131  for (;;)
4131              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4132                {                {
4133                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4134                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4135                }                }
4136              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4137              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4138                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4139              }              }
4140            break;            break;
4141    
# Line 3797  for (;;) Line 4146  for (;;)
4146              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4147                {                {
4148                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4149                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4150                }                }
4151              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4152              category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4153              if ((category == ucp_L || category == ucp_N) == prop_fail_result)              if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4154                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4155              }              }
4156            break;            break;
4157    
# Line 3812  for (;;) Line 4161  for (;;)
4161              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4162                {                {
4163                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4164                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4165                }                }
4166              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4167              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4168                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4169                     == prop_fail_result)                     == prop_fail_result)
4170                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4171              }              }
4172            break;            break;
4173    
# Line 3828  for (;;) Line 4177  for (;;)
4177              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4178                {                {
4179                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4180                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4181                }                }
4182              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4183              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4184                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4185                     == prop_fail_result)                     == prop_fail_result)
4186                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4187              }              }
4188            break;            break;
4189    
# Line 3845  for (;;) Line 4194  for (;;)
4194              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4195                {                {
4196                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4197                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4198                }                }
4199              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4200              category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4201              if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)              if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4202                     == prop_fail_result)                     == prop_fail_result)
4203                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4204              }              }
4205            break;            break;
4206    
4207              case PT_CLIST:
4208              for (i = 1; i <= min; i++)
4209                {
4210                const pcre_uint32 *cp;
4211                if (eptr >= md->end_subject)
4212                  {
4213                  SCHECK_PARTIAL();
4214                  RRETURN(MATCH_NOMATCH);
4215                  }
4216                GETCHARINCTEST(c, eptr);
4217                cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);
4218                for (;;)
4219                  {
4220                  if (c < *cp)
4221                    { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4222                  if (c == *cp++)
4223                    { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4224                  }
4225                }
4226              break;
4227    
4228            /* This should not occur */            /* This should not occur */
4229    
4230            default:            default:
# Line 3872  for (;;) Line 4242  for (;;)
4242            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4243              {              {
4244              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4245              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4246              }              }
4247            GETCHARINCTEST(c, eptr);            else
           if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);  
           while (eptr < md->end_subject)  
4248              {              {
4249              int len = 1;              int lgb, rgb;
4250              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              GETCHARINCTEST(c, eptr);
4251              if (UCD_CATEGORY(c) != ucp_M) break;              lgb = UCD_GRAPHBREAK(c);
4252              eptr += len;             while (eptr < md->end_subject)
4253                  {
4254                  int len = 1;
4255                  if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4256                  rgb = UCD_GRAPHBREAK(c);
4257                  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4258                  lgb = rgb;
4259                  eptr += len;
4260                  }
4261              }              }
4262              CHECK_PARTIAL();
4263            }            }
4264          }          }
4265    
# Line 3891  for (;;) Line 4268  for (;;)
4268    
4269  /* Handle all other cases when the coding is UTF-8 */  /* Handle all other cases when the coding is UTF-8 */
4270    
4271  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4272        if (utf8) switch(ctype)        if (utf) switch(ctype)
4273          {          {
4274          case OP_ANY:          case OP_ANY:
4275          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3900  for (;;) Line 4277  for (;;)
4277            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4278              {              {
4279              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4280              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4281                }
4282              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4283              if (md->partial != 0 &&
4284                  eptr + 1 >= md->end_subject &&
4285                  NLBLOCK->nltype == NLTYPE_FIXED &&
4286                  NLBLOCK->nllen == 2 &&
4287                  RAWUCHAR(eptr) == NLBLOCK->nl[0])
4288                {
4289                md->hitend = TRUE;
4290                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4291              }              }
           if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);  
4292            eptr++;            eptr++;
4293            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4294            }            }
4295          break;          break;
4296    
# Line 3914  for (;;) Line 4300  for (;;)
4300            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4301              {              {
4302              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4303              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4304              }              }
4305            eptr++;            eptr++;
4306            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4307            }            }
4308          break;          break;
4309    
4310          case OP_ANYBYTE:          case OP_ANYBYTE:
4311          if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);          if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4312          eptr += min;          eptr += min;
4313          break;          break;
4314    
# Line 3932  for (;;) Line 4318  for (;;)
4318            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4319              {              {
4320              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4321              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4322              }              }
4323            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4324            switch(c)            switch(c)
4325              {              {
4326              default: MRRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4327    
4328              case 0x000d:              case CHAR_CR:
4329              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
4330              break;              break;
4331    
4332              case 0x000a:              case CHAR_LF:
4333              break;              break;
4334    
4335              case 0x000b:              case CHAR_VT:
4336              case 0x000c:              case CHAR_FF:
4337              case 0x0085:              case CHAR_NEL:
4338    #ifndef EBCDIC
4339              case 0x2028:              case 0x2028:
4340              case 0x2029:              case 0x2029:
4341              if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);  #endif  /* Not EBCDIC */
4342                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4343              break;              break;
4344              }              }
4345            }            }
# Line 3963  for (;;) Line 4351  for (;;)
4351            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4352              {              {
4353              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4354              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4355              }              }
4356            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4357            switch(c)            switch(c)
4358              {              {
4359                HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
4360              default: break;              default: break;
             case 0x09:      /* HT */  
             case 0x20:      /* SPACE */  
             case 0xa0:      /* NBSP */  
             case 0x1680:    /* OGHAM SPACE MARK */  
             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
             case 0x2000:    /* EN QUAD */  
             case 0x2001:    /* EM QUAD */  
             case 0x2002:    /* EN SPACE */  
             case 0x2003:    /* EM SPACE */  
             case 0x2004:    /* THREE-PER-EM SPACE */  
             case 0x2005:    /* FOUR-PER-EM SPACE */  
             case 0x2006:    /* SIX-PER-EM SPACE */  
             case 0x2007:    /* FIGURE SPACE */  
             case 0x2008:    /* PUNCTUATION SPACE */  
             case 0x2009:    /* THIN SPACE */  
             case 0x200A:    /* HAIR SPACE */  
             case 0x202f:    /* NARROW NO-BREAK SPACE */  
             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
             case 0x3000:    /* IDEOGRAPHIC SPACE */  
             MRRETURN(MATCH_NOMATCH);  
4361              }              }
4362            }            }
4363          break;          break;
# Line 3999  for (;;) Line 4368  for (;;)
4368            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4369              {              {
4370              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4371              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4372              }              }
4373            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4374            switch(c)            switch(c)
4375              {              {
4376              default: MRRETURN(MATCH_NOMATCH);              HSPACE_CASES: break;  /* Byte and multibyte cases */
4377              case 0x09:      /* HT */              default: RRETURN(MATCH_NOMATCH);
             case 0x20:      /* SPACE */  
             case 0xa0:      /* NBSP */  
             case 0x1680:    /* OGHAM SPACE MARK */  
             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
             case 0x2000:    /* EN QUAD */  
             case 0x2001:    /* EM QUAD */  
             case 0x2002:    /* EN SPACE */  
             case 0x2003:    /* EM SPACE */  
             case 0x2004:    /* THREE-PER-EM SPACE */  
             case 0x2005:    /* FOUR-PER-EM SPACE */  
             case 0x2006:    /* SIX-PER-EM SPACE */  
             case 0x2007:    /* FIGURE SPACE */  
             case 0x2008:    /* PUNCTUATION SPACE */  
             case 0x2009:    /* THIN SPACE */  
             case 0x200A:    /* HAIR SPACE */  
             case 0x202f:    /* NARROW NO-BREAK SPACE */  
             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
             case 0x3000:    /* IDEOGRAPHIC SPACE */  
             break;  
4378              }              }
4379            }            }
4380          break;          break;
# Line 4035  for (;;) Line 4385  for (;;)
4385            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4386              {              {
4387              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4388              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4389              }              }
4390            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4391            switch(c)            switch(c)
4392              {              {
4393                VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4394              default: break;              default: break;
             case 0x0a:      /* LF */  
             case 0x0b:      /* VT */  
             case 0x0c:      /* FF */  
             case 0x0d:      /* CR */  
             case 0x85:      /* NEL */  
             case 0x2028:    /* LINE SEPARATOR */  
             case 0x2029:    /* PARAGRAPH SEPARATOR */  
             MRRETURN(MATCH_NOMATCH);  
4395              }              }
4396            }            }
4397          break;          break;
# Line 4059  for (;;) Line 4402  for (;;)
4402            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4403              {              {
4404              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4405              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4406              }              }
4407            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4408            switch(c)            switch(c)
4409              {              {
4410              default: MRRETURN(MATCH_NOMATCH);              VSPACE_CASES: break;
4411              case 0x0a:      /* LF */              default: RRETURN(MATCH_NOMATCH);
             case 0x0b:      /* VT */  
             case 0x0c:      /* FF */  
             case 0x0d:      /* CR */  
             case 0x85:      /* NEL */  
             case 0x2028:    /* LINE SEPARATOR */  
             case 0x2029:    /* PARAGRAPH SEPARATOR */  
             break;  
4412              }              }
4413            }            }
4414          break;          break;
# Line 4083  for (;;) Line 4419  for (;;)
4419            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4420              {              {
4421              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4422              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4423              }              }
4424            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4425            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4426              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4427            }            }
4428          break;          break;
4429    
4430          case OP_DIGIT:          case OP_DIGIT:
4431          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4432            {            {
4433              pcre_uchar cc;
4434    
4435            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4436              {              {
4437              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4438              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4439              }              }
4440            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)            cc = RAWUCHAR(eptr);
4441              MRRETURN(MATCH_NOMATCH);            if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
4442                RRETURN(MATCH_NOMATCH);
4443              eptr++;
4444            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4445            }            }
4446          break;          break;
# Line 4108  for (;;) Line 4448  for (;;)
4448          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4449          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4450            {            {
4451              pcre_uchar cc;
4452    
4453            if (eptr >= md->end_subject)