/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 648 by ph10, Mon Aug 1 11:02:08 2011 UTC | revision 916 by ph10, Wed Feb 15 09:50:53 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 82  negative to avoid the external error cod Line 82  negative to avoid the external error cod
82  #define MATCH_SKIP_ARG     (-993)  #define MATCH_SKIP_ARG     (-993)
83  #define MATCH_THEN         (-992)  #define MATCH_THEN         (-992)
84    
 /* This is a convenience macro for code that occurs many times. */  
   
 #define MRRETURN(ra) \  
   { \  
   md->mark = markptr; \  
   RRETURN(ra); \  
   }  
   
85  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
86  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
87  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 121  Returns:     nothing Line 113  Returns:     nothing
113  */  */
114    
115  static void  static void
116  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
117  {  {
118  unsigned int c;  unsigned int c;
119  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 148  Arguments: Line 140  Arguments:
140    md          points to match data block    md          points to match data block
141    caseless    TRUE if caseless    caseless    TRUE if caseless
142    
143  Returns:      < 0 if not matched, otherwise the number of subject bytes matched  Returns:      >= 0 the number of subject bytes matched
144                  -1 no match
145                  -2 partial match; always given if at end subject
146  */  */
147    
148  static int  static int
149  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
150    BOOL caseless)    BOOL caseless)
151  {  {
152  USPTR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
153  register USPTR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
154    
155  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
156  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 171  pchars(p, length, FALSE, md); Line 165  pchars(p, length, FALSE, md);
165  printf("\n");  printf("\n");
166  #endif  #endif
167    
168  /* Always fail if reference not set (and not JavaScript compatible). */  /* Always fail if reference not set (and not JavaScript compatible - in that
169    case the length is passed as zero). */
170    
171  if (length < 0) return -1;  if (length < 0) return -1;
172    
# Line 181  ASCII characters. */ Line 176  ASCII characters. */
176    
177  if (caseless)  if (caseless)
178    {    {
179  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
180  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
181    if (md->utf8)    if (md->utf)
182      {      {
183      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
184      bytes matched may differ, because there are some characters whose upper and      bytes matched may differ, because there are some characters whose upper and
# Line 193  if (caseless) Line 188  if (caseless)
188      the latter. It is important, therefore, to check the length along the      the latter. It is important, therefore, to check the length along the
189      reference, not along the subject (earlier code did this wrong). */      reference, not along the subject (earlier code did this wrong). */
190    
191      USPTR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
192      while (p < endptr)      while (p < endptr)
193        {        {
194        int c, d;        int c, d;
195        if (eptr >= md->end_subject) return -1;        if (eptr >= md->end_subject) return -2;   /* Partial match */
196        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
197        GETCHARINC(d, p);        GETCHARINC(d, p);
198        if (c != d && c != UCD_OTHERCASE(d)) return -1;        if (c != d && c != UCD_OTHERCASE(d)) return -1;
# Line 210  if (caseless) Line 205  if (caseless)
205    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
206    is no UCP support. */    is no UCP support. */
207      {      {
     if (eptr + length > md->end_subject) return -1;  
208      while (length-- > 0)      while (length-- > 0)
209        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }        {
210          if (eptr >= md->end_subject) return -2;   /* Partial match */
211          if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
212          p++;
213          eptr++;
214          }
215      }      }
216    }    }
217    
# Line 221  are in UTF-8 mode. */ Line 220  are in UTF-8 mode. */
220    
221  else  else
222    {    {
223    if (eptr + length > md->end_subject) return -1;    while (length-- > 0)
224    while (length-- > 0) if (*p++ != *eptr++) return -1;      {
225        if (eptr >= md->end_subject) return -2;   /* Partial match */
226        if (*p++ != *eptr++) return -1;
227        }
228    }    }
229    
230  return eptr - eptr_start;  return (int)(eptr - eptr_start);
231  }  }
232    
233    
# Line 277  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 279  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
279         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
280         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
281         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
282         RM61,  RM62, RM63 };         RM61,  RM62, RM63, RM64, RM65, RM66 };
283    
284  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
285  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 290  actually used in this definition. */ Line 292  actually used in this definition. */
292  #define RMATCH(ra,rb,rc,rd,re,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
293    { \    { \
294    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
295    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
296    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
297    }    }
298  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 300  actually used in this definition. */ Line 302  actually used in this definition. */
302    }    }
303  #else  #else
304  #define RMATCH(ra,rb,rc,rd,re,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
305    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
306  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
307  #endif  #endif
308    
# Line 315  argument of match(), which never changes Line 317  argument of match(), which never changes
317    
318  #define RMATCH(ra,rb,rc,rd,re,rw)\  #define RMATCH(ra,rb,rc,rd,re,rw)\
319    {\    {\
320    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
321    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
322    frame->Xwhere = rw; \    frame->Xwhere = rw; \
323    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
324    newframe->Xecode = rb;\    newframe->Xecode = rb;\
325    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
   newframe->Xmarkptr = markptr;\  
326    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
327    newframe->Xeptrb = re;\    newframe->Xeptrb = re;\
328    newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
# Line 337  argument of match(), which never changes Line 338  argument of match(), which never changes
338    {\    {\
339    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
340    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
341    (pcre_stack_free)(oldframe);\    if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
342    if (frame != NULL)\    if (frame != NULL)\
343      {\      {\
344      rrc = ra;\      rrc = ra;\
# Line 354  typedef struct heapframe { Line 355  typedef struct heapframe {
355    
356    /* Function arguments that may change */    /* Function arguments that may change */
357    
358    USPTR Xeptr;    PCRE_PUCHAR Xeptr;
359    const uschar *Xecode;    const pcre_uchar *Xecode;
360    USPTR Xmstart;    PCRE_PUCHAR Xmstart;
   USPTR Xmarkptr;  
361    int Xoffset_top;    int Xoffset_top;
362    eptrblock *Xeptrb;    eptrblock *Xeptrb;
363    unsigned int Xrdepth;    unsigned int Xrdepth;
364    
365    /* Function local variables */    /* Function local variables */
366    
367    USPTR Xcallpat;    PCRE_PUCHAR Xcallpat;
368  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
369    USPTR Xcharptr;    PCRE_PUCHAR Xcharptr;
370  #endif  #endif
371    USPTR Xdata;    PCRE_PUCHAR Xdata;
372    USPTR Xnext;    PCRE_PUCHAR Xnext;
373    USPTR Xpp;    PCRE_PUCHAR Xpp;
374    USPTR Xprev;    PCRE_PUCHAR Xprev;
375    USPTR Xsaved_eptr;    PCRE_PUCHAR Xsaved_eptr;
376    
377    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
378    
# Line 385  typedef struct heapframe { Line 385  typedef struct heapframe {
385    int Xprop_value;    int Xprop_value;
386    int Xprop_fail_result;    int Xprop_fail_result;
387    int Xoclength;    int Xoclength;
388    uschar Xocchars[8];    pcre_uchar Xocchars[6];
389  #endif  #endif
390    
391    int Xcodelink;    int Xcodelink;
# Line 427  returns a negative (error) response, the Line 427  returns a negative (error) response, the
427  same response. */  same response. */
428    
429  /* These macros pack up tests that are used for partial matching, and which  /* These macros pack up tests that are used for partial matching, and which
430  appears several times in the code. We set the "hit end" flag if the pointer is  appear several times in the code. We set the "hit end" flag if the pointer is
431  at the end of the subject and also past the start of the subject (i.e.  at the end of the subject and also past the start of the subject (i.e.
432  something has been matched). For hard partial matching, we then return  something has been matched). For hard partial matching, we then return
433  immediately. The second one is used when we already know we are past the end of  immediately. The second one is used when we already know we are past the end of
# Line 438  the subject. */ Line 438  the subject. */
438        eptr > md->start_used_ptr) \        eptr > md->start_used_ptr) \
439      { \      { \
440      md->hitend = TRUE; \      md->hitend = TRUE; \
441      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
442      }      }
443    
444  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
445    if (md->partial != 0 && eptr > md->start_used_ptr) \    if (md->partial != 0 && eptr > md->start_used_ptr) \
446      { \      { \
447      md->hitend = TRUE; \      md->hitend = TRUE; \
448      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
449      }      }
450    
451    
452  /* Performance note: It might be tempting to extract commonly used fields from  /* Performance note: It might be tempting to extract commonly used fields from
453  the md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf, end_subject) into individual variables to improve
454  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
455  made performance worse.  made performance worse.
456    
# Line 459  Arguments: Line 459  Arguments:
459     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
460     mstart      pointer to the current match start position (can be modified     mstart      pointer to the current match start position (can be modified
461                   by encountering \K)                   by encountering \K)
    markptr     pointer to the most recent MARK name, or NULL  
462     offset_top  current top pointer     offset_top  current top pointer
463     md          pointer to "static" info for the match     md          pointer to "static" info for the match
464     eptrb       pointer to chain of blocks containing eptr at start of     eptrb       pointer to chain of blocks containing eptr at start of
# Line 474  Returns:       MATCH_MATCH if matched Line 473  Returns:       MATCH_MATCH if matched
473  */  */
474    
475  static int  static int
476  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
477    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,    PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
478    unsigned int rdepth)    unsigned int rdepth)
479  {  {
480  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
# Line 485  so they can be ordinary variables in all Line 484  so they can be ordinary variables in all
484  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
485  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
486  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
487  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
488    
489  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
490  BOOL caseless;  BOOL caseless;
491  int condcode;  int condcode;
492    
493  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
494  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
495  heap storage. Set up the top-level frame here; others are obtained from the  frame on the stack here; subsequent instantiations are obtained from the heap
496  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
497    the top-level on the stack rather than malloc-ing them all gives a performance
498    boost in many cases where there is not much "recursion". */
499    
500  #ifdef NO_RECURSE  #ifdef NO_RECURSE
501  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));  heapframe frame_zero;
502  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  heapframe *frame = &frame_zero;
503  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
504    
505  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 506  frame->Xprevframe = NULL;            /* Line 507  frame->Xprevframe = NULL;            /*
507  frame->Xeptr = eptr;  frame->Xeptr = eptr;
508  frame->Xecode = ecode;  frame->Xecode = ecode;
509  frame->Xmstart = mstart;  frame->Xmstart = mstart;
 frame->Xmarkptr = markptr;  
510  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
511  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
512  frame->Xrdepth = rdepth;  frame->Xrdepth = rdepth;
# Line 520  HEAP_RECURSE: Line 520  HEAP_RECURSE:
520  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
521  #define ecode              frame->Xecode  #define ecode              frame->Xecode
522  #define mstart             frame->Xmstart  #define mstart             frame->Xmstart
 #define markptr            frame->Xmarkptr  
523  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
524  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
525  #define rdepth             frame->Xrdepth  #define rdepth             frame->Xrdepth
526    
527  /* Ditto for the local variables */  /* Ditto for the local variables */
528    
529  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
530  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
531  #endif  #endif
532  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
# Line 585  declarations can be cut out in a block. Line 584  declarations can be cut out in a block.
584  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
585  to RMATCH(). */  to RMATCH(). */
586    
587  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
588  const uschar *charptr;  const pcre_uchar *charptr;
589  #endif  #endif
590  const uschar *callpat;  const pcre_uchar *callpat;
591  const uschar *data;  const pcre_uchar *data;
592  const uschar *next;  const pcre_uchar *next;
593  USPTR         pp;  PCRE_PUCHAR       pp;
594  const uschar *prev;  const pcre_uchar *prev;
595  USPTR         saved_eptr;  PCRE_PUCHAR       saved_eptr;
596    
597  recursion_info new_recursive;  recursion_info new_recursive;
598    
# Line 606  int prop_type; Line 605  int prop_type;
605  int prop_value;  int prop_value;
606  int prop_fail_result;  int prop_fail_result;
607  int oclength;  int oclength;
608  uschar occhars[8];  pcre_uchar occhars[6];
609  #endif  #endif
610    
611  int codelink;  int codelink;
# Line 622  int save_offset1, save_offset2, save_off Line 621  int save_offset1, save_offset2, save_off
621  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
622    
623  eptrblock newptrb;  eptrblock newptrb;
624    
625    /* There is a special fudge for calling match() in a way that causes it to
626    measure the size of its basic stack frame when the stack is being used for
627    recursion. The second argument (ecode) being NULL triggers this behaviour. It
628    cannot normally ever be NULL. The return is the negated value of the frame
629    size. */
630    
631    if (ecode == NULL)
632      {
633      if (rdepth == 0)
634        return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
635      else
636        {
637        int len = (char *)&rdepth - (char *)eptr;
638        return (len > 0)? -len : len;
639        }
640      }
641  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
642    
643  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
# Line 634  the alternative names that are used. */ Line 650  the alternative names that are used. */
650  #define code_offset   codelink  #define code_offset   codelink
651  #define condassert    condition  #define condassert    condition
652  #define matched_once  prev_is_word  #define matched_once  prev_is_word
653    #define foc           number
654    #define save_mark     data
655    
656  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
657  variables. */  variables. */
# Line 659  defined). However, RMATCH isn't like a f Line 677  defined). However, RMATCH isn't like a f
677  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
678  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
679    
680  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
681  utf8 = md->utf8;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
682  #else  #else
683  utf8 = FALSE;  utf = FALSE;
684  #endif  #endif
685    
686  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
# Line 701  for (;;) Line 719  for (;;)
719    switch(op)    switch(op)
720      {      {
721      case OP_MARK:      case OP_MARK:
722      markptr = ecode + 2;      md->nomatch_mark = ecode + 2;
723      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      md->mark = NULL;    /* In case previously set by assertion */
724        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
725        eptrb, RM55);        eptrb, RM55);
726        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
727             md->mark == NULL) md->mark = ecode + 2;
728    
729      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
730      argument, and we must check whether that argument matches this MARK's      argument, and we must check whether that argument matches this MARK's
# Line 712  for (;;) Line 733  for (;;)
733      position and return MATCH_SKIP. Otherwise, pass back the return code      position and return MATCH_SKIP. Otherwise, pass back the return code
734      unaltered. */      unaltered. */
735    
736      if (rrc == MATCH_SKIP_ARG &&      else if (rrc == MATCH_SKIP_ARG &&
737          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)          STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
738        {        {
739        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
740        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
741        }        }
   
     if (md->mark == NULL) md->mark = markptr;  
742      RRETURN(rrc);      RRETURN(rrc);
743    
744      case OP_FAIL:      case OP_FAIL:
745      MRRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
746    
747      /* COMMIT overrides PRUNE, SKIP, and THEN */      /* COMMIT overrides PRUNE, SKIP, and THEN */
748    
749      case OP_COMMIT:      case OP_COMMIT:
750      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
751        eptrb, RM52);        eptrb, RM52);
752      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
753          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
754          rrc != MATCH_THEN)          rrc != MATCH_THEN)
755        RRETURN(rrc);        RRETURN(rrc);
756      MRRETURN(MATCH_COMMIT);      RRETURN(MATCH_COMMIT);
757    
758      /* PRUNE overrides THEN */      /* PRUNE overrides THEN */
759    
760      case OP_PRUNE:      case OP_PRUNE:
761      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
762        eptrb, RM51);        eptrb, RM51);
763      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
764      MRRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
765    
766      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
767      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      md->nomatch_mark = ecode + 2;
768        md->mark = NULL;    /* In case previously set by assertion */
769        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
770        eptrb, RM56);        eptrb, RM56);
771        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
772             md->mark == NULL) md->mark = ecode + 2;
773      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
     md->mark = ecode + 2;  
774      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
775    
776      /* SKIP overrides PRUNE and THEN */      /* SKIP overrides PRUNE and THEN */
777    
778      case OP_SKIP:      case OP_SKIP:
779      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
780        eptrb, RM53);        eptrb, RM53);
781      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
782        RRETURN(rrc);        RRETURN(rrc);
783      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
784      MRRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
785    
786        /* Note that, for Perl compatibility, SKIP with an argument does NOT set
787        nomatch_mark. There is a flag that disables this opcode when re-matching a
788        pattern that ended with a SKIP for which there was not a matching MARK. */
789    
790      case OP_SKIP_ARG:      case OP_SKIP_ARG:
791      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      if (md->ignore_skip_arg)
792          {
793          ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
794          break;
795          }
796        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
797        eptrb, RM57);        eptrb, RM57);
798      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
799        RRETURN(rrc);        RRETURN(rrc);
800    
801      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
802      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
803      caught by a matching MARK, or get to the top, where it is treated the same      caught by a matching MARK, or get to the top, where it causes a rematch
804      as PRUNE. */      with the md->ignore_skip_arg flag set. */
805    
806      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
807      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
808    
809      /* For THEN (and THEN_ARG) we pass back the address of the bracket or      /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
810      the alt that is at the start of the current branch. This makes it possible      the branch in which it occurs can be determined. Overload the start of
811      to skip back past alternatives that precede the THEN within the current      match pointer to do this. */
     branch. */  
812    
813      case OP_THEN:      case OP_THEN:
814      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
815        eptrb, RM54);        eptrb, RM54);
816      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
817      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
818      MRRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
819    
820      case OP_THEN_ARG:      case OP_THEN_ARG:
821      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],      md->nomatch_mark = ecode + 2;
822        offset_top, md, eptrb, RM58);      md->mark = NULL;    /* In case previously set by assertion */
823        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
824          md, eptrb, RM58);
825        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
826             md->mark == NULL) md->mark = ecode + 2;
827      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
828      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
     md->mark = ecode + LINK_SIZE + 2;  
829      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
830    
831        /* Handle an atomic group that does not contain any capturing parentheses.
832        This can be handled like an assertion. Prior to 8.13, all atomic groups
833        were handled this way. In 8.13, the code was changed as below for ONCE, so
834        that backups pass through the group and thereby reset captured values.
835        However, this uses a lot more stack, so in 8.20, atomic groups that do not
836        contain any captures generate OP_ONCE_NC, which can be handled in the old,
837        less stack intensive way.
838    
839        Check the alternative branches in turn - the matching won't pass the KET
840        for this kind of subpattern. If any one branch matches, we carry on as at
841        the end of a normal bracket, leaving the subject pointer, but resetting
842        the start-of-match value in case it was changed by \K. */
843    
844        case OP_ONCE_NC:
845        prev = ecode;
846        saved_eptr = eptr;
847        save_mark = md->mark;
848        do
849          {
850          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
851          if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
852            {
853            mstart = md->start_match_ptr;
854            break;
855            }
856          if (rrc == MATCH_THEN)
857            {
858            next = ecode + GET(ecode,1);
859            if (md->start_match_ptr < next &&
860                (*ecode == OP_ALT || *next == OP_ALT))
861              rrc = MATCH_NOMATCH;
862            }
863    
864          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
865          ecode += GET(ecode,1);
866          md->mark = save_mark;
867          }
868        while (*ecode == OP_ALT);
869    
870        /* If hit the end of the group (which could be repeated), fail */
871    
872        if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
873    
874        /* Continue as from after the group, updating the offsets high water
875        mark, since extracts may have been taken. */
876    
877        do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
878    
879        offset_top = md->end_offset_top;
880        eptr = md->end_match_ptr;
881    
882        /* For a non-repeating ket, just continue at this level. This also
883        happens for a repeating ket if no characters were matched in the group.
884        This is the forcible breaking of infinite loops as implemented in Perl
885        5.005. */
886    
887        if (*ecode == OP_KET || eptr == saved_eptr)
888          {
889          ecode += 1+LINK_SIZE;
890          break;
891          }
892    
893        /* The repeating kets try the rest of the pattern or restart from the
894        preceding bracket, in the appropriate order. The second "call" of match()
895        uses tail recursion, to avoid using another stack frame. */
896    
897        if (*ecode == OP_KETRMIN)
898          {
899          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
900          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
901          ecode = prev;
902          goto TAIL_RECURSE;
903          }
904        else  /* OP_KETRMAX */
905          {
906          md->match_function_type = MATCH_CBEGROUP;
907          RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
908          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
909          ecode += 1 + LINK_SIZE;
910          goto TAIL_RECURSE;
911          }
912        /* Control never gets here */
913    
914      /* Handle a capturing bracket, other than those that are possessive with an      /* Handle a capturing bracket, other than those that are possessive with an
915      unlimited repeat. If there is space in the offset vector, save the current      unlimited repeat. If there is space in the offset vector, save the current
916      subject position in the working slot at the top of the vector. We mustn't      subject position in the working slot at the top of the vector. We mustn't
# Line 827  for (;;) Line 943  for (;;)
943        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
944        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
945        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
946          save_mark = md->mark;
947    
948        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
949        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 835  for (;;) Line 952  for (;;)
952        for (;;)        for (;;)
953          {          {
954          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
955          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
956            eptrb, RM1);            eptrb, RM1);
957          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
958          if (rrc != MATCH_NOMATCH &&  
959              (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* If we backed up to a THEN, check whether it is within the current
960            RRETURN(rrc);          branch by comparing the address of the THEN that is passed back with
961            the end of the branch. If it is within the current branch, and the
962            branch is one of two or more alternatives (it either starts or ends
963            with OP_ALT), we have reached the limit of THEN's action, so convert
964            the return code to NOMATCH, which will cause normal backtracking to
965            happen from now on. Otherwise, THEN is passed back to an outer
966            alternative. This implements Perl's treatment of parenthesized groups,
967            where a group not containing | does not affect the current alternative,
968            that is, (X) is NOT the same as (X|(*F)). */
969    
970            if (rrc == MATCH_THEN)
971              {
972              next = ecode + GET(ecode,1);
973              if (md->start_match_ptr < next &&
974                  (*ecode == OP_ALT || *next == OP_ALT))
975                rrc = MATCH_NOMATCH;
976              }
977    
978            /* Anything other than NOMATCH is passed back. */
979    
980            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
981          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
982          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
983            md->mark = save_mark;
984          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
985          }          }
986    
# Line 851  for (;;) Line 989  for (;;)
989        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
990        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
991    
992        /* At this point, rrc will be one of MATCH_ONCE, MATCH_NOMATCH, or        /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
       MATCH_THEN. */  
993    
994        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;        RRETURN(rrc);
       RRETURN(((rrc == MATCH_ONCE)? MATCH_ONCE:MATCH_NOMATCH));  
995        }        }
996    
997      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
# Line 870  for (;;) Line 1006  for (;;)
1006      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1007    
1008      /* Non-capturing or atomic group, except for possessive with unlimited      /* Non-capturing or atomic group, except for possessive with unlimited
1009      repeat. Loop for all the alternatives. When we get to the final alternative      repeat and ONCE group with no captures. Loop for all the alternatives.
1010      within the brackets, we used to return the result of a recursive call to  
1011      match() whatever happened so it was possible to reduce stack usage by      When we get to the final alternative within the brackets, we used to return
1012      turning this into a tail recursion, except in the case of a possibly empty      the result of a recursive call to match() whatever happened so it was
1013      group. However, now that there is the possibility of (*THEN) occurring in      possible to reduce stack usage by turning this into a tail recursion,
1014      the final alternative, this optimization is no longer possible.      except in the case of a possibly empty group. However, now that there is
1015        the possibility of (*THEN) occurring in the final alternative, this
1016        optimization is no longer always possible.
1017    
1018        We can optimize if we know there are no (*THEN)s in the pattern; at present
1019        this is the best that can be done.
1020    
1021      MATCH_ONCE is returned when the end of an atomic group is successfully      MATCH_ONCE is returned when the end of an atomic group is successfully
1022      reached, but subsequent matching fails. It passes back up the tree (causing      reached, but subsequent matching fails. It passes back up the tree (causing
# Line 892  for (;;) Line 1033  for (;;)
1033      for (;;)      for (;;)
1034        {        {
1035        if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
1036        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,  
1037          /* If this is not a possibly empty group, and there are no (*THEN)s in
1038          the pattern, and this is the final alternative, optimize as described
1039          above. */
1040    
1041          else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1042            {
1043            ecode += PRIV(OP_lengths)[*ecode];
1044            goto TAIL_RECURSE;
1045            }
1046    
1047          /* In all other cases, we have to make another call to match(). */
1048    
1049          save_mark = md->mark;
1050          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1051          RM2);          RM2);
1052        if (rrc != MATCH_NOMATCH &&  
1053            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1054          THEN. */
1055    
1056          if (rrc == MATCH_THEN)
1057            {
1058            next = ecode + GET(ecode,1);
1059            if (md->start_match_ptr < next &&
1060                (*ecode == OP_ALT || *next == OP_ALT))
1061              rrc = MATCH_NOMATCH;
1062            }
1063    
1064          if (rrc != MATCH_NOMATCH)
1065          {          {
1066          if (rrc == MATCH_ONCE)          if (rrc == MATCH_ONCE)
1067            {            {
1068            const uschar *scode = ecode;            const pcre_uchar *scode = ecode;
1069            if (*scode != OP_ONCE)           /* If not at start, find it */            if (*scode != OP_ONCE)           /* If not at start, find it */
1070              {              {
1071              while (*scode == OP_ALT) scode += GET(scode, 1);              while (*scode == OP_ALT) scode += GET(scode, 1);
# Line 910  for (;;) Line 1076  for (;;)
1076          RRETURN(rrc);          RRETURN(rrc);
1077          }          }
1078        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1079          md->mark = save_mark;
1080        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1081        }        }
1082      if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;  
1083      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
1084    
1085      /* Handle possessive capturing brackets with an unlimited repeat. We come      /* Handle possessive capturing brackets with an unlimited repeat. We come
# Line 941  for (;;) Line 1108  for (;;)
1108      if (offset < md->offset_max)      if (offset < md->offset_max)
1109        {        {
1110        matched_once = FALSE;        matched_once = FALSE;
1111        code_offset = ecode - md->start_code;        code_offset = (int)(ecode - md->start_code);
1112    
1113        save_offset1 = md->offset_vector[offset];        save_offset1 = md->offset_vector[offset];
1114        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
# Line 964  for (;;) Line 1131  for (;;)
1131          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1132            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1133          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1134          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1135            eptrb, RM63);            eptrb, RM63);
1136          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1137            {            {
# Line 975  for (;;) Line 1142  for (;;)
1142            matched_once = TRUE;            matched_once = TRUE;
1143            continue;            continue;
1144            }            }
1145          if (rrc != MATCH_NOMATCH &&  
1146              (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* See comment in the code for capturing groups above about handling
1147            RRETURN(rrc);          THEN. */
1148    
1149            if (rrc == MATCH_THEN)
1150              {
1151              next = ecode + GET(ecode,1);
1152              if (md->start_match_ptr < next &&
1153                  (*ecode == OP_ALT || *next == OP_ALT))
1154                rrc = MATCH_NOMATCH;
1155              }
1156    
1157            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1158          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
1159          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
1160          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
# Line 990  for (;;) Line 1167  for (;;)
1167          md->offset_vector[md->offset_end - number] = save_offset3;          md->offset_vector[md->offset_end - number] = save_offset3;
1168          }          }
1169    
       if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;  
1170        if (allow_zero || matched_once)        if (allow_zero || matched_once)
1171          {          {
1172          ecode += 1 + LINK_SIZE;          ecode += 1 + LINK_SIZE;
# Line 1022  for (;;) Line 1198  for (;;)
1198    
1199      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1200      matched_once = FALSE;      matched_once = FALSE;
1201      code_offset = ecode - md->start_code;      code_offset = (int)(ecode - md->start_code);
1202    
1203      for (;;)      for (;;)
1204        {        {
1205        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1206        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1207          eptrb, RM48);          eptrb, RM48);
1208        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1209          {          {
# Line 1037  for (;;) Line 1213  for (;;)
1213          matched_once = TRUE;          matched_once = TRUE;
1214          continue;          continue;
1215          }          }
1216        if (rrc != MATCH_NOMATCH &&  
1217            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1218          RRETURN(rrc);        THEN. */
1219    
1220          if (rrc == MATCH_THEN)
1221            {
1222            next = ecode + GET(ecode,1);
1223            if (md->start_match_ptr < next &&
1224                (*ecode == OP_ALT || *next == OP_ALT))
1225              rrc = MATCH_NOMATCH;
1226            }
1227    
1228          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1229        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1230        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1231        }        }
# Line 1067  for (;;) Line 1253  for (;;)
1253    
1254      if (ecode[LINK_SIZE+1] == OP_CALLOUT)      if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1255        {        {
1256        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
1257          {          {
1258          pcre_callout_block cb;          PUBL(callout_block) cb;
1259          cb.version          = 2;   /* Version 2 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
1260          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1261          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1262    #ifdef COMPILE_PCRE8
1263          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1264    #else
1265            cb.subject          = (PCRE_SPTR16)md->start_subject;
1266    #endif
1267          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1268          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
1269          cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
# Line 1082  for (;;) Line 1272  for (;;)
1272          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1273          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1274          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1275          cb.mark             = markptr;          cb.mark             = md->nomatch_mark;
1276          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1277          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1278          }          }
1279        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1280        }        }
1281    
1282      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1103  for (;;) Line 1293  for (;;)
1293        else        else
1294          {          {
1295          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1296          condition =  (recno == RREF_ANY || recno == md->recursive->group_num);          condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1297    
1298          /* If the test is for recursion into a specific subpattern, and it is          /* If the test is for recursion into a specific subpattern, and it is
1299          false, but the test was set up by name, scan the table to see if the          false, but the test was set up by name, scan the table to see if the
1300          name refers to any other numbers, and test them. The condition is true          name refers to any other numbers, and test them. The condition is true
1301          if any one is set. */          if any one is set. */
1302    
1303          if (!condition && condcode == OP_NRREF && recno != RREF_ANY)          if (!condition && condcode == OP_NRREF)
1304            {            {
1305            uschar *slotA = md->name_table;            pcre_uchar *slotA = md->name_table;
1306            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
1307              {              {
1308              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
# Line 1125  for (;;) Line 1315  for (;;)
1315    
1316            if (i < md->name_count)            if (i < md->name_count)
1317              {              {
1318              uschar *slotB = slotA;              pcre_uchar *slotB = slotA;
1319              while (slotB > md->name_table)              while (slotB > md->name_table)
1320                {                {
1321                slotB -= md->name_entry_size;                slotB -= md->name_entry_size;
1322                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1323                  {                  {
1324                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
1325                  if (condition) break;                  if (condition) break;
# Line 1145  for (;;) Line 1335  for (;;)
1335                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
1336                  {                  {
1337                  slotB += md->name_entry_size;                  slotB += md->name_entry_size;
1338                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                  if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1339                    {                    {
1340                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
1341                    if (condition) break;                    if (condition) break;
# Line 1158  for (;;) Line 1348  for (;;)
1348    
1349          /* Choose branch according to the condition */          /* Choose branch according to the condition */
1350    
1351          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1352          }          }
1353        }        }
1354    
# Line 1175  for (;;) Line 1365  for (;;)
1365        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1366          {          {
1367          int refno = offset >> 1;          int refno = offset >> 1;
1368          uschar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1369    
1370          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
1371            {            {
# Line 1189  for (;;) Line 1379  for (;;)
1379    
1380          if (i < md->name_count)          if (i < md->name_count)
1381            {            {
1382            uschar *slotB = slotA;            pcre_uchar *slotB = slotA;
1383            while (slotB > md->name_table)            while (slotB > md->name_table)
1384              {              {
1385              slotB -= md->name_entry_size;              slotB -= md->name_entry_size;
1386              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1387                {                {
1388                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
1389                condition = offset < offset_top &&                condition = offset < offset_top &&
# Line 1211  for (;;) Line 1401  for (;;)
1401              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
1402                {                {
1403                slotB += md->name_entry_size;                slotB += md->name_entry_size;
1404                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1405                  {                  {
1406                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
1407                  condition = offset < offset_top &&                  condition = offset < offset_top &&
# Line 1226  for (;;) Line 1416  for (;;)
1416    
1417        /* Choose branch according to the condition */        /* Choose branch according to the condition */
1418    
1419        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1420        }        }
1421    
1422      else if (condcode == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
# Line 1251  for (;;) Line 1441  for (;;)
1441          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1442          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1443          }          }
1444        else if (rrc != MATCH_NOMATCH &&  
1445                (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1446          assertion; it is therefore treated as NOMATCH. */
1447    
1448          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1449          {          {
1450          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1451          }          }
# Line 1263  for (;;) Line 1456  for (;;)
1456          }          }
1457        }        }
1458    
1459      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one, we can
1460      we used to use tail recursion to avoid using another stack frame, except      use tail recursion to avoid using another stack frame, except when there is
1461      when there was unlimited repeat of a possibly empty group. However, that      unlimited repeat of a possibly empty group. In the latter case, a recursive
1462      strategy no longer works because of the possibility of (*THEN) being      call to match() is always required, unless the second alternative doesn't
1463      encountered in the branch. A recursive call to match() is always required,      exist, in which case we can just plough on. Note that, for compatibility
1464      unless the second alternative doesn't exist, in which case we can just      with Perl, the | in a conditional group is NOT treated as creating two
1465      plough on. */      alternatives. If a THEN is encountered in the branch, it propagates out to
1466        the enclosing alternative (unless nested in a deeper set of alternatives,
1467        of course). */
1468    
1469      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1470        {        {
1471        if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;        if (op != OP_SCOND)
1472            {
1473            ecode += 1 + LINK_SIZE;
1474            goto TAIL_RECURSE;
1475            }
1476    
1477          md->match_function_type = MATCH_CBEGROUP;
1478        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
       if (rrc == MATCH_THEN && md->start_match_ptr == ecode)  
         rrc = MATCH_NOMATCH;  
1479        RRETURN(rrc);        RRETURN(rrc);
1480        }        }
1481      else                         /* Condition false & no alternative */  
1482         /* Condition false & no alternative; continue after the group. */
1483    
1484        else
1485        {        {
1486        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1487        }        }
# Line 1306  for (;;) Line 1508  for (;;)
1508        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1509        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1510        }        }
1511      ecode += 3;      ecode += 1 + IMM2_SIZE;
1512      break;      break;
1513    
1514    
# Line 1326  for (;;) Line 1528  for (;;)
1528           (md->notempty ||           (md->notempty ||
1529             (md->notempty_atstart &&             (md->notempty_atstart &&
1530               mstart == md->start_subject + md->start_offset)))               mstart == md->start_subject + md->start_offset)))
1531        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1532    
1533      /* Otherwise, we have a match. */      /* Otherwise, we have a match. */
1534    
# Line 1335  for (;;) Line 1537  for (;;)
1537      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1538    
1539      /* For some reason, the macros don't work properly if an expression is      /* For some reason, the macros don't work properly if an expression is
1540      given as the argument to MRRETURN when the heap is in use. */      given as the argument to RRETURN when the heap is in use. */
1541    
1542      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1543      MRRETURN(rrc);      RRETURN(rrc);
1544    
1545      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
1546      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
# Line 1353  for (;;) Line 1555  for (;;)
1555    
1556      case OP_ASSERT:      case OP_ASSERT:
1557      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1558        save_mark = md->mark;
1559      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1560        {        {
1561        condassert = TRUE;        condassert = TRUE;
# Line 1366  for (;;) Line 1569  for (;;)
1569        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1570          {          {
1571          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
         markptr = md->mark;  
1572          break;          break;
1573          }          }
1574        if (rrc != MATCH_NOMATCH &&  
1575            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1576          RRETURN(rrc);        as NOMATCH. */
1577    
1578          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1579        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1580          md->mark = save_mark;
1581        }        }
1582      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1583    
1584      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1585    
1586      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1587    
# Line 1396  for (;;) Line 1601  for (;;)
1601    
1602      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1603      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1604        save_mark = md->mark;
1605      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1606        {        {
1607        condassert = TRUE;        condassert = TRUE;
# Line 1406  for (;;) Line 1612  for (;;)
1612      do      do
1613        {        {
1614        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1615        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);        md->mark = save_mark;
1616          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1617        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1618          {          {
1619          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1620          break;          break;
1621          }          }
1622        if (rrc != MATCH_NOMATCH &&  
1623            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1624          RRETURN(rrc);        as NOMATCH. */
1625    
1626          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1627        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1628        }        }
1629      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1430  for (;;) Line 1639  for (;;)
1639      back a number of characters, not bytes. */      back a number of characters, not bytes. */
1640    
1641      case OP_REVERSE:      case OP_REVERSE:
1642  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1643      if (utf8)      if (utf)
1644        {        {
1645        i = GET(ecode, 1);        i = GET(ecode, 1);
1646        while (i-- > 0)        while (i-- > 0)
1647          {          {
1648          eptr--;          eptr--;
1649          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1650          BACKCHAR(eptr);          BACKCHAR(eptr);
1651          }          }
1652        }        }
# Line 1448  for (;;) Line 1657  for (;;)
1657    
1658        {        {
1659        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1660        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1661        }        }
1662    
1663      /* Save the earliest consulted character, then skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
# Line 1462  for (;;) Line 1671  for (;;)
1671      function is able to force a failure. */      function is able to force a failure. */
1672    
1673      case OP_CALLOUT:      case OP_CALLOUT:
1674      if (pcre_callout != NULL)      if (PUBL(callout) != NULL)
1675        {        {
1676        pcre_callout_block cb;        PUBL(callout_block) cb;
1677        cb.version          = 2;   /* Version 2 of the callout block */        cb.version          = 2;   /* Version 2 of the callout block */
1678        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1679        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1680    #ifdef COMPILE_PCRE8
1681        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1682    #else
1683          cb.subject          = (PCRE_SPTR16)md->start_subject;
1684    #endif
1685        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1686        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
1687        cb.current_position = (int)(eptr - md->start_subject);        cb.current_position = (int)(eptr - md->start_subject);
# Line 1477  for (;;) Line 1690  for (;;)
1690        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1691        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1692        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1693        cb.mark             = markptr;        cb.mark             = md->nomatch_mark;
1694        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1695        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1696        }        }
1697      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 1505  for (;;) Line 1718  for (;;)
1718        {        {
1719        recursion_info *ri;        recursion_info *ri;
1720        int recno;        int recno;
1721    
1722        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1723        recno = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
1724          GET2(callpat, 1 + LINK_SIZE);          GET2(callpat, 1 + LINK_SIZE);
1725    
1726        /* Check for repeating a recursion without advancing the subject pointer.        /* Check for repeating a recursion without advancing the subject pointer.
1727        This should catch convoluted mutual recursions. (Some simple cases are        This should catch convoluted mutual recursions. (Some simple cases are
1728        caught at compile time.) */        caught at compile time.) */
1729    
1730        for (ri = md->recursive; ri != NULL; ri = ri->prevrec)        for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1731          if (recno == ri->group_num && eptr == ri->subject_position)          if (recno == ri->group_num && eptr == ri->subject_position)
1732            RRETURN(PCRE_ERROR_RECURSELOOP);            RRETURN(PCRE_ERROR_RECURSELOOP);
1733    
1734        /* Add to "recursing stack" */        /* Add to "recursing stack" */
# Line 1537  for (;;) Line 1750  for (;;)
1750        else        else
1751          {          {
1752          new_recursive.offset_save =          new_recursive.offset_save =
1753            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));            (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1754          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1755          }          }
1756        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
# Line 1552  for (;;) Line 1765  for (;;)
1765        do        do
1766          {          {
1767          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1768          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1769            md, eptrb, RM6);            md, eptrb, RM6);
1770          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1771              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1772            md->recursive = new_recursive.prevrec;
1773          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1774            {            {
1775            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
           md->recursive = new_recursive.prevrec;  
1776            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1777              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1778    
1779            /* Set where we got to in the subject, and reset the start in case            /* Set where we got to in the subject, and reset the start in case
1780            it was changed by \K. This *is* propagated back out of a recursion,            it was changed by \K. This *is* propagated back out of a recursion,
# Line 1571  for (;;) Line 1784  for (;;)
1784            mstart = md->start_match_ptr;            mstart = md->start_match_ptr;
1785            goto RECURSION_MATCHED;        /* Exit loop; end processing */            goto RECURSION_MATCHED;        /* Exit loop; end processing */
1786            }            }
1787          else if (rrc != MATCH_NOMATCH &&  
1788                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* PCRE does not allow THEN to escape beyond a recursion; it is treated
1789            as NOMATCH. */
1790    
1791            else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1792            {            {
1793            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1794            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1795              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1796            RRETURN(rrc);            RRETURN(rrc);
1797            }            }
1798    
# Line 1588  for (;;) Line 1804  for (;;)
1804        DPRINTF(("Recursion didn't match\n"));        DPRINTF(("Recursion didn't match\n"));
1805        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1806        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1807          (pcre_free)(new_recursive.offset_save);          (PUBL(free))(new_recursive.offset_save);
1808        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1809        }        }
1810    
1811      RECURSION_MATCHED:      RECURSION_MATCHED:
# Line 1658  for (;;) Line 1874  for (;;)
1874        }        }
1875      else saved_eptr = NULL;      else saved_eptr = NULL;
1876    
1877      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or a non-capturing atomic
1878      MATCH_MATCH, but record the current high water mark for use by positive      group, stop matching and return MATCH_MATCH, but record the current high
1879      assertions. We also need to record the match start in case it was changed      water mark for use by positive assertions. We also need to record the match
1880      by \K. */      start in case it was changed by \K. */
1881    
1882      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1883          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT)           *prev == OP_ONCE_NC)
1884        {        {
1885        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE_NC */
1886        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1887        md->start_match_ptr = mstart;        md->start_match_ptr = mstart;
1888        MRRETURN(MATCH_MATCH);         /* Sets md->mark */        RRETURN(MATCH_MATCH);
1889        }        }
1890    
1891      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1734  for (;;) Line 1950  for (;;)
1950      /* For an ordinary non-repeating ket, just continue at this level. This      /* For an ordinary non-repeating ket, just continue at this level. This
1951      also happens for a repeating ket if no characters were matched in the      also happens for a repeating ket if no characters were matched in the
1952      group. This is the forcible breaking of infinite loops as implemented in      group. This is the forcible breaking of infinite loops as implemented in
1953      Perl 5.005. For a non-repeating atomic group, establish a backup point by      Perl 5.005. For a non-repeating atomic group that includes captures,
1954      processing the rest of the pattern at a lower level. If this results in a      establish a backup point by processing the rest of the pattern at a lower
1955      NOMATCH return, pass MATCH_ONCE back to the original OP_ONCE level, thereby      level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1956      bypassing intermediate backup points, but resetting any captures that      original OP_ONCE level, thereby bypassing intermediate backup points, but
1957      happened along the way. */      resetting any captures that happened along the way. */
1958    
1959      if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1960        {        {
# Line 1811  for (;;) Line 2027  for (;;)
2027      /* Not multiline mode: start of subject assertion, unless notbol. */      /* Not multiline mode: start of subject assertion, unless notbol. */
2028    
2029      case OP_CIRC:      case OP_CIRC:
2030      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2031    
2032      /* Start of subject assertion */      /* Start of subject assertion */
2033    
2034      case OP_SOD:      case OP_SOD:
2035      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2036      ecode++;      ecode++;
2037      break;      break;
2038    
2039      /* Multiline mode: start of subject unless notbol, or after any newline. */      /* Multiline mode: start of subject unless notbol, or after any newline. */
2040    
2041      case OP_CIRCM:      case OP_CIRCM:
2042      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2043      if (eptr != md->start_subject &&      if (eptr != md->start_subject &&
2044          (eptr == md->end_subject || !WAS_NEWLINE(eptr)))          (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2045        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2046      ecode++;      ecode++;
2047      break;      break;
2048    
2049      /* Start of match assertion */      /* Start of match assertion */
2050    
2051      case OP_SOM:      case OP_SOM:
2052      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2053      ecode++;      ecode++;
2054      break;      break;
2055    
# Line 1849  for (;;) Line 2065  for (;;)
2065    
2066      case OP_DOLLM:      case OP_DOLLM:
2067      if (eptr < md->end_subject)      if (eptr < md->end_subject)
2068        { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }        {
2069          if (!IS_NEWLINE(eptr))
2070            {
2071            if (eptr + 1 >= md->end_subject &&
2072                md->partial != 0 &&
2073                NLBLOCK->nltype == NLTYPE_FIXED &&
2074                NLBLOCK->nllen == 2 &&
2075                *eptr == NLBLOCK->nl[0])
2076              {
2077              md->hitend = TRUE;
2078              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2079              }
2080            RRETURN(MATCH_NOMATCH);
2081            }
2082          }
2083      else      else
2084        {        {
2085        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
2086        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2087        }        }
2088      ecode++;      ecode++;
# Line 1862  for (;;) Line 2092  for (;;)
2092      subject unless noteol is set. */      subject unless noteol is set. */
2093    
2094      case OP_DOLL:      case OP_DOLL:
2095      if (md->noteol) MRRETURN(MATCH_NOMATCH);      if (md->noteol) RRETURN(MATCH_NOMATCH);
2096      if (!md->endonly) goto ASSERT_NL_OR_EOS;      if (!md->endonly) goto ASSERT_NL_OR_EOS;
2097    
2098      /* ... else fall through for endonly */      /* ... else fall through for endonly */
# Line 1870  for (;;) Line 2100  for (;;)
2100      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
2101    
2102      case OP_EOD:      case OP_EOD:
2103      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2104      SCHECK_PARTIAL();      SCHECK_PARTIAL();
2105      ecode++;      ecode++;
2106      break;      break;
# Line 1881  for (;;) Line 2111  for (;;)
2111      ASSERT_NL_OR_EOS:      ASSERT_NL_OR_EOS:
2112      if (eptr < md->end_subject &&      if (eptr < md->end_subject &&
2113          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2114        MRRETURN(MATCH_NOMATCH);        {
2115          if (eptr + 1 >= md->end_subject &&
2116              md->partial != 0 &&
2117              NLBLOCK->nltype == NLTYPE_FIXED &&
2118              NLBLOCK->nllen == 2 &&
2119              *eptr == NLBLOCK->nl[0])
2120            {
2121            md->hitend = TRUE;
2122            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2123            }
2124          RRETURN(MATCH_NOMATCH);
2125          }
2126    
2127      /* Either at end of string or \n before end. */      /* Either at end of string or \n before end. */
2128    
# Line 1900  for (;;) Line 2141  for (;;)
2141        be "non-word" characters. Remember the earliest consulted character for        be "non-word" characters. Remember the earliest consulted character for
2142        partial matching. */        partial matching. */
2143    
2144  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2145        if (utf8)        if (utf)
2146          {          {
2147          /* Get status of previous character */          /* Get status of previous character */
2148    
2149          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
2150            {            {
2151            USPTR lastptr = eptr - 1;            PCRE_PUCHAR lastptr = eptr - 1;
2152            while((*lastptr & 0xc0) == 0x80) lastptr--;            BACKCHAR(lastptr);
2153            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2154            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
2155  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 1973  for (;;) Line 2214  for (;;)
2214              }              }
2215            else            else
2216  #endif  #endif
2217            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = MAX_255(eptr[-1])
2218                && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2219            }            }
2220    
2221          /* Get status of next character */          /* Get status of next character */
# Line 1996  for (;;) Line 2238  for (;;)
2238            }            }
2239          else          else
2240  #endif  #endif
2241          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = MAX_255(*eptr)
2242              && ((md->ctypes[*eptr] & ctype_word) != 0);
2243          }          }
2244    
2245        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
2246    
2247        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
2248             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2249          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2250        }        }
2251      break;      break;
2252    
2253      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
2254    
2255      case OP_ANY:      case OP_ANY:
2256      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2257      /* Fall through */      /* Fall through */
2258    
2259      case OP_ALLANY:      case OP_ALLANY:
2260      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2261        {                            /* not be updated before SCHECK_PARTIAL. */        {                            /* not be updated before SCHECK_PARTIAL. */
2262        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2263        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2264        }        }
2265      eptr++;      eptr++;
2266      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  #ifdef SUPPORT_UTF
2267        if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2268    #endif
2269      ecode++;      ecode++;
2270      break;      break;
2271    
# Line 2031  for (;;) Line 2276  for (;;)
2276      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2277        {                            /* not be updated before SCHECK_PARTIAL. */        {                            /* not be updated before SCHECK_PARTIAL. */
2278        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2279        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2280        }        }
2281      eptr++;      eptr++;
2282      ecode++;      ecode++;
2283      break;      break;
2284    
# Line 2041  for (;;) Line 2286  for (;;)
2286      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2287        {        {
2288        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2289        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2290        }        }
2291      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2292      if (      if (
2293  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2294         c < 256 &&         c < 256 &&
2295  #endif  #endif
2296         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
2297         )         )
2298        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2299      ecode++;      ecode++;
2300      break;      break;
2301    
# Line 2058  for (;;) Line 2303  for (;;)
2303      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2304        {        {
2305        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2306        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2307        }        }
2308      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2309      if (      if (
2310  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2311         c >= 256 ||         c > 255 ||
2312  #endif  #endif
2313         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
2314         )         )
2315        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2316      ecode++;      ecode++;
2317      break;      break;
2318    
# Line 2075  for (;;) Line 2320  for (;;)
2320      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2321        {        {
2322        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2323        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2324        }        }
2325      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2326      if (      if (
2327  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2328         c < 256 &&         c < 256 &&
2329  #endif  #endif
2330         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
2331         )         )
2332        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2333      ecode++;      ecode++;
2334      break;      break;
2335    
# Line 2092  for (;;) Line 2337  for (;;)
2337      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2338        {        {
2339        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2340        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2341        }        }
2342      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2343      if (      if (
2344  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2345         c >= 256 ||         c > 255 ||
2346  #endif  #endif
2347         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
2348         )         )
2349        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2350      ecode++;      ecode++;
2351      break;      break;
2352    
# Line 2109  for (;;) Line 2354  for (;;)
2354      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2355        {        {
2356        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2357        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2358        }        }
2359      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2360      if (      if (
2361  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2362         c < 256 &&         c < 256 &&
2363  #endif  #endif
2364         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
2365         )         )
2366        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2367      ecode++;      ecode++;
2368      break;      break;
2369    
# Line 2126  for (;;) Line 2371  for (;;)
2371      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2372        {        {
2373        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2374        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2375        }        }
2376      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2377      if (      if (
2378  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2379         c >= 256 ||         c > 255 ||
2380  #endif  #endif
2381         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2382         )         )
2383        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2384      ecode++;      ecode++;
2385      break;      break;
2386    
# Line 2143  for (;;) Line 2388  for (;;)
2388      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2389        {        {
2390        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2391        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2392        }        }
2393      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2394      switch(c)      switch(c)
2395        {        {
2396        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2397    
2398        case 0x000d:        case 0x000d:
2399        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr >= md->end_subject)
2400            {
2401            SCHECK_PARTIAL();
2402            }
2403          else if (*eptr == 0x0a) eptr++;
2404        break;        break;
2405    
2406        case 0x000a:        case 0x000a:
# Line 2162  for (;;) Line 2411  for (;;)
2411        case 0x0085:        case 0x0085:
2412        case 0x2028:        case 0x2028:
2413        case 0x2029:        case 0x2029:
2414        if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);        if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2415        break;        break;
2416        }        }
2417      ecode++;      ecode++;
# Line 2172  for (;;) Line 2421  for (;;)
2421      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2422        {        {
2423        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2424        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2425        }        }
2426      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2427      switch(c)      switch(c)
# Line 2197  for (;;) Line 2446  for (;;)
2446        case 0x202f:    /* NARROW NO-BREAK SPACE */        case 0x202f:    /* NARROW NO-BREAK SPACE */
2447        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2448        case 0x3000:    /* IDEOGRAPHIC SPACE */        case 0x3000:    /* IDEOGRAPHIC SPACE */
2449        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2450        }        }
2451      ecode++;      ecode++;
2452      break;      break;
# Line 2206  for (;;) Line 2455  for (;;)
2455      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2456        {        {
2457        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2458        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2459        }        }
2460      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2461      switch(c)      switch(c)
2462        {        {
2463        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2464        case 0x09:      /* HT */        case 0x09:      /* HT */
2465        case 0x20:      /* SPACE */        case 0x20:      /* SPACE */
2466        case 0xa0:      /* NBSP */        case 0xa0:      /* NBSP */
# Line 2240  for (;;) Line 2489  for (;;)
2489      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2490        {        {
2491        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2492        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2493        }        }
2494      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2495      switch(c)      switch(c)
# Line 2253  for (;;) Line 2502  for (;;)
2502        case 0x85:      /* NEL */        case 0x85:      /* NEL */
2503        case 0x2028:    /* LINE SEPARATOR */        case 0x2028:    /* LINE SEPARATOR */
2504        case 0x2029:    /* PARAGRAPH SEPARATOR */        case 0x2029:    /* PARAGRAPH SEPARATOR */
2505        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2506        }        }
2507      ecode++;      ecode++;
2508      break;      break;
# Line 2262  for (;;) Line 2511  for (;;)
2511      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2512        {        {
2513        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2514        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2515        }        }
2516      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2517      switch(c)      switch(c)
2518        {        {
2519        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2520        case 0x0a:      /* LF */        case 0x0a:      /* LF */
2521        case 0x0b:      /* VT */        case 0x0b:      /* VT */
2522        case 0x0c:      /* FF */        case 0x0c:      /* FF */
# Line 2289  for (;;) Line 2538  for (;;)
2538      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2539        {        {
2540        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2541        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2542        }        }
2543      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2544        {        {
# Line 2298  for (;;) Line 2547  for (;;)
2547        switch(ecode[1])        switch(ecode[1])
2548          {          {
2549          case PT_ANY:          case PT_ANY:
2550          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2551          break;          break;
2552    
2553          case PT_LAMP:          case PT_LAMP:
2554          if ((prop->chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2555               prop->chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2556               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2557            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2558          break;          break;
2559    
2560          case PT_GC:          case PT_GC:
2561          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2562            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2563          break;          break;
2564    
2565          case PT_PC:          case PT_PC:
2566          if ((ecode[2] != prop->chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2567            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2568          break;          break;
2569    
2570          case PT_SC:          case PT_SC:
2571          if ((ecode[2] != prop->script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2572            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2573          break;          break;
2574    
2575          /* These are specials */          /* These are specials */
2576    
2577          case PT_ALNUM:          case PT_ALNUM:
2578          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2579               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2580            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2581          break;          break;
2582    
2583          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2584          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2585               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2586                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2587            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2588          break;          break;
2589    
2590          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2591          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2592               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2593               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2594                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2595            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2596          break;          break;
2597    
2598          case PT_WORD:          case PT_WORD:
2599          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2600               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2601               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2602            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2603          break;          break;
2604    
2605          /* This should never occur */          /* This should never occur */
# Line 2370  for (;;) Line 2619  for (;;)
2619      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2620        {        {
2621        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2622        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2623        }        }
2624      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2625      if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);      if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
2626      while (eptr < md->end_subject)      while (eptr < md->end_subject)
2627        {        {
2628        int len = 1;        int len = 1;
2629        if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }        if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2630        if (UCD_CATEGORY(c) != ucp_M) break;        if (UCD_CATEGORY(c) != ucp_M) break;
2631        eptr += len;        eptr += len;
2632        }        }
2633        CHECK_PARTIAL();
2634      ecode++;      ecode++;
2635      break;      break;
2636  #endif  #endif
# Line 2398  for (;;) Line 2648  for (;;)
2648      case OP_REFI:      case OP_REFI:
2649      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2650      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2651      ecode += 3;      ecode += 1 + IMM2_SIZE;
2652    
2653      /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2654    
# Line 2438  for (;;) Line 2688  for (;;)
2688        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2689        minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2690        min = GET2(ecode, 1);        min = GET2(ecode, 1);
2691        max = GET2(ecode, 3);        max = GET2(ecode, 1 + IMM2_SIZE);
2692        if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2693        ecode += 5;        ecode += 1 + 2 * IMM2_SIZE;
2694        break;        break;
2695    
2696        default:               /* No repeat follows */        default:               /* No repeat follows */
2697        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2698          {          {
2699            if (length == -2) eptr = md->end_subject;   /* Partial match */
2700          CHECK_PARTIAL();          CHECK_PARTIAL();
2701          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2702          }          }
2703        eptr += length;        eptr += length;
2704        continue;              /* With the main loop */        continue;              /* With the main loop */
2705        }        }
2706    
2707      /* Handle repeated back references. If the length of the reference is      /* Handle repeated back references. If the length of the reference is
2708      zero, just continue with the main loop. */      zero, just continue with the main loop. If the length is negative, it
2709        means the reference is unset in non-Java-compatible mode. If the minimum is
2710        zero, we can continue at the same level without recursion. For any other
2711        minimum, carrying on will result in NOMATCH. */
2712    
2713      if (length == 0) continue;      if (length == 0) continue;
2714        if (length < 0 && min == 0) continue;
2715    
2716      /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2717      the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
# Line 2467  for (;;) Line 2722  for (;;)
2722        int slength;        int slength;
2723        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2724          {          {
2725            if (slength == -2) eptr = md->end_subject;   /* Partial match */
2726          CHECK_PARTIAL();          CHECK_PARTIAL();
2727          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2728          }          }
2729        eptr += slength;        eptr += slength;
2730        }        }
# Line 2487  for (;;) Line 2743  for (;;)
2743          int slength;          int slength;
2744          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2745          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2746          if (fi >= max) MRRETURN(MATCH_NOMATCH);          if (fi >= max) RRETURN(MATCH_NOMATCH);
2747          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2748            {            {
2749              if (slength == -2) eptr = md->end_subject;   /* Partial match */
2750            CHECK_PARTIAL();            CHECK_PARTIAL();
2751            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2752            }            }
2753          eptr += slength;          eptr += slength;
2754          }          }
# Line 2508  for (;;) Line 2765  for (;;)
2765          int slength;          int slength;
2766          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2767            {            {
2768            CHECK_PARTIAL();            /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2769              the soft partial matching case. */
2770    
2771              if (slength == -2 && md->partial != 0 &&
2772                  md->end_subject > md->start_used_ptr)
2773                {
2774                md->hitend = TRUE;
2775                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2776                }
2777            break;            break;
2778            }            }
2779          eptr += slength;          eptr += slength;
2780          }          }
2781    
2782        while (eptr >= pp)        while (eptr >= pp)
2783          {          {
2784          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2785          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2786          eptr -= length;          eptr -= length;
2787          }          }
2788        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2789        }        }
2790      /* Control never gets here */      /* Control never gets here */
2791    
# Line 2537  for (;;) Line 2803  for (;;)
2803      case OP_NCLASS:      case OP_NCLASS:
2804      case OP_CLASS:      case OP_CLASS:
2805        {        {
2806          /* The data variable is saved across frames, so the byte map needs to
2807          be stored there. */
2808    #define BYTE_MAP ((pcre_uint8 *)data)
2809        data = ecode + 1;                /* Save for matching */        data = ecode + 1;                /* Save for matching */
2810        ecode += 33;                     /* Advance past the item */        ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2811    
2812        switch (*ecode)        switch (*ecode)
2813          {          {
# Line 2559  for (;;) Line 2828  for (;;)
2828          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2829          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2830          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2831          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2832          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2833          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2834          break;          break;
2835    
2836          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2571  for (;;) Line 2840  for (;;)
2840    
2841        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
2842    
2843  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2844        /* UTF-8 mode */        if (utf)
       if (utf8)  
2845          {          {
2846          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2847            {            {
2848            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2849              {              {
2850              SCHECK_PARTIAL();              SCHECK_PARTIAL();
2851              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2852              }              }
2853            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2854            if (c > 255)            if (c > 255)
2855              {              {
2856              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2857              }              }
2858            else            else
2859              {              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
             }  
2860            }            }
2861          }          }
2862        else        else
2863  #endif  #endif
2864        /* Not UTF-8 mode */        /* Not UTF mode */
2865          {          {
2866          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2867            {            {
2868            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2869              {              {
2870              SCHECK_PARTIAL();              SCHECK_PARTIAL();
2871              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2872              }              }
2873            c = *eptr++;            c = *eptr++;
2874            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2875              if (c > 255)
2876                {
2877                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2878                }
2879              else
2880    #endif
2881                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2882            }            }
2883          }          }
2884    
# Line 2619  for (;;) Line 2892  for (;;)
2892    
2893        if (minimize)        if (minimize)
2894          {          {
2895  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2896          /* UTF-8 mode */          if (utf)
         if (utf8)  
2897            {            {
2898            for (fi = min;; fi++)            for (fi = min;; fi++)
2899              {              {
2900              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2901              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2902              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2903              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2904                {                {
2905                SCHECK_PARTIAL();                SCHECK_PARTIAL();
2906                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2907                }                }
2908              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2909              if (c > 255)              if (c > 255)
2910                {                {
2911                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2912                }                }
2913              else              else
2914                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
               }  
2915              }              }
2916            }            }
2917          else          else
2918  #endif  #endif
2919          /* Not UTF-8 mode */          /* Not UTF mode */
2920            {            {
2921            for (fi = min;; fi++)            for (fi = min;; fi++)
2922              {              {
2923              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2924              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2925              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2926              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2927                {                {
2928                SCHECK_PARTIAL();                SCHECK_PARTIAL();
2929                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2930                }                }
2931              c = *eptr++;              c = *eptr++;
2932              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2933                if (c > 255)
2934                  {
2935                  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2936                  }
2937                else
2938    #endif
2939                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2940              }              }
2941            }            }
2942          /* Control never gets here */          /* Control never gets here */
# Line 2671  for (;;) Line 2948  for (;;)
2948          {          {
2949          pp = eptr;          pp = eptr;
2950    
2951  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2952          /* UTF-8 mode */          if (utf)
         if (utf8)  
2953            {            {
2954            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2955              {              {
# Line 2689  for (;;) Line 2965  for (;;)
2965                if (op == OP_CLASS) break;                if (op == OP_CLASS) break;
2966                }                }
2967              else              else
2968                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
               if ((data[c/8] & (1 << (c&7))) == 0) break;  
               }  
2969              eptr += len;              eptr += len;
2970              }              }
2971            for (;;)            for (;;)
# Line 2704  for (;;) Line 2978  for (;;)
2978            }            }
2979          else          else
2980  #endif  #endif
2981            /* Not UTF-8 mode */            /* Not UTF mode */
2982            {            {
2983            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2984              {              {
# Line 2714  for (;;) Line 2988  for (;;)
2988                break;                break;
2989                }                }
2990              c = *eptr;              c = *eptr;
2991              if ((data[c/8] & (1 << (c&7))) == 0) break;  #ifndef COMPILE_PCRE8
2992                if (c > 255)
2993                  {
2994                  if (op == OP_CLASS) break;
2995                  }
2996                else
2997    #endif
2998                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2999              eptr++;              eptr++;
3000              }              }
3001            while (eptr >= pp)            while (eptr >= pp)
# Line 2725  for (;;) Line 3006  for (;;)
3006              }              }
3007            }            }
3008    
3009          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3010          }          }
3011    #undef BYTE_MAP
3012        }        }
3013      /* Control never gets here */      /* Control never gets here */
3014    
# Line 2735  for (;;) Line 3017  for (;;)
3017      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3018      mode, because Unicode properties are supported in non-UTF-8 mode. */      mode, because Unicode properties are supported in non-UTF-8 mode. */
3019    
3020  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3021      case OP_XCLASS:      case OP_XCLASS:
3022        {        {
3023        data = ecode + 1 + LINK_SIZE;                /* Save for matching */        data = ecode + 1 + LINK_SIZE;                /* Save for matching */
# Line 2760  for (;;) Line 3042  for (;;)
3042          case OP_CRMINRANGE:          case OP_CRMINRANGE:
3043          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
3044          min = GET2(ecode, 1);          min = GET2(ecode, 1);
3045          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
3046          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
3047          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
3048          break;          break;
3049    
3050          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2777  for (;;) Line 3059  for (;;)
3059          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3060            {            {
3061            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3062            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3063            }            }
3064          GETCHARINCTEST(c, eptr);          GETCHARINCTEST(c, eptr);
3065          if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3066          }          }
3067    
3068        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2797  for (;;) Line 3079  for (;;)
3079            {            {
3080            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3081            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3082            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3083            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3084              {              {
3085              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3086              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3087              }              }
3088            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3089            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);            if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3090            }            }
3091          /* Control never gets here */          /* Control never gets here */
3092          }          }
# Line 2822  for (;;) Line 3104  for (;;)
3104              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3105              break;              break;
3106              }              }
3107    #ifdef SUPPORT_UTF
3108            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
3109            if (!_pcre_xclass(c, data)) break;  #else
3110              c = *eptr;
3111    #endif
3112              if (!PRIV(xclass)(c, data, utf)) break;
3113            eptr += len;            eptr += len;
3114            }            }
3115          for(;;)          for(;;)
# Line 2831  for (;;) Line 3117  for (;;)
3117            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3118            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3119            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3120            if (utf8) BACKCHAR(eptr);  #ifdef SUPPORT_UTF
3121              if (utf) BACKCHAR(eptr);
3122    #endif
3123            }            }
3124          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3125          }          }
3126    
3127        /* Control never gets here */        /* Control never gets here */
# Line 2843  for (;;) Line 3131  for (;;)
3131      /* Match a single character, casefully */      /* Match a single character, casefully */
3132    
3133      case OP_CHAR:      case OP_CHAR:
3134  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3135      if (utf8)      if (utf)
3136        {        {
3137        length = 1;        length = 1;
3138        ecode++;        ecode++;
# Line 2852  for (;;) Line 3140  for (;;)
3140        if (length > md->end_subject - eptr)        if (length > md->end_subject - eptr)
3141          {          {
3142          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3143          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3144          }          }
3145        while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
3146        }        }
3147      else      else
3148  #endif  #endif
3149        /* Not UTF mode */
     /* Non-UTF-8 mode */  
3150        {        {
3151        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3152          {          {
3153          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3154          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3155          }          }
3156        if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3157        ecode += 2;        ecode += 2;
3158        }        }
3159      break;      break;
3160    
3161      /* Match a single character, caselessly */      /* Match a single character, caselessly. If we are at the end of the
3162        subject, give up immediately. */
3163    
3164      case OP_CHARI:      case OP_CHARI:
3165  #ifdef SUPPORT_UTF8      if (eptr >= md->end_subject)
3166      if (utf8)        {
3167          SCHECK_PARTIAL();
3168          RRETURN(MATCH_NOMATCH);
3169          }
3170    
3171    #ifdef SUPPORT_UTF
3172        if (utf)
3173        {        {
3174        length = 1;        length = 1;
3175        ecode++;        ecode++;
3176        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
3177    
       if (length > md->end_subject - eptr)  
         {  
         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */  
         MRRETURN(MATCH_NOMATCH);  
         }  
   
3178        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
3179        can use the fast lookup table. */        we know that its other case must also be one byte long, so we can use the
3180          fast lookup table. We know that there is at least one byte left in the
3181          subject. */
3182    
3183        if (fc < 128)        if (fc < 128)
3184          {          {
3185          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          if (md->lcc[fc]
3186                != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3187            ecode++;
3188            eptr++;
3189          }          }
3190    
3191        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character. Note that we cannot
3192          use the value of "length" to check for sufficient bytes left, because the
3193          other case of the character may have more or fewer bytes.  */
3194    
3195        else        else
3196          {          {
# Line 2911  for (;;) Line 3206  for (;;)
3206  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3207            if (dc != UCD_OTHERCASE(fc))            if (dc != UCD_OTHERCASE(fc))
3208  #endif  #endif
3209              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3210            }            }
3211          }          }
3212        }        }
3213      else      else
3214  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF */
3215    
3216      /* Non-UTF-8 mode */      /* Not UTF mode */
3217        {        {
3218        if (md->end_subject - eptr < 1)        if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3219          {            != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3220          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */        eptr++;
         MRRETURN(MATCH_NOMATCH);  
         }  
       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);  
3221        ecode += 2;        ecode += 2;
3222        }        }
3223      break;      break;
# Line 2935  for (;;) Line 3227  for (;;)
3227      case OP_EXACT:      case OP_EXACT:
3228      case OP_EXACTI:      case OP_EXACTI:
3229      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3230      ecode += 3;      ecode += 1 + IMM2_SIZE;
3231      goto REPEATCHAR;      goto REPEATCHAR;
3232    
3233      case OP_POSUPTO:      case OP_POSUPTO:
# Line 2950  for (;;) Line 3242  for (;;)
3242      min = 0;      min = 0;
3243      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3244      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3245      ecode += 3;      ecode += 1 + IMM2_SIZE;
3246      goto REPEATCHAR;      goto REPEATCHAR;
3247    
3248      case OP_POSSTAR:      case OP_POSSTAR:
# Line 2998  for (;;) Line 3290  for (;;)
3290      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. */
3291    
3292      REPEATCHAR:      REPEATCHAR:
3293  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3294      if (utf8)      if (utf)
3295        {        {
3296        length = 1;        length = 1;
3297        charptr = ecode;        charptr = ecode;
# Line 3015  for (;;) Line 3307  for (;;)
3307          unsigned int othercase;          unsigned int othercase;
3308          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3309              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3310            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
3311          else oclength = 0;          else oclength = 0;
3312  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3313    
3314          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3315            {            {
3316            if (eptr <= md->end_subject - length &&            if (eptr <= md->end_subject - length &&
3317              memcmp(eptr, charptr, length) == 0) eptr += length;              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3318  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3319            else if (oclength > 0 &&            else if (oclength > 0 &&
3320                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
3321                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3322  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3323            else            else
3324              {              {
3325              CHECK_PARTIAL();              CHECK_PARTIAL();
3326              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3327              }              }
3328            }            }
3329    
# Line 3043  for (;;) Line 3335  for (;;)
3335              {              {
3336              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3337              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3338              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3339              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3340                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3341  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3342              else if (oclength > 0 &&              else if (oclength > 0 &&
3343                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3344                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3345  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3346              else              else
3347                {                {
3348                CHECK_PARTIAL();                CHECK_PARTIAL();
3349                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3350                }                }
3351              }              }
3352            /* Control never gets here */            /* Control never gets here */
# Line 3066  for (;;) Line 3358  for (;;)
3358            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3359              {              {
3360              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3361                  memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3362  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3363              else if (oclength > 0 &&              else if (oclength > 0 &&
3364                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3365                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3366  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3367              else              else
3368                {                {
# Line 3085  for (;;) Line 3377  for (;;)
3377              {              {
3378              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3379              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3380              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }              if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3381  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3382              eptr--;              eptr--;
3383              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3102  for (;;) Line 3394  for (;;)
3394        value of fc will always be < 128. */        value of fc will always be < 128. */
3395        }        }
3396      else      else
3397  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3398          /* When not in UTF-8 mode, load a single-byte character. */
3399          fc = *ecode++;
3400    
3401      /* When not in UTF-8 mode, load a single-byte character. */      /* The value of fc at this point is always one character, though we may
3402        or may not be in UTF mode. The code is duplicated for the caseless and
     fc = *ecode++;  
   
     /* The value of fc at this point is always less than 256, though we may or  
     may not be in UTF-8 mode. The code is duplicated for the caseless and  
3403      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
3404      common. First, ensure the minimum number of matches are present. If min =      common. First, ensure the minimum number of matches are present. If min =
3405      max, continue at the same level without recursing. Otherwise, if      max, continue at the same level without recursing. Otherwise, if
# Line 3122  for (;;) Line 3412  for (;;)
3412    
3413      if (op >= OP_STARI)  /* Caseless */      if (op >= OP_STARI)  /* Caseless */
3414        {        {
3415        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3416          /* fc must be < 128 if UTF is enabled. */
3417          foc = md->fcc[fc];
3418    #else
3419    #ifdef SUPPORT_UTF
3420    #ifdef SUPPORT_UCP
3421          if (utf && fc > 127)
3422            foc = UCD_OTHERCASE(fc);
3423    #else
3424          if (utf && fc > 127)
3425            foc = fc;
3426    #endif /* SUPPORT_UCP */
3427          else
3428    #endif /* SUPPORT_UTF */
3429            foc = TABLE_GET(fc, md->fcc, fc);
3430    #endif /* COMPILE_PCRE8 */
3431    
3432        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3433          {          {
3434          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3435            {            {
3436            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3437            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3438            }            }
3439          if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3440            eptr++;
3441          }          }
3442        if (min == max) continue;        if (min == max) continue;
3443        if (minimize)        if (minimize)
# Line 3139  for (;;) Line 3446  for (;;)
3446            {            {
3447            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3448            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3449            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3450            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3451              {              {
3452              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3453              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3454              }              }
3455            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3456              eptr++;
3457            }            }
3458          /* Control never gets here */          /* Control never gets here */
3459          }          }
# Line 3159  for (;;) Line 3467  for (;;)
3467              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3468              break;              break;
3469              }              }
3470            if (fc != md->lcc[*eptr]) break;            if (fc != *eptr && foc != *eptr) break;
3471            eptr++;            eptr++;
3472            }            }
3473    
# Line 3171  for (;;) Line 3479  for (;;)
3479            eptr--;            eptr--;
3480            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3481            }            }
3482          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3483          }          }
3484        /* Control never gets here */        /* Control never gets here */
3485        }        }
# Line 3185  for (;;) Line 3493  for (;;)
3493          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3494            {            {
3495            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3496            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3497            }            }
3498          if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3499          }          }
3500    
3501        if (min == max) continue;        if (min == max) continue;
# Line 3198  for (;;) Line 3506  for (;;)
3506            {            {
3507            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3508            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3509            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3510            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3511              {              {
3512              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3513              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3514              }              }
3515            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3516            }            }
3517          /* Control never gets here */          /* Control never gets here */
3518          }          }
# Line 3229  for (;;) Line 3537  for (;;)
3537            eptr--;            eptr--;
3538            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3539            }            }
3540          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3541          }          }
3542        }        }
3543      /* Control never gets here */      /* Control never gets here */
# Line 3242  for (;;) Line 3550  for (;;)
3550      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3551        {        {
3552        SCHECK_PARTIAL();        SCHECK_PARTIAL();
3553        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3554        }        }
3555      ecode++;      ecode++;
3556      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3557      if (op == OP_NOTI)         /* The caseless case */      if (op == OP_NOTI)         /* The caseless case */
3558        {        {
3559  #ifdef SUPPORT_UTF8        register unsigned int ch, och;
3560        if (c < 256)        ch = *ecode++;
3561  #endif  #ifdef COMPILE_PCRE8
3562        c = md->lcc[c];        /* ch must be < 128 if UTF is enabled. */
3563        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);        och = md->fcc[ch];
3564    #else
3565    #ifdef SUPPORT_UTF
3566    #ifdef SUPPORT_UCP
3567          if (utf && ch > 127)
3568            och = UCD_OTHERCASE(ch);
3569    #else
3570          if (utf && ch > 127)
3571            och = ch;
3572    #endif /* SUPPORT_UCP */
3573          else
3574    #endif /* SUPPORT_UTF */
3575            och = TABLE_GET(ch, md->fcc, ch);
3576    #endif /* COMPILE_PCRE8 */
3577          if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3578        }        }
3579      else    /* Caseful */      else    /* Caseful */
3580        {        {
3581        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
3582        }        }
3583      break;      break;
3584    
# Line 3270  for (;;) Line 3592  for (;;)
3592      case OP_NOTEXACT:      case OP_NOTEXACT:
3593      case OP_NOTEXACTI:      case OP_NOTEXACTI:
3594      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3595      ecode += 3;      ecode += 1 + IMM2_SIZE;
3596      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3597    
3598      case OP_NOTUPTO:      case OP_NOTUPTO:
# Line 3280  for (;;) Line 3602  for (;;)
3602      min = 0;      min = 0;
3603      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3604      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3605      ecode += 3;      ecode += 1 + IMM2_SIZE;
3606      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3607    
3608      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
# Line 3312  for (;;) Line 3634  for (;;)
3634      possessive = TRUE;      possessive = TRUE;
3635      min = 0;      min = 0;
3636      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3637      ecode += 3;      ecode += 1 + IMM2_SIZE;
3638      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3639    
3640      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 3351  for (;;) Line 3673  for (;;)
3673    
3674      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3675        {        {
3676        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3677          /* fc must be < 128 if UTF is enabled. */
3678          foc = md->fcc[fc];
3679    #else
3680    #ifdef SUPPORT_UTF
3681    #ifdef SUPPORT_UCP
3682          if (utf && fc > 127)
3683            foc = UCD_OTHERCASE(fc);
3684    #else
3685          if (utf && fc > 127)
3686            foc = fc;
3687    #endif /* SUPPORT_UCP */
3688          else
3689    #endif /* SUPPORT_UTF */
3690            foc = TABLE_GET(fc, md->fcc, fc);
3691    #endif /* COMPILE_PCRE8 */
3692    
3693  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3694        /* UTF-8 mode */        if (utf)
       if (utf8)  
3695          {          {
3696          register unsigned int d;          register unsigned int d;
3697          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3363  for (;;) Line 3699  for (;;)
3699            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3700              {              {
3701              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3702              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3703              }              }
3704            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3705            if (d < 256) d = md->lcc[d];            if (fc == d || (unsigned int) foc == d) RRETURN(MATCH_NOMATCH);
           if (fc == d) MRRETURN(MATCH_NOMATCH);  
3706            }            }
3707          }          }
3708        else        else
3709  #endif  #endif
3710          /* Not UTF mode */
       /* Not UTF-8 mode */  
3711          {          {
3712          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3713            {            {
3714            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3715              {              {
3716              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3717              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3718              }              }
3719            if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3720              eptr++;
3721            }            }
3722          }          }
3723    
# Line 3390  for (;;) Line 3725  for (;;)
3725    
3726        if (minimize)        if (minimize)
3727          {          {
3728  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3729          /* UTF-8 mode */          if (utf)
         if (utf8)  
3730            {            {
3731            register unsigned int d;            register unsigned int d;
3732            for (fi = min;; fi++)            for (fi = min;; fi++)
3733              {              {
3734              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3735              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3736              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3737              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3738                {                {
3739                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3740                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3741                }                }
3742              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3743              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
             if (fc == d) MRRETURN(MATCH_NOMATCH);  
3744              }              }
3745            }            }
3746          else          else
3747  #endif  #endif
3748          /* Not UTF-8 mode */          /* Not UTF mode */
3749            {            {
3750            for (fi = min;; fi++)            for (fi = min;; fi++)
3751              {              {
3752              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3753              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3754              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3755              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3756                {                {
3757                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3758                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3759                }                }
3760              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);              if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3761                eptr++;
3762              }              }
3763            }            }
3764          /* Control never gets here */          /* Control never gets here */
# Line 3436  for (;;) Line 3770  for (;;)
3770          {          {
3771          pp = eptr;          pp = eptr;
3772    
3773  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3774          /* UTF-8 mode */          if (utf)
         if (utf8)  
3775            {            {
3776            register unsigned int d;            register unsigned int d;
3777            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3450  for (;;) Line 3783  for (;;)
3783                break;                break;
3784                }                }
3785              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3786              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) break;
             if (fc == d) break;  
3787              eptr += len;              eptr += len;
3788              }              }
3789          if (possessive) continue;            if (possessive) continue;
3790          for(;;)            for(;;)
3791              {              {
3792              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3793              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 3465  for (;;) Line 3797  for (;;)
3797            }            }
3798          else          else
3799  #endif  #endif
3800          /* Not UTF-8 mode */          /* Not UTF mode */
3801            {            {
3802            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3803              {              {
# Line 3474  for (;;) Line 3806  for (;;)
3806                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3807                break;                break;
3808                }                }
3809              if (fc == md->lcc[*eptr]) break;              if (fc == *eptr || foc == *eptr) break;
3810              eptr++;              eptr++;
3811              }              }
3812            if (possessive) continue;            if (possessive) continue;
# Line 3486  for (;;) Line 3818  for (;;)
3818              }              }
3819            }            }
3820    
3821          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3822          }          }
3823        /* Control never gets here */        /* Control never gets here */
3824        }        }
# Line 3495  for (;;) Line 3827  for (;;)
3827    
3828      else      else
3829        {        {
3830  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3831        /* UTF-8 mode */        if (utf)
       if (utf8)  
3832          {          {
3833          register unsigned int d;          register unsigned int d;
3834          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3505  for (;;) Line 3836  for (;;)
3836            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3837              {              {
3838              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3839              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3840              }              }
3841            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3842            if (fc == d) MRRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
3843            }            }
3844          }          }
3845        else        else
3846  #endif  #endif
3847        /* Not UTF-8 mode */        /* Not UTF mode */
3848          {          {
3849          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3850            {            {
3851            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3852              {              {
3853              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3854              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3855              }              }
3856            if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3857            }            }
3858          }          }
3859    
# Line 3530  for (;;) Line 3861  for (;;)
3861    
3862        if (minimize)        if (minimize)
3863          {          {
3864  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3865          /* UTF-8 mode */          if (utf)
         if (utf8)  
3866            {            {
3867            register unsigned int d;            register unsigned int d;
3868            for (fi = min;; fi++)            for (fi = min;; fi++)
3869              {              {
3870              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3871              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3872              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3873              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3874                {                {
3875                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3876                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3877                }                }
3878              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3879              if (fc == d) MRRETURN(MATCH_NOMATCH);              if (fc == d) RRETURN(MATCH_NOMATCH);
3880              }              }
3881            }            }
3882          else          else
3883  #endif  #endif
3884          /* Not UTF-8 mode */          /* Not UTF mode */
3885            {            {
3886            for (fi = min;; fi++)            for (fi = min;; fi++)
3887              {              {
3888              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3889              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3890              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3891              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3892                {                {
3893                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3894                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3895                }                }
3896              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);              if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3897              }              }
3898            }            }
3899          /* Control never gets here */          /* Control never gets here */
# Line 3575  for (;;) Line 3905  for (;;)
3905          {          {
3906          pp = eptr;          pp = eptr;
3907    
3908  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3909          /* UTF-8 mode */          if (utf)
         if (utf8)  
3910            {            {
3911            register unsigned int d;            register unsigned int d;
3912            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3603  for (;;) Line 3932  for (;;)
3932            }            }
3933          else          else
3934  #endif  #endif
3935          /* Not UTF-8 mode */          /* Not UTF mode */
3936            {            {
3937            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3938              {              {
# Line 3624  for (;;) Line 3953  for (;;)
3953              }              }
3954            }            }
3955    
3956          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3957          }          }
3958        }        }
3959      /* Control never gets here */      /* Control never gets here */
# Line 3636  for (;;) Line 3965  for (;;)
3965      case OP_TYPEEXACT:      case OP_TYPEEXACT:
3966      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3967      minimize = TRUE;      minimize = TRUE;
3968      ecode += 3;      ecode += 1 + IMM2_SIZE;
3969      goto REPEATTYPE;      goto REPEATTYPE;
3970    
3971      case OP_TYPEUPTO:      case OP_TYPEUPTO:
# Line 3644  for (;;) Line 3973  for (;;)
3973      min = 0;      min = 0;
3974      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3975      minimize = *ecode == OP_TYPEMINUPTO;      minimize = *ecode == OP_TYPEMINUPTO;
3976      ecode += 3;      ecode += 1 + IMM2_SIZE;
3977      goto REPEATTYPE;      goto REPEATTYPE;
3978    
3979      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
# Line 3672  for (;;) Line 4001  for (;;)
4001      possessive = TRUE;      possessive = TRUE;
4002      min = 0;      min = 0;
4003      max = GET2(ecode, 1);      max = GET2(ecode, 1);
4004      ecode += 3;      ecode += 1 + IMM2_SIZE;
4005      goto REPEATTYPE;      goto REPEATTYPE;
4006    
4007      case OP_TYPESTAR:      case OP_TYPESTAR:
# Line 3718  for (;;) Line 4047  for (;;)
4047          switch(prop_type)          switch(prop_type)
4048            {            {
4049            case PT_ANY:            case PT_ANY:
4050            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4051            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4052              {              {
4053              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4054                {                {
4055                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4056                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4057                }                }
4058              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4059              }              }
# Line 3737  for (;;) Line 4066  for (;;)
4066              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4067                {                {
4068                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4069                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4070                }                }
4071              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4072              chartype = UCD_CHARTYPE(c);              chartype = UCD_CHARTYPE(c);
4073              if ((chartype == ucp_Lu ||              if ((chartype == ucp_Lu ||
4074                   chartype == ucp_Ll ||                   chartype == ucp_Ll ||
4075                   chartype == ucp_Lt) == prop_fail_result)                   chartype == ucp_Lt) == prop_fail_result)
4076                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4077              }              }
4078            break;            break;
4079    
# Line 3754  for (;;) Line 4083  for (;;)
4083              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4084                {                {
4085                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4086                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4087                }                }
4088              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4089              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4090                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4091              }              }
4092            break;            break;
4093    
# Line 3768  for (;;) Line 4097  for (;;)
4097              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4098                {                {
4099                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4100                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4101                }                }
4102              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4103              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4104                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4105              }              }
4106            break;            break;
4107    
# Line 3782  for (;;) Line 4111  for (;;)
4111              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4112                {                {
4113                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4114                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4115                }                }
4116              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4117              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4118                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4119              }              }
4120            break;            break;
4121    
# Line 3797  for (;;) Line 4126  for (;;)
4126              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4127                {                {
4128                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4129                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4130                }                }
4131              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4132              category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4133              if ((category == ucp_L || category == ucp_N) == prop_fail_result)              if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4134                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4135              }              }
4136            break;            break;
4137    
# Line 3812  for (;;) Line 4141  for (;;)
4141              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4142                {                {
4143                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4144                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4145                }                }
4146              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4147              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4148                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4149                     == prop_fail_result)                     == prop_fail_result)
4150                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4151              }              }
4152            break;            break;
4153    
# Line 3828  for (;;) Line 4157  for (;;)
4157              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4158                {                {
4159                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4160                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4161                }                }
4162              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4163              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4164                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4165                     == prop_fail_result)                     == prop_fail_result)
4166                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4167              }              }
4168            break;            break;
4169    
# Line 3845  for (;;) Line 4174  for (;;)
4174              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4175                {                {
4176                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4177                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4178                }                }
4179              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4180              category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4181              if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)              if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4182                     == prop_fail_result)                     == prop_fail_result)
4183                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4184              }              }
4185            break;            break;
4186    
# Line 3872  for (;;) Line 4201  for (;;)
4201            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4202              {              {
4203              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4204              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4205              }              }
4206            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4207            if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);            if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
4208            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4209              {              {
4210              int len = 1;              int len = 1;
4211              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4212              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4213              eptr += len;              eptr += len;
4214              }              }
4215              CHECK_PARTIAL();
4216            }            }
4217          }          }
4218    
# Line 3891  for (;;) Line 4221  for (;;)
4221    
4222  /* Handle all other cases when the coding is UTF-8 */  /* Handle all other cases when the coding is UTF-8 */
4223    
4224  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4225        if (utf8) switch(ctype)        if (utf) switch(ctype)
4226          {          {
4227          case OP_ANY:          case OP_ANY:
4228          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3900  for (;;) Line 4230  for (;;)
4230            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4231              {              {
4232              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4233              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4234              }              }
4235            if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4236            eptr++;            eptr++;
4237            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4238            }            }
4239          break;          break;
4240    
# Line 3914  for (;;) Line 4244  for (;;)
4244            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4245              {              {
4246              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4247              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4248              }              }
4249            eptr++;            eptr++;
4250            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4251            }            }
4252          break;          break;
4253    
4254          case OP_ANYBYTE:          case OP_ANYBYTE:
4255          if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);          if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4256          eptr += min;          eptr += min;
4257          break;          break;
4258    
# Line 3932  for (;;) Line 4262  for (;;)
4262            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4263              {              {
4264              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4265              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4266              }              }
4267            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4268            switch(c)            switch(c)
4269              {              {
4270              default: MRRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4271    
4272              case 0x000d:              case 0x000d:
4273              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
# Line 3951  for (;;) Line 4281  for (;;)
4281              case 0x0085:              case 0x0085:
4282              case 0x2028:              case 0x2028:
4283              case 0x2029:              case 0x2029:
4284              if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4285              break;              break;
4286              }              }
4287            }            }
# Line 3963  for (;;) Line 4293  for (;;)
4293            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4294              {              {
4295              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4296              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4297              }              }
4298            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4299            switch(c)            switch(c)
# Line 3988  for (;;) Line 4318  for (;;)
4318              case 0x202f:    /* NARROW NO-BREAK SPACE */              case 0x202f:    /* NARROW NO-BREAK SPACE */
4319              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4320              case 0x3000:    /* IDEOGRAPHIC SPACE */              case 0x3000:    /* IDEOGRAPHIC SPACE */
4321              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4322              }              }
4323            }            }
4324          break;          break;
# Line 3999  for (;;) Line 4329  for (;;)
4329            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4330              {              {
4331              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4332              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4333              }              }
4334            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4335            switch(c)            switch(c)
4336              {              {
4337              default: MRRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4338              case 0x09:      /* HT */              case 0x09:      /* HT */
4339              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4340              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
# Line 4035  for (;;) Line 4365  for (;;)
4365            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4366              {              {
4367              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4368              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4369              }              }
4370            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4371            switch(c)            switch(c)
# Line 4048  for (;;) Line 4378  for (;;)
4378              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4379              case 0x2028:    /* LINE SEPARATOR */              case 0x2028:    /* LINE SEPARATOR */
4380              case 0x2029:    /* PARAGRAPH SEPARATOR */              case 0x2029:    /* PARAGRAPH SEPARATOR */
4381              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4382              }              }
4383            }            }
4384          break;          break;
# Line 4059  for (;;) Line 4389  for (;;)
4389            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4390              {              {
4391              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4392              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4393              }              }
4394            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4395            switch(c)            switch(c)
4396              {              {
4397              default: MRRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4398              case 0x0a:      /* LF */              case 0x0a:      /* LF */
4399              case 0x0b:      /* VT */              case 0x0b:      /* VT */
4400              case 0x0c:      /* FF */              case 0x0c:      /* FF */
# Line 4083  for (;;) Line 4413  for (;;)
4413            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4414              {              {
4415              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4416              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4417              }              }
4418            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4419            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4420              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4421            }            }
4422          break;          break;
4423    
# Line 4097  for (;;) Line 4427  for (;;)
4427            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4428              {              {
4429              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4430              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4431              }              }
4432            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
4433              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4434              eptr++;
4435            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4436            }            }
4437          break;          break;
# Line 4111  for (;;) Line 4442  for (;;)
4442            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4443              {              {
4444              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4445              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4446              }              }
4447            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4448              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4449            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4450              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4451            }            }
4452          break;          break;
4453    
# Line 4125  for (;;) Line 4457  for (;;)
4457            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4458              {              {
4459              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4460              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4461              }              }
4462            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
4463              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4464              eptr++;
4465            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4466            }            }
4467          break;          break;
# Line 4139  for (;;) Line 4472  for (;;)
4472            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4473              {              {
4474              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4475              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4476              }              }
4477            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4478              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4479            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4480              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4481            }            }
4482          break;          break;
4483    
# Line 4153  for (;;) Line 4487  for (;;)
4487            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4488              {              {
4489              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4490              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4491              }              }
4492            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
4493              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4494              eptr++;
4495            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4496            }            }
4497          break;          break;
# Line 4166  for (;;) Line 4501  for (;;)
4501          }  /* End switch(ctype) */          }  /* End switch(ctype) */
4502    
4503        else        else
4504  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF */
4505    
4506        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
4507        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. */
# Line 4179  for (;;) Line 4514  for (;;)
4514            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4515              {              {
4516              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4517              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4518              }              }
4519            if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4520            eptr++;            eptr++;
4521            }            }
4522          break;          break;
# Line 4190  for (;;) Line 4525  for (;;)
4525          if (eptr > md->end_subject - min)          if (eptr > md->end_subject - min)
4526            {            {
4527            SCHECK_PARTIAL();            SCHECK_PARTIAL();
4528            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
4529            }            }
4530          eptr += min;          eptr += min;
4531          break;          break;
# Line 4199  for (;;) Line 4534  for (;;)
4534          if (eptr > md->end_subject - min)          if (eptr > md->end_subject - min)
4535            {            {
4536            SCHECK_PARTIAL();            SCHECK_PARTIAL();
4537            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
4538            }            }
4539          eptr += min;          eptr += min;
4540          break;          break;
# Line 4210  for (;;) Line 4545  for (;;)
4545            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4546              {              {
4547              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4548              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4549              }              }
4550            switch(*eptr++)            switch(*eptr++)
4551              {              {
4552              default: MRRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4553    
4554              case 0x000d:              case 0x000d:
4555              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
# Line 4226  for (;;) Line 4561  for (;;)
4561              case 0x000b:              case 0x000b:
4562              case 0x000c:              case 0x000c:
4563              case 0x0085:              case 0x0085:
4564              if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);  #ifdef COMPILE_PCRE16
4565                case 0x2028:
4566                case 0x2029:
4567    #endif
4568                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4569              break;              break;
4570              }              }
4571            }            }
# Line 4238  for (;;) Line 4577  for (;;)
4577            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4578              {              {
4579              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4580              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4581              }              }
4582            switch(*eptr++)            switch(*eptr++)
4583              {              {
# Line 4246  for (;;) Line 4585  for (;;)
4585              case 0x09:      /* HT */              case 0x09:      /* HT */
4586              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4587              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4588              MRRETURN(MATCH_NOMATCH);  #ifdef COMPILE_PCRE16
4589                case 0x1680:    /* OGHAM SPACE MARK */
4590                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4591                case 0x2000:    /* EN QUAD */
4592                case 0x2001:    /* EM QUAD */
4593                case 0x2002:    /* EN SPACE */
4594                case 0x2003:    /* EM SPACE */
4595                case 0x2004:    /* THREE-PER-EM SPACE */
4596                case 0x2005:    /* FOUR-PER-EM SPACE */
4597                case 0x2006:    /* SIX-PER-EM SPACE */
4598                case 0x2007:    /* FIGURE SPACE */
4599                case 0x2008:    /* PUNCTUATION SPACE */
4600                case 0x2009:    /* THIN SPACE */
4601                case 0x200A:    /* HAIR SPACE */
4602                case 0x202f:    /* NARROW NO-BREAK SPACE */
4603                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4604                case 0x3000:    /* IDEOGRAPHIC SPACE */
4605    #endif
4606                RRETURN(MATCH_NOMATCH);
4607              }              }
4608            }            }
4609          break;          break;
# Line 4257  for (;;) Line 4614  for (;;)
4614            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4615              {              {
4616              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4617              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4618              }              }
4619            switch(*eptr++)            switch(*eptr++)
4620              {              {
4621              default: MRRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4622              case 0x09:      /* HT */              case 0x09:      /* HT */
4623              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4624              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4625    #ifdef COMPILE_PCRE16
4626                case 0x1680:    /* OGHAM SPACE MARK */
4627                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4628                case 0x2000:    /* EN QUAD */
4629                case 0x2001:    /* EM QUAD */
4630                case 0x2002:    /* EN SPACE */
4631                case 0x2003:    /* EM SPACE */
4632                case 0x2004:    /* THREE-PER-EM SPACE */
4633                case 0x2005:    /* FOUR-PER-EM SPACE */
4634                case 0x2006:    /* SIX-PER-EM SPACE */
4635                case 0x2007:    /* FIGURE SPACE */
4636                case 0x2008:    /* PUNCTUATION SPACE */
4637                case 0x2009:    /* THIN SPACE */
4638                case 0x200A:    /* HAIR SPACE */
4639                case 0x202f:    /* NARROW NO-BREAK SPACE */
4640                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4641                case 0x3000:    /* IDEOGRAPHIC SPACE */
4642    #endif
4643              break;              break;
4644              }              }
4645            }            }
# Line 4276  for (;;) Line 4651  for (;;)
4651            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4652              {              {
4653              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4654              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4655              }              }
4656            switch(*eptr++)            switch(*eptr++)
4657              {              {
# Line 4286  for (;;) Line 4661  for (;;)
4661              case 0x0c:      /* FF */              case 0x0c:      /* FF */
4662              case 0x0d:      /* CR */              case 0x0d:      /* CR */
4663              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4664              MRRETURN(MATCH_NOMATCH);  #ifdef COMPILE_PCRE16
4665                case 0x2028:    /* LINE SEPARATOR */
4666                case 0x2029:    /* PARAGRAPH SEPARATOR */
4667    #endif
4668                RRETURN(MATCH_NOMATCH);
4669              }              }
4670            }            }
4671          break;          break;
# Line 4297  for (;;) Line 4676  for (;;)
4676            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4677              {              {
4678              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4679              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4680              }              }
4681            switch(*eptr++)            switch(*eptr++)
4682              {              {
4683              default: MRRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4684              case 0x0a:      /* LF */              case 0x0a:      /* LF */
4685              case 0x0b:      /* VT */              case 0x0b:      /* VT */
4686              case 0x0c:      /* FF */              case 0x0c:      /* FF */
4687              case 0x0d:      /* CR */              case 0x0d:      /* CR */
4688              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4689    #ifdef COMPILE_PCRE16
4690                case 0x2028:    /* LINE SEPARATOR */
4691                case 0x2029:    /* PARAGRAPH SEPARATOR */
4692    #endif
4693              break;              break;
4694              }              }
4695            }            }
# Line 4318  for (;;) Line 4701  for (;;)
4701            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4702              {              {
4703              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4704              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4705              }              }
4706            if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4707                RRETURN(MATCH_NOMATCH);
4708              eptr++;
4709            }            }
4710          break;          break;
4711    
# Line 4330  for (;;) Line 4715  for (;;)
4715            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4716              {              {
4717              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4718              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4719              }              }
4720            if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4721                RRETURN(MATCH_NOMATCH);
4722              eptr++;
4723            }            }
4724          break;          break;
4725    
# Line 4342  for (;;) Line 4729  for (;;)
4729            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4730              {              {
4731              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4732              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4733              }              }
4734            if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4735                RRETURN(MATCH_NOMATCH);
4736              eptr++;
4737            }            }
4738          break;          break;
4739    
# Line 4354  for (;;) Line 4743  for (;;)
4743            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4744              {              {
4745              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4746              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4747              }              }
4748            if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4749                RRETURN(MATCH_NOMATCH);
4750              eptr++;
4751            }            }
4752          break;          break;
4753    
# Line 4366  for (;;) Line 4757  for (;;)
4757            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4758              {              {
4759              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4760              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NO