/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 648 by ph10, Mon Aug 1 11:02:08 2011 UTC revision 892 by ph10, Wed Jan 18 17:23:20 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 82  negative to avoid the external error cod Line 82  negative to avoid the external error cod
82  #define MATCH_SKIP_ARG     (-993)  #define MATCH_SKIP_ARG     (-993)
83  #define MATCH_THEN         (-992)  #define MATCH_THEN         (-992)
84    
 /* This is a convenience macro for code that occurs many times. */  
   
 #define MRRETURN(ra) \  
   { \  
   md->mark = markptr; \  
   RRETURN(ra); \  
   }  
   
85  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
86  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
87  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 121  Returns:     nothing Line 113  Returns:     nothing
113  */  */
114    
115  static void  static void
116  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
117  {  {
118  unsigned int c;  unsigned int c;
119  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 152  Returns:      < 0 if not matched, otherw Line 144  Returns:      < 0 if not matched, otherw
144  */  */
145    
146  static int  static int
147  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
148    BOOL caseless)    BOOL caseless)
149  {  {
150  USPTR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
151  register USPTR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
152    
153  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
154  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 181  ASCII characters. */ Line 173  ASCII characters. */
173    
174  if (caseless)  if (caseless)
175    {    {
176  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
177  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
178    if (md->utf8)    if (md->utf)
179      {      {
180      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
181      bytes matched may differ, because there are some characters whose upper and      bytes matched may differ, because there are some characters whose upper and
# Line 193  if (caseless) Line 185  if (caseless)
185      the latter. It is important, therefore, to check the length along the      the latter. It is important, therefore, to check the length along the
186      reference, not along the subject (earlier code did this wrong). */      reference, not along the subject (earlier code did this wrong). */
187    
188      USPTR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
189      while (p < endptr)      while (p < endptr)
190        {        {
191        int c, d;        int c, d;
# Line 212  if (caseless) Line 204  if (caseless)
204      {      {
205      if (eptr + length > md->end_subject) return -1;      if (eptr + length > md->end_subject) return -1;
206      while (length-- > 0)      while (length-- > 0)
207        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }        {
208          if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
209          p++;
210          eptr++;
211          }
212      }      }
213    }    }
214    
# Line 225  else Line 221  else
221    while (length-- > 0) if (*p++ != *eptr++) return -1;    while (length-- > 0) if (*p++ != *eptr++) return -1;
222    }    }
223    
224  return eptr - eptr_start;  return (int)(eptr - eptr_start);
225  }  }
226    
227    
# Line 277  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 273  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
273         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
274         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
275         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
276         RM61,  RM62, RM63 };         RM61,  RM62, RM63, RM64, RM65, RM66 };
277    
278  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
279  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 290  actually used in this definition. */ Line 286  actually used in this definition. */
286  #define RMATCH(ra,rb,rc,rd,re,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
287    { \    { \
288    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
289    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
290    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
291    }    }
292  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 300  actually used in this definition. */ Line 296  actually used in this definition. */
296    }    }
297  #else  #else
298  #define RMATCH(ra,rb,rc,rd,re,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
299    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
300  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
301  #endif  #endif
302    
# Line 315  argument of match(), which never changes Line 311  argument of match(), which never changes
311    
312  #define RMATCH(ra,rb,rc,rd,re,rw)\  #define RMATCH(ra,rb,rc,rd,re,rw)\
313    {\    {\
314    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
315    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316    frame->Xwhere = rw; \    frame->Xwhere = rw; \
317    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
318    newframe->Xecode = rb;\    newframe->Xecode = rb;\
319    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
   newframe->Xmarkptr = markptr;\  
320    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
321    newframe->Xeptrb = re;\    newframe->Xeptrb = re;\
322    newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
# Line 337  argument of match(), which never changes Line 332  argument of match(), which never changes
332    {\    {\
333    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
334    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
335    (pcre_stack_free)(oldframe);\    if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
336    if (frame != NULL)\    if (frame != NULL)\
337      {\      {\
338      rrc = ra;\      rrc = ra;\
# Line 354  typedef struct heapframe { Line 349  typedef struct heapframe {
349    
350    /* Function arguments that may change */    /* Function arguments that may change */
351    
352    USPTR Xeptr;    PCRE_PUCHAR Xeptr;
353    const uschar *Xecode;    const pcre_uchar *Xecode;
354    USPTR Xmstart;    PCRE_PUCHAR Xmstart;
   USPTR Xmarkptr;  
355    int Xoffset_top;    int Xoffset_top;
356    eptrblock *Xeptrb;    eptrblock *Xeptrb;
357    unsigned int Xrdepth;    unsigned int Xrdepth;
358    
359    /* Function local variables */    /* Function local variables */
360    
361    USPTR Xcallpat;    PCRE_PUCHAR Xcallpat;
362  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
363    USPTR Xcharptr;    PCRE_PUCHAR Xcharptr;
364  #endif  #endif
365    USPTR Xdata;    PCRE_PUCHAR Xdata;
366    USPTR Xnext;    PCRE_PUCHAR Xnext;
367    USPTR Xpp;    PCRE_PUCHAR Xpp;
368    USPTR Xprev;    PCRE_PUCHAR Xprev;
369    USPTR Xsaved_eptr;    PCRE_PUCHAR Xsaved_eptr;
370    
371    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
372    
# Line 385  typedef struct heapframe { Line 379  typedef struct heapframe {
379    int Xprop_value;    int Xprop_value;
380    int Xprop_fail_result;    int Xprop_fail_result;
381    int Xoclength;    int Xoclength;
382    uschar Xocchars[8];    pcre_uchar Xocchars[6];
383  #endif  #endif
384    
385    int Xcodelink;    int Xcodelink;
# Line 427  returns a negative (error) response, the Line 421  returns a negative (error) response, the
421  same response. */  same response. */
422    
423  /* These macros pack up tests that are used for partial matching, and which  /* These macros pack up tests that are used for partial matching, and which
424  appears several times in the code. We set the "hit end" flag if the pointer is  appear several times in the code. We set the "hit end" flag if the pointer is
425  at the end of the subject and also past the start of the subject (i.e.  at the end of the subject and also past the start of the subject (i.e.
426  something has been matched). For hard partial matching, we then return  something has been matched). For hard partial matching, we then return
427  immediately. The second one is used when we already know we are past the end of  immediately. The second one is used when we already know we are past the end of
# Line 438  the subject. */ Line 432  the subject. */
432        eptr > md->start_used_ptr) \        eptr > md->start_used_ptr) \
433      { \      { \
434      md->hitend = TRUE; \      md->hitend = TRUE; \
435      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
436      }      }
437    
438  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
439    if (md->partial != 0 && eptr > md->start_used_ptr) \    if (md->partial != 0 && eptr > md->start_used_ptr) \
440      { \      { \
441      md->hitend = TRUE; \      md->hitend = TRUE; \
442      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
443      }      }
444    
445    
446  /* Performance note: It might be tempting to extract commonly used fields from  /* Performance note: It might be tempting to extract commonly used fields from
447  the md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf, end_subject) into individual variables to improve
448  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
449  made performance worse.  made performance worse.
450    
# Line 459  Arguments: Line 453  Arguments:
453     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
454     mstart      pointer to the current match start position (can be modified     mstart      pointer to the current match start position (can be modified
455                   by encountering \K)                   by encountering \K)
    markptr     pointer to the most recent MARK name, or NULL  
456     offset_top  current top pointer     offset_top  current top pointer
457     md          pointer to "static" info for the match     md          pointer to "static" info for the match
458     eptrb       pointer to chain of blocks containing eptr at start of     eptrb       pointer to chain of blocks containing eptr at start of
# Line 474  Returns:       MATCH_MATCH if matched Line 467  Returns:       MATCH_MATCH if matched
467  */  */
468    
469  static int  static int
470  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
471    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,    PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
472    unsigned int rdepth)    unsigned int rdepth)
473  {  {
474  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
# Line 485  so they can be ordinary variables in all Line 478  so they can be ordinary variables in all
478  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
479  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
480  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
481  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
482    
483  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
484  BOOL caseless;  BOOL caseless;
485  int condcode;  int condcode;
486    
487  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
488  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
489  heap storage. Set up the top-level frame here; others are obtained from the  frame on the stack here; subsequent instantiations are obtained from the heap
490  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
491    the top-level on the stack rather than malloc-ing them all gives a performance
492    boost in many cases where there is not much "recursion". */
493    
494  #ifdef NO_RECURSE  #ifdef NO_RECURSE
495  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));  heapframe frame_zero;
496  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  heapframe *frame = &frame_zero;
497  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
498    
499  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 506  frame->Xprevframe = NULL;            /* Line 501  frame->Xprevframe = NULL;            /*
501  frame->Xeptr = eptr;  frame->Xeptr = eptr;
502  frame->Xecode = ecode;  frame->Xecode = ecode;
503  frame->Xmstart = mstart;  frame->Xmstart = mstart;
 frame->Xmarkptr = markptr;  
504  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
505  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
506  frame->Xrdepth = rdepth;  frame->Xrdepth = rdepth;
# Line 520  HEAP_RECURSE: Line 514  HEAP_RECURSE:
514  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
515  #define ecode              frame->Xecode  #define ecode              frame->Xecode
516  #define mstart             frame->Xmstart  #define mstart             frame->Xmstart
 #define markptr            frame->Xmarkptr  
517  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
518  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
519  #define rdepth             frame->Xrdepth  #define rdepth             frame->Xrdepth
520    
521  /* Ditto for the local variables */  /* Ditto for the local variables */
522    
523  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
524  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
525  #endif  #endif
526  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
# Line 585  declarations can be cut out in a block. Line 578  declarations can be cut out in a block.
578  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
579  to RMATCH(). */  to RMATCH(). */
580    
581  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
582  const uschar *charptr;  const pcre_uchar *charptr;
583  #endif  #endif
584  const uschar *callpat;  const pcre_uchar *callpat;
585  const uschar *data;  const pcre_uchar *data;
586  const uschar *next;  const pcre_uchar *next;
587  USPTR         pp;  PCRE_PUCHAR       pp;
588  const uschar *prev;  const pcre_uchar *prev;
589  USPTR         saved_eptr;  PCRE_PUCHAR       saved_eptr;
590    
591  recursion_info new_recursive;  recursion_info new_recursive;
592    
# Line 606  int prop_type; Line 599  int prop_type;
599  int prop_value;  int prop_value;
600  int prop_fail_result;  int prop_fail_result;
601  int oclength;  int oclength;
602  uschar occhars[8];  pcre_uchar occhars[6];
603  #endif  #endif
604    
605  int codelink;  int codelink;
# Line 634  the alternative names that are used. */ Line 627  the alternative names that are used. */
627  #define code_offset   codelink  #define code_offset   codelink
628  #define condassert    condition  #define condassert    condition
629  #define matched_once  prev_is_word  #define matched_once  prev_is_word
630    #define foc           number
631    #define save_mark     data
632    
633  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
634  variables. */  variables. */
# Line 659  defined). However, RMATCH isn't like a f Line 654  defined). However, RMATCH isn't like a f
654  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
655  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
656    
657  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
658  utf8 = md->utf8;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
659  #else  #else
660  utf8 = FALSE;  utf = FALSE;
661  #endif  #endif
662    
663  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
# Line 701  for (;;) Line 696  for (;;)
696    switch(op)    switch(op)
697      {      {
698      case OP_MARK:      case OP_MARK:
699      markptr = ecode + 2;      md->nomatch_mark = ecode + 2;
700      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      md->mark = NULL;    /* In case previously set by assertion */
701        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
702        eptrb, RM55);        eptrb, RM55);
703        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
704             md->mark == NULL) md->mark = ecode + 2;
705    
706      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
707      argument, and we must check whether that argument matches this MARK's      argument, and we must check whether that argument matches this MARK's
# Line 712  for (;;) Line 710  for (;;)
710      position and return MATCH_SKIP. Otherwise, pass back the return code      position and return MATCH_SKIP. Otherwise, pass back the return code
711      unaltered. */      unaltered. */
712    
713      if (rrc == MATCH_SKIP_ARG &&      else if (rrc == MATCH_SKIP_ARG &&
714          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)          STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
715        {        {
716        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
717        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
718        }        }
   
     if (md->mark == NULL) md->mark = markptr;  
719      RRETURN(rrc);      RRETURN(rrc);
720    
721      case OP_FAIL:      case OP_FAIL:
722      MRRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
723    
724      /* COMMIT overrides PRUNE, SKIP, and THEN */      /* COMMIT overrides PRUNE, SKIP, and THEN */
725    
726      case OP_COMMIT:      case OP_COMMIT:
727      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
728        eptrb, RM52);        eptrb, RM52);
729      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
730          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
731          rrc != MATCH_THEN)          rrc != MATCH_THEN)
732        RRETURN(rrc);        RRETURN(rrc);
733      MRRETURN(MATCH_COMMIT);      RRETURN(MATCH_COMMIT);
734    
735      /* PRUNE overrides THEN */      /* PRUNE overrides THEN */
736    
737      case OP_PRUNE:      case OP_PRUNE:
738      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
739        eptrb, RM51);        eptrb, RM51);
740      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
741      MRRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
742    
743      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
744      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      md->nomatch_mark = ecode + 2;
745        md->mark = NULL;    /* In case previously set by assertion */
746        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
747        eptrb, RM56);        eptrb, RM56);
748        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
749             md->mark == NULL) md->mark = ecode + 2;
750      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
     md->mark = ecode + 2;  
751      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
752    
753      /* SKIP overrides PRUNE and THEN */      /* SKIP overrides PRUNE and THEN */
754    
755      case OP_SKIP:      case OP_SKIP:
756      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
757        eptrb, RM53);        eptrb, RM53);
758      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
759        RRETURN(rrc);        RRETURN(rrc);
760      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
761      MRRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
762    
763        /* Note that, for Perl compatibility, SKIP with an argument does NOT set
764        nomatch_mark. There is a flag that disables this opcode when re-matching a
765        pattern that ended with a SKIP for which there was not a matching MARK. */
766    
767      case OP_SKIP_ARG:      case OP_SKIP_ARG:
768      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      if (md->ignore_skip_arg)
769          {
770          ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
771          break;
772          }
773        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
774        eptrb, RM57);        eptrb, RM57);
775      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
776        RRETURN(rrc);        RRETURN(rrc);
777    
778      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
779      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
780      caught by a matching MARK, or get to the top, where it is treated the same      caught by a matching MARK, or get to the top, where it causes a rematch
781      as PRUNE. */      with the md->ignore_skip_arg flag set. */
782    
783      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
784      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
785    
786      /* For THEN (and THEN_ARG) we pass back the address of the bracket or      /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
787      the alt that is at the start of the current branch. This makes it possible      the branch in which it occurs can be determined. Overload the start of
788      to skip back past alternatives that precede the THEN within the current      match pointer to do this. */
     branch. */  
789    
790      case OP_THEN:      case OP_THEN:
791      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
792        eptrb, RM54);        eptrb, RM54);
793      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
794      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
795      MRRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
796    
797      case OP_THEN_ARG:      case OP_THEN_ARG:
798      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],      md->nomatch_mark = ecode + 2;
799        offset_top, md, eptrb, RM58);      md->mark = NULL;    /* In case previously set by assertion */
800        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
801          md, eptrb, RM58);
802        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
803             md->mark == NULL) md->mark = ecode + 2;
804      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
805      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
     md->mark = ecode + LINK_SIZE + 2;  
806      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
807    
808        /* Handle an atomic group that does not contain any capturing parentheses.
809        This can be handled like an assertion. Prior to 8.13, all atomic groups
810        were handled this way. In 8.13, the code was changed as below for ONCE, so
811        that backups pass through the group and thereby reset captured values.
812        However, this uses a lot more stack, so in 8.20, atomic groups that do not
813        contain any captures generate OP_ONCE_NC, which can be handled in the old,
814        less stack intensive way.
815    
816        Check the alternative branches in turn - the matching won't pass the KET
817        for this kind of subpattern. If any one branch matches, we carry on as at
818        the end of a normal bracket, leaving the subject pointer, but resetting
819        the start-of-match value in case it was changed by \K. */
820    
821        case OP_ONCE_NC:
822        prev = ecode;
823        saved_eptr = eptr;
824        save_mark = md->mark;
825        do
826          {
827          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
828          if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
829            {
830            mstart = md->start_match_ptr;
831            break;
832            }
833          if (rrc == MATCH_THEN)
834            {
835            next = ecode + GET(ecode,1);
836            if (md->start_match_ptr < next &&
837                (*ecode == OP_ALT || *next == OP_ALT))
838              rrc = MATCH_NOMATCH;
839            }
840    
841          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
842          ecode += GET(ecode,1);
843          md->mark = save_mark;
844          }
845        while (*ecode == OP_ALT);
846    
847        /* If we hit the end of the group (which could be repeated), fail */
848    
849        if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
850    
851        /* Continue as from after the group, updating the offsets high water
852        mark, since extracts may have been taken. */
853    
854        do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
855    
856        offset_top = md->end_offset_top;
857        eptr = md->end_match_ptr;
858    
859        /* For a non-repeating ket, just continue at this level. This also
860        happens for a repeating ket if no characters were matched in the group.
861        This is the forcible breaking of infinite loops as implemented in Perl
862        5.005. */
863    
864        if (*ecode == OP_KET || eptr == saved_eptr)
865          {
866          ecode += 1+LINK_SIZE;
867          break;
868          }
869    
870        /* The repeating kets try the rest of the pattern or restart from the
871        preceding bracket, in the appropriate order. The second "call" of match()
872        uses tail recursion, to avoid using another stack frame. */
873    
874        if (*ecode == OP_KETRMIN)
875          {
876          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
877          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
878          ecode = prev;
879          goto TAIL_RECURSE;
880          }
881        else  /* OP_KETRMAX */
882          {
883          md->match_function_type = MATCH_CBEGROUP;
884          RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
885          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
886          ecode += 1 + LINK_SIZE;
887          goto TAIL_RECURSE;
888          }
889        /* Control never gets here */
890    
891      /* Handle a capturing bracket, other than those that are possessive with an      /* Handle a capturing bracket, other than those that are possessive with an
892      unlimited repeat. If there is space in the offset vector, save the current      unlimited repeat. If there is space in the offset vector, save the current
893      subject position in the working slot at the top of the vector. We mustn't      subject position in the working slot at the top of the vector. We mustn't
# Line 827  for (;;) Line 920  for (;;)
920        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
921        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
922        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
923          save_mark = md->mark;
924    
925        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
926        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 835  for (;;) Line 929  for (;;)
929        for (;;)        for (;;)
930          {          {
931          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
932          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
933            eptrb, RM1);            eptrb, RM1);
934          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
935          if (rrc != MATCH_NOMATCH &&  
936              (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* If we backed up to a THEN, check whether it is within the current
937            RRETURN(rrc);          branch by comparing the address of the THEN that is passed back with
938            the end of the branch. If it is within the current branch, and the
939            branch is one of two or more alternatives (it either starts or ends
940            with OP_ALT), we have reached the limit of THEN's action, so convert
941            the return code to NOMATCH, which will cause normal backtracking to
942            happen from now on. Otherwise, THEN is passed back to an outer
943            alternative. This implements Perl's treatment of parenthesized groups,
944            where a group not containing | does not affect the current alternative,
945            that is, (X) is NOT the same as (X|(*F)). */
946    
947            if (rrc == MATCH_THEN)
948              {
949              next = ecode + GET(ecode,1);
950              if (md->start_match_ptr < next &&
951                  (*ecode == OP_ALT || *next == OP_ALT))
952                rrc = MATCH_NOMATCH;
953              }
954    
955            /* Anything other than NOMATCH is passed back. */
956    
957            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
958          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
959          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
960            md->mark = save_mark;
961          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
962          }          }
963    
# Line 851  for (;;) Line 966  for (;;)
966        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
967        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
968    
969        /* At this point, rrc will be one of MATCH_ONCE, MATCH_NOMATCH, or        /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
       MATCH_THEN. */  
970    
971        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;        RRETURN(rrc);
       RRETURN(((rrc == MATCH_ONCE)? MATCH_ONCE:MATCH_NOMATCH));  
972        }        }
973    
974      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
# Line 870  for (;;) Line 983  for (;;)
983      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
984    
985      /* Non-capturing or atomic group, except for possessive with unlimited      /* Non-capturing or atomic group, except for possessive with unlimited
986      repeat. Loop for all the alternatives. When we get to the final alternative      repeat and ONCE group with no captures. Loop for all the alternatives.
987      within the brackets, we used to return the result of a recursive call to  
988      match() whatever happened so it was possible to reduce stack usage by      When we get to the final alternative within the brackets, we used to return
989      turning this into a tail recursion, except in the case of a possibly empty      the result of a recursive call to match() whatever happened so it was
990      group. However, now that there is the possiblity of (*THEN) occurring in      possible to reduce stack usage by turning this into a tail recursion,
991      the final alternative, this optimization is no longer possible.      except in the case of a possibly empty group. However, now that there is
992        the possibility of (*THEN) occurring in the final alternative, this
993        optimization is no longer always possible.
994    
995        We can optimize if we know there are no (*THEN)s in the pattern; at present
996        this is the best that can be done.
997    
998      MATCH_ONCE is returned when the end of an atomic group is successfully      MATCH_ONCE is returned when the end of an atomic group is successfully
999      reached, but subsequent matching fails. It passes back up the tree (causing      reached, but subsequent matching fails. It passes back up the tree (causing
# Line 892  for (;;) Line 1010  for (;;)
1010      for (;;)      for (;;)
1011        {        {
1012        if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
1013        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,  
1014          /* If this is not a possibly empty group, and there are no (*THEN)s in
1015          the pattern, and this is the final alternative, optimize as described
1016          above. */
1017    
1018          else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1019            {
1020            ecode += PRIV(OP_lengths)[*ecode];
1021            goto TAIL_RECURSE;
1022            }
1023    
1024          /* In all other cases, we have to make another call to match(). */
1025    
1026          save_mark = md->mark;
1027          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1028          RM2);          RM2);
1029        if (rrc != MATCH_NOMATCH &&  
1030            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1031          THEN. */
1032    
1033          if (rrc == MATCH_THEN)
1034            {
1035            next = ecode + GET(ecode,1);
1036            if (md->start_match_ptr < next &&
1037                (*ecode == OP_ALT || *next == OP_ALT))
1038              rrc = MATCH_NOMATCH;
1039            }
1040    
1041          if (rrc != MATCH_NOMATCH)
1042          {          {
1043          if (rrc == MATCH_ONCE)          if (rrc == MATCH_ONCE)
1044            {            {
1045            const uschar *scode = ecode;            const pcre_uchar *scode = ecode;
1046            if (*scode != OP_ONCE)           /* If not at start, find it */            if (*scode != OP_ONCE)           /* If not at start, find it */
1047              {              {
1048              while (*scode == OP_ALT) scode += GET(scode, 1);              while (*scode == OP_ALT) scode += GET(scode, 1);
# Line 910  for (;;) Line 1053  for (;;)
1053          RRETURN(rrc);          RRETURN(rrc);
1054          }          }
1055        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1056          md->mark = save_mark;
1057        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1058        }        }
1059      if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;  
1060      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
1061    
1062      /* Handle possessive capturing brackets with an unlimited repeat. We come      /* Handle possessive capturing brackets with an unlimited repeat. We come
# Line 941  for (;;) Line 1085  for (;;)
1085      if (offset < md->offset_max)      if (offset < md->offset_max)
1086        {        {
1087        matched_once = FALSE;        matched_once = FALSE;
1088        code_offset = ecode - md->start_code;        code_offset = (int)(ecode - md->start_code);
1089    
1090        save_offset1 = md->offset_vector[offset];        save_offset1 = md->offset_vector[offset];
1091        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
# Line 964  for (;;) Line 1108  for (;;)
1108          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1109            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1110          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1111          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1112            eptrb, RM63);            eptrb, RM63);
1113          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1114            {            {
# Line 975  for (;;) Line 1119  for (;;)
1119            matched_once = TRUE;            matched_once = TRUE;
1120            continue;            continue;
1121            }            }
1122          if (rrc != MATCH_NOMATCH &&  
1123              (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* See comment in the code for capturing groups above about handling
1124            RRETURN(rrc);          THEN. */
1125    
1126            if (rrc == MATCH_THEN)
1127              {
1128              next = ecode + GET(ecode,1);
1129              if (md->start_match_ptr < next &&
1130                  (*ecode == OP_ALT || *next == OP_ALT))
1131                rrc = MATCH_NOMATCH;
1132              }
1133    
1134            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1135          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
1136          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
1137          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
# Line 990  for (;;) Line 1144  for (;;)
1144          md->offset_vector[md->offset_end - number] = save_offset3;          md->offset_vector[md->offset_end - number] = save_offset3;
1145          }          }
1146    
       if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;  
1147        if (allow_zero || matched_once)        if (allow_zero || matched_once)
1148          {          {
1149          ecode += 1 + LINK_SIZE;          ecode += 1 + LINK_SIZE;
# Line 1022  for (;;) Line 1175  for (;;)
1175    
1176      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1177      matched_once = FALSE;      matched_once = FALSE;
1178      code_offset = ecode - md->start_code;      code_offset = (int)(ecode - md->start_code);
1179    
1180      for (;;)      for (;;)
1181        {        {
1182        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1183        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1184          eptrb, RM48);          eptrb, RM48);
1185        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1186          {          {
# Line 1037  for (;;) Line 1190  for (;;)
1190          matched_once = TRUE;          matched_once = TRUE;
1191          continue;          continue;
1192          }          }
1193        if (rrc != MATCH_NOMATCH &&  
1194            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1195          RRETURN(rrc);        THEN. */
1196    
1197          if (rrc == MATCH_THEN)
1198            {
1199            next = ecode + GET(ecode,1);
1200            if (md->start_match_ptr < next &&
1201                (*ecode == OP_ALT || *next == OP_ALT))
1202              rrc = MATCH_NOMATCH;
1203            }
1204    
1205          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1206        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1207        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1208        }        }
# Line 1067  for (;;) Line 1230  for (;;)
1230    
1231      if (ecode[LINK_SIZE+1] == OP_CALLOUT)      if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1232        {        {
1233        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
1234          {          {
1235          pcre_callout_block cb;          PUBL(callout_block) cb;
1236          cb.version          = 2;   /* Version 2 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
1237          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1238          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1239    #ifdef COMPILE_PCRE8
1240          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1241    #else
1242            cb.subject          = (PCRE_SPTR16)md->start_subject;
1243    #endif
1244          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1245          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
1246          cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
# Line 1082  for (;;) Line 1249  for (;;)
1249          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1250          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1251          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1252          cb.mark             = markptr;          cb.mark             = md->nomatch_mark;
1253          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1254          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1255          }          }
1256        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1257        }        }
1258    
1259      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1103  for (;;) Line 1270  for (;;)
1270        else        else
1271          {          {
1272          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1273          condition =  (recno == RREF_ANY || recno == md->recursive->group_num);          condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1274    
1275          /* If the test is for recursion into a specific subpattern, and it is          /* If the test is for recursion into a specific subpattern, and it is
1276          false, but the test was set up by name, scan the table to see if the          false, but the test was set up by name, scan the table to see if the
1277          name refers to any other numbers, and test them. The condition is true          name refers to any other numbers, and test them. The condition is true
1278          if any one is set. */          if any one is set. */
1279    
1280          if (!condition && condcode == OP_NRREF && recno != RREF_ANY)          if (!condition && condcode == OP_NRREF)
1281            {            {
1282            uschar *slotA = md->name_table;            pcre_uchar *slotA = md->name_table;
1283            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
1284              {              {
1285              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
# Line 1125  for (;;) Line 1292  for (;;)
1292    
1293            if (i < md->name_count)            if (i < md->name_count)
1294              {              {
1295              uschar *slotB = slotA;              pcre_uchar *slotB = slotA;
1296              while (slotB > md->name_table)              while (slotB > md->name_table)
1297                {                {
1298                slotB -= md->name_entry_size;                slotB -= md->name_entry_size;
1299                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1300                  {                  {
1301                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
1302                  if (condition) break;                  if (condition) break;
# Line 1145  for (;;) Line 1312  for (;;)
1312                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
1313                  {                  {
1314                  slotB += md->name_entry_size;                  slotB += md->name_entry_size;
1315                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                  if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1316                    {                    {
1317                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
1318                    if (condition) break;                    if (condition) break;
# Line 1158  for (;;) Line 1325  for (;;)
1325    
1326          /* Choose branch according to the condition */          /* Choose branch according to the condition */
1327    
1328          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1329          }          }
1330        }        }
1331    
# Line 1175  for (;;) Line 1342  for (;;)
1342        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1343          {          {
1344          int refno = offset >> 1;          int refno = offset >> 1;
1345          uschar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1346    
1347          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
1348            {            {
# Line 1189  for (;;) Line 1356  for (;;)
1356    
1357          if (i < md->name_count)          if (i < md->name_count)
1358            {            {
1359            uschar *slotB = slotA;            pcre_uchar *slotB = slotA;
1360            while (slotB > md->name_table)            while (slotB > md->name_table)
1361              {              {
1362              slotB -= md->name_entry_size;              slotB -= md->name_entry_size;
1363              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1364                {                {
1365                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
1366                condition = offset < offset_top &&                condition = offset < offset_top &&
# Line 1211  for (;;) Line 1378  for (;;)
1378              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
1379                {                {
1380                slotB += md->name_entry_size;                slotB += md->name_entry_size;
1381                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1382                  {                  {
1383                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
1384                  condition = offset < offset_top &&                  condition = offset < offset_top &&
# Line 1226  for (;;) Line 1393  for (;;)
1393    
1394        /* Choose branch according to the condition */        /* Choose branch according to the condition */
1395    
1396        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1397        }        }
1398    
1399      else if (condcode == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
# Line 1251  for (;;) Line 1418  for (;;)
1418          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1419          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1420          }          }
1421        else if (rrc != MATCH_NOMATCH &&  
1422                (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1423          assertion; it is therefore treated as NOMATCH. */
1424    
1425          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1426          {          {
1427          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1428          }          }
# Line 1263  for (;;) Line 1433  for (;;)
1433          }          }
1434        }        }
1435    
1436      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one, we can
1437      we used to use tail recursion to avoid using another stack frame, except      use tail recursion to avoid using another stack frame, except when there is
1438      when there was unlimited repeat of a possibly empty group. However, that      unlimited repeat of a possibly empty group. In the latter case, a recursive
1439      strategy no longer works because of the possibilty of (*THEN) being      call to match() is always required, unless the second alternative doesn't
1440      encountered in the branch. A recursive call to match() is always required,      exist, in which case we can just plough on. Note that, for compatibility
1441      unless the second alternative doesn't exist, in which case we can just      with Perl, the | in a conditional group is NOT treated as creating two
1442      plough on. */      alternatives. If a THEN is encountered in the branch, it propagates out to
1443        the enclosing alternative (unless nested in a deeper set of alternatives,
1444        of course). */
1445    
1446      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1447        {        {
1448        if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;        if (op != OP_SCOND)
1449            {
1450            ecode += 1 + LINK_SIZE;
1451            goto TAIL_RECURSE;
1452            }
1453    
1454          md->match_function_type = MATCH_CBEGROUP;
1455        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
       if (rrc == MATCH_THEN && md->start_match_ptr == ecode)  
         rrc = MATCH_NOMATCH;  
1456        RRETURN(rrc);        RRETURN(rrc);
1457        }        }
1458      else                         /* Condition false & no alternative */  
1459         /* Condition false & no alternative; continue after the group. */
1460    
1461        else
1462        {        {
1463        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1464        }        }
# Line 1306  for (;;) Line 1485  for (;;)
1485        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1486        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1487        }        }
1488      ecode += 3;      ecode += 1 + IMM2_SIZE;
1489      break;      break;
1490    
1491    
# Line 1326  for (;;) Line 1505  for (;;)
1505           (md->notempty ||           (md->notempty ||
1506             (md->notempty_atstart &&             (md->notempty_atstart &&
1507               mstart == md->start_subject + md->start_offset)))               mstart == md->start_subject + md->start_offset)))
1508        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1509    
1510      /* Otherwise, we have a match. */      /* Otherwise, we have a match. */
1511    
# Line 1335  for (;;) Line 1514  for (;;)
1514      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1515    
1516      /* For some reason, the macros don't work properly if an expression is      /* For some reason, the macros don't work properly if an expression is
1517      given as the argument to MRRETURN when the heap is in use. */      given as the argument to RRETURN when the heap is in use. */
1518    
1519      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1520      MRRETURN(rrc);      RRETURN(rrc);
1521    
1522      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
1523      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
# Line 1353  for (;;) Line 1532  for (;;)
1532    
1533      case OP_ASSERT:      case OP_ASSERT:
1534      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1535        save_mark = md->mark;
1536      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1537        {        {
1538        condassert = TRUE;        condassert = TRUE;
# Line 1366  for (;;) Line 1546  for (;;)
1546        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1547          {          {
1548          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
         markptr = md->mark;  
1549          break;          break;
1550          }          }
1551        if (rrc != MATCH_NOMATCH &&  
1552            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1553          RRETURN(rrc);        as NOMATCH. */
1554    
1555          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1556        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1557          md->mark = save_mark;
1558        }        }
1559      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1560    
1561      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1562    
1563      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1564    
# Line 1396  for (;;) Line 1578  for (;;)
1578    
1579      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1580      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1581        save_mark = md->mark;
1582      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1583        {        {
1584        condassert = TRUE;        condassert = TRUE;
# Line 1406  for (;;) Line 1589  for (;;)
1589      do      do
1590        {        {
1591        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1592        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);        md->mark = save_mark;
1593          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1594        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1595          {          {
1596          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1597          break;          break;
1598          }          }
1599        if (rrc != MATCH_NOMATCH &&  
1600            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1601          RRETURN(rrc);        as NOMATCH. */
1602    
1603          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1604        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1605        }        }
1606      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1430  for (;;) Line 1616  for (;;)
1616      back a number of characters, not bytes. */      back a number of characters, not bytes. */
1617    
1618      case OP_REVERSE:      case OP_REVERSE:
1619  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1620      if (utf8)      if (utf)
1621        {        {
1622        i = GET(ecode, 1);        i = GET(ecode, 1);
1623        while (i-- > 0)        while (i-- > 0)
1624          {          {
1625          eptr--;          eptr--;
1626          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1627          BACKCHAR(eptr);          BACKCHAR(eptr);
1628          }          }
1629        }        }
# Line 1448  for (;;) Line 1634  for (;;)
1634    
1635        {        {
1636        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1637        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1638        }        }
1639    
1640      /* Save the earliest consulted character, then skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
# Line 1462  for (;;) Line 1648  for (;;)
1648      function is able to force a failure. */      function is able to force a failure. */
1649    
1650      case OP_CALLOUT:      case OP_CALLOUT:
1651      if (pcre_callout != NULL)      if (PUBL(callout) != NULL)
1652        {        {
1653        pcre_callout_block cb;        PUBL(callout_block) cb;
1654        cb.version          = 2;   /* Version 2 of the callout block */        cb.version          = 2;   /* Version 2 of the callout block */
1655        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1656        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1657    #ifdef COMPILE_PCRE8
1658        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1659    #else
1660          cb.subject          = (PCRE_SPTR16)md->start_subject;
1661    #endif
1662        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1663        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
1664        cb.current_position = (int)(eptr - md->start_subject);        cb.current_position = (int)(eptr - md->start_subject);
# Line 1477  for (;;) Line 1667  for (;;)
1667        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1668        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1669        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1670        cb.mark             = markptr;        cb.mark             = md->nomatch_mark;
1671        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1672        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1673        }        }
1674      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 1505  for (;;) Line 1695  for (;;)
1695        {        {
1696        recursion_info *ri;        recursion_info *ri;
1697        int recno;        int recno;
1698    
1699        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1700        recno = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
1701          GET2(callpat, 1 + LINK_SIZE);          GET2(callpat, 1 + LINK_SIZE);
1702    
1703        /* Check for repeating a recursion without advancing the subject pointer.        /* Check for repeating a recursion without advancing the subject pointer.
1704        This should catch convoluted mutual recursions. (Some simple cases are        This should catch convoluted mutual recursions. (Some simple cases are
1705        caught at compile time.) */        caught at compile time.) */
1706    
1707        for (ri = md->recursive; ri != NULL; ri = ri->prevrec)        for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1708          if (recno == ri->group_num && eptr == ri->subject_position)          if (recno == ri->group_num && eptr == ri->subject_position)
1709            RRETURN(PCRE_ERROR_RECURSELOOP);            RRETURN(PCRE_ERROR_RECURSELOOP);
1710    
1711        /* Add to "recursing stack" */        /* Add to "recursing stack" */
# Line 1537  for (;;) Line 1727  for (;;)
1727        else        else
1728          {          {
1729          new_recursive.offset_save =          new_recursive.offset_save =
1730            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));            (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1731          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1732          }          }
1733        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
# Line 1552  for (;;) Line 1742  for (;;)
1742        do        do
1743          {          {
1744          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1745          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1746            md, eptrb, RM6);            md, eptrb, RM6);
1747          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1748              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1749            md->recursive = new_recursive.prevrec;
1750          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1751            {            {
1752            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
           md->recursive = new_recursive.prevrec;  
1753            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1754              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1755    
1756            /* Set where we got to in the subject, and reset the start in case            /* Set where we got to in the subject, and reset the start in case
1757            it was changed by \K. This *is* propagated back out of a recursion,            it was changed by \K. This *is* propagated back out of a recursion,
# Line 1571  for (;;) Line 1761  for (;;)
1761            mstart = md->start_match_ptr;            mstart = md->start_match_ptr;
1762            goto RECURSION_MATCHED;        /* Exit loop; end processing */            goto RECURSION_MATCHED;        /* Exit loop; end processing */
1763            }            }
1764          else if (rrc != MATCH_NOMATCH &&  
1765                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* PCRE does not allow THEN to escape beyond a recursion; it is treated
1766            as NOMATCH. */
1767    
1768            else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1769            {            {
1770            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1771            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1772              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1773            RRETURN(rrc);            RRETURN(rrc);
1774            }            }
1775    
# Line 1588  for (;;) Line 1781  for (;;)
1781        DPRINTF(("Recursion didn't match\n"));        DPRINTF(("Recursion didn't match\n"));
1782        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1783        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1784          (pcre_free)(new_recursive.offset_save);          (PUBL(free))(new_recursive.offset_save);
1785        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1786        }        }
1787    
1788      RECURSION_MATCHED:      RECURSION_MATCHED:
# Line 1658  for (;;) Line 1851  for (;;)
1851        }        }
1852      else saved_eptr = NULL;      else saved_eptr = NULL;
1853    
1854      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or a non-capturing atomic
1855      MATCH_MATCH, but record the current high water mark for use by positive      group, stop matching and return MATCH_MATCH, but record the current high
1856      assertions. We also need to record the match start in case it was changed      water mark for use by positive assertions. We also need to record the match
1857      by \K. */      start in case it was changed by \K. */
1858    
1859      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1860          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT)           *prev == OP_ONCE_NC)
1861        {        {
1862        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE_NC */
1863        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1864        md->start_match_ptr = mstart;        md->start_match_ptr = mstart;
1865        MRRETURN(MATCH_MATCH);         /* Sets md->mark */        RRETURN(MATCH_MATCH);         /* Sets md->mark */
1866        }        }
1867    
1868      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1734  for (;;) Line 1927  for (;;)
1927      /* For an ordinary non-repeating ket, just continue at this level. This      /* For an ordinary non-repeating ket, just continue at this level. This
1928      also happens for a repeating ket if no characters were matched in the      also happens for a repeating ket if no characters were matched in the
1929      group. This is the forcible breaking of infinite loops as implemented in      group. This is the forcible breaking of infinite loops as implemented in
1930      Perl 5.005. For a non-repeating atomic group, establish a backup point by      Perl 5.005. For a non-repeating atomic group that includes captures,
1931      processing the rest of the pattern at a lower level. If this results in a      establish a backup point by processing the rest of the pattern at a lower
1932      NOMATCH return, pass MATCH_ONCE back to the original OP_ONCE level, thereby      level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1933      bypassing intermediate backup points, but resetting any captures that      original OP_ONCE level, thereby bypassing intermediate backup points, but
1934      happened along the way. */      resetting any captures that happened along the way. */
1935    
1936      if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1937        {        {
# Line 1811  for (;;) Line 2004  for (;;)
2004      /* Not multiline mode: start of subject assertion, unless notbol. */      /* Not multiline mode: start of subject assertion, unless notbol. */
2005    
2006      case OP_CIRC:      case OP_CIRC:
2007      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2008    
2009      /* Start of subject assertion */      /* Start of subject assertion */
2010    
2011      case OP_SOD:      case OP_SOD:
2012      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2013      ecode++;      ecode++;
2014      break;      break;
2015    
2016      /* Multiline mode: start of subject unless notbol, or after any newline. */      /* Multiline mode: start of subject unless notbol, or after any newline. */
2017    
2018      case OP_CIRCM:      case OP_CIRCM:
2019      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2020      if (eptr != md->start_subject &&      if (eptr != md->start_subject &&
2021          (eptr == md->end_subject || !WAS_NEWLINE(eptr)))          (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2022        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2023      ecode++;      ecode++;
2024      break;      break;
2025    
2026      /* Start of match assertion */      /* Start of match assertion */
2027    
2028      case OP_SOM:      case OP_SOM:
2029      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2030      ecode++;      ecode++;
2031      break;      break;
2032    
# Line 1849  for (;;) Line 2042  for (;;)
2042    
2043      case OP_DOLLM:      case OP_DOLLM:
2044      if (eptr < md->end_subject)      if (eptr < md->end_subject)
2045        { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }        { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
2046      else      else
2047        {        {
2048        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
2049        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2050        }        }
2051      ecode++;      ecode++;
# Line 1862  for (;;) Line 2055  for (;;)
2055      subject unless noteol is set. */      subject unless noteol is set. */
2056    
2057      case OP_DOLL:      case OP_DOLL:
2058      if (md->noteol) MRRETURN(MATCH_NOMATCH);      if (md->noteol) RRETURN(MATCH_NOMATCH);
2059      if (!md->endonly) goto ASSERT_NL_OR_EOS;      if (!md->endonly) goto ASSERT_NL_OR_EOS;
2060    
2061      /* ... else fall through for endonly */      /* ... else fall through for endonly */
# Line 1870  for (;;) Line 2063  for (;;)
2063      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
2064    
2065      case OP_EOD:      case OP_EOD:
2066      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2067      SCHECK_PARTIAL();      SCHECK_PARTIAL();
2068      ecode++;      ecode++;
2069      break;      break;
# Line 1881  for (;;) Line 2074  for (;;)
2074      ASSERT_NL_OR_EOS:      ASSERT_NL_OR_EOS:
2075      if (eptr < md->end_subject &&      if (eptr < md->end_subject &&
2076          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2077        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2078    
2079      /* Either at end of string or \n before end. */      /* Either at end of string or \n before end. */
2080    
# Line 1900  for (;;) Line 2093  for (;;)
2093        be "non-word" characters. Remember the earliest consulted character for        be "non-word" characters. Remember the earliest consulted character for
2094        partial matching. */        partial matching. */
2095    
2096  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2097        if (utf8)        if (utf)
2098          {          {
2099          /* Get status of previous character */          /* Get status of previous character */
2100    
2101          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
2102            {            {
2103            USPTR lastptr = eptr - 1;            PCRE_PUCHAR lastptr = eptr - 1;
2104            while((*lastptr & 0xc0) == 0x80) lastptr--;            BACKCHAR(lastptr);
2105            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2106            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
2107  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 1973  for (;;) Line 2166  for (;;)
2166              }              }
2167            else            else
2168  #endif  #endif
2169            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = MAX_255(eptr[-1])
2170                && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2171            }            }
2172    
2173          /* Get status of next character */          /* Get status of next character */
# Line 1996  for (;;) Line 2190  for (;;)
2190            }            }
2191          else          else
2192  #endif  #endif
2193          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = MAX_255(*eptr)
2194              && ((md->ctypes[*eptr] & ctype_word) != 0);
2195          }          }
2196    
2197        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
2198    
2199        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
2200             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2201          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2202        }        }
2203      break;      break;
2204    
2205      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
2206    
2207      case OP_ANY:      case OP_ANY:
2208      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2209      /* Fall through */      /* Fall through */
2210    
2211      case OP_ALLANY:      case OP_ALLANY:
2212      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2213        {                            /* not be updated before SCHECK_PARTIAL. */        {                            /* not be updated before SCHECK_PARTIAL. */
2214        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2215        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2216        }        }
2217      eptr++;      eptr++;
2218      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  #ifdef SUPPORT_UTF
2219        if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2220    #endif
2221      ecode++;      ecode++;
2222      break;      break;
2223    
# Line 2031  for (;;) Line 2228  for (;;)
2228      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2229        {                            /* not be updated before SCHECK_PARTIAL. */        {                            /* not be updated before SCHECK_PARTIAL. */
2230        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2231        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2232        }        }
2233      eptr++;      eptr++;
2234      ecode++;      ecode++;
2235      break;      break;
2236    
# Line 2041  for (;;) Line 2238  for (;;)
2238      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2239        {        {
2240        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2241        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2242        }        }
2243      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2244      if (      if (
2245  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2246         c < 256 &&         c < 256 &&
2247  #endif  #endif
2248         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
2249         )         )
2250        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2251      ecode++;      ecode++;
2252      break;      break;
2253    
# Line 2058  for (;;) Line 2255  for (;;)
2255      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2256        {        {
2257        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2258        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2259        }        }
2260      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2261      if (      if (
2262  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2263         c >= 256 ||         c > 255 ||
2264  #endif  #endif
2265         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
2266         )         )
2267        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2268      ecode++;      ecode++;
2269      break;      break;
2270    
# Line 2075  for (;;) Line 2272  for (;;)
2272      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2273        {        {
2274        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2275        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2276        }        }
2277      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2278      if (      if (
2279  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2280         c < 256 &&         c < 256 &&
2281  #endif  #endif
2282         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
2283         )         )
2284        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2285      ecode++;      ecode++;
2286      break;      break;
2287    
# Line 2092  for (;;) Line 2289  for (;;)
2289      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2290        {        {
2291        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2292        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2293        }        }
2294      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2295      if (      if (
2296  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2297         c >= 256 ||         c > 255 ||
2298  #endif  #endif
2299         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
2300         )         )
2301        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2302      ecode++;      ecode++;
2303      break;      break;
2304    
# Line 2109  for (;;) Line 2306  for (;;)
2306      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2307        {        {
2308        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2309        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2310        }        }
2311      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2312      if (      if (
2313  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2314         c < 256 &&         c < 256 &&
2315  #endif  #endif
2316         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
2317         )         )
2318        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2319      ecode++;      ecode++;
2320      break;      break;
2321    
# Line 2126  for (;;) Line 2323  for (;;)
2323      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2324        {        {
2325        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2326        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2327        }        }
2328      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2329      if (      if (
2330  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2331         c >= 256 ||         c > 255 ||
2332  #endif  #endif
2333         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2334         )         )
2335        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2336      ecode++;      ecode++;
2337      break;      break;
2338    
# Line 2143  for (;;) Line 2340  for (;;)
2340      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2341        {        {
2342        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2343        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2344        }        }
2345      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2346      switch(c)      switch(c)
2347        {        {
2348        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2349    
2350        case 0x000d:        case 0x000d:
2351        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
# Line 2162  for (;;) Line 2359  for (;;)
2359        case 0x0085:        case 0x0085:
2360        case 0x2028:        case 0x2028:
2361        case 0x2029:        case 0x2029:
2362        if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);        if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2363        break;        break;
2364        }        }
2365      ecode++;      ecode++;
# Line 2172  for (;;) Line 2369  for (;;)
2369      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2370        {        {
2371        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2372        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2373        }        }
2374      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2375      switch(c)      switch(c)
# Line 2197  for (;;) Line 2394  for (;;)
2394        case 0x202f:    /* NARROW NO-BREAK SPACE */        case 0x202f:    /* NARROW NO-BREAK SPACE */
2395        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2396        case 0x3000:    /* IDEOGRAPHIC SPACE */        case 0x3000:    /* IDEOGRAPHIC SPACE */
2397        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2398        }        }
2399      ecode++;      ecode++;
2400      break;      break;
# Line 2206  for (;;) Line 2403  for (;;)
2403      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2404        {        {
2405        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2406        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2407        }        }
2408      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2409      switch(c)      switch(c)
2410        {        {
2411        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2412        case 0x09:      /* HT */        case 0x09:      /* HT */
2413        case 0x20:      /* SPACE */        case 0x20:      /* SPACE */
2414        case 0xa0:      /* NBSP */        case 0xa0:      /* NBSP */
# Line 2240  for (;;) Line 2437  for (;;)
2437      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2438        {        {
2439        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2440        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2441        }        }
2442      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2443      switch(c)      switch(c)
# Line 2253  for (;;) Line 2450  for (;;)
2450        case 0x85:      /* NEL */        case 0x85:      /* NEL */
2451        case 0x2028:    /* LINE SEPARATOR */        case 0x2028:    /* LINE SEPARATOR */
2452        case 0x2029:    /* PARAGRAPH SEPARATOR */        case 0x2029:    /* PARAGRAPH SEPARATOR */
2453        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2454        }        }
2455      ecode++;      ecode++;
2456      break;      break;
# Line 2262  for (;;) Line 2459  for (;;)
2459      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2460        {        {
2461        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2462        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2463        }        }
2464      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2465      switch(c)      switch(c)
2466        {        {
2467        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2468        case 0x0a:      /* LF */        case 0x0a:      /* LF */
2469        case 0x0b:      /* VT */        case 0x0b:      /* VT */
2470        case 0x0c:      /* FF */        case 0x0c:      /* FF */
# Line 2289  for (;;) Line 2486  for (;;)
2486      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2487        {        {
2488        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2489        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2490        }        }
2491      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2492        {        {
# Line 2298  for (;;) Line 2495  for (;;)
2495        switch(ecode[1])        switch(ecode[1])
2496          {          {
2497          case PT_ANY:          case PT_ANY:
2498          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2499          break;          break;
2500    
2501          case PT_LAMP:          case PT_LAMP:
2502          if ((prop->chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2503               prop->chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2504               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2505            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2506          break;          break;
2507    
2508          case PT_GC:          case PT_GC:
2509          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2510            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2511          break;          break;
2512    
2513          case PT_PC:          case PT_PC:
2514          if ((ecode[2] != prop->chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2515            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2516          break;          break;
2517    
2518          case PT_SC:          case PT_SC:
2519          if ((ecode[2] != prop->script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2520            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2521          break;          break;
2522    
2523          /* These are specials */          /* These are specials */
2524    
2525          case PT_ALNUM:          case PT_ALNUM:
2526          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2527               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2528            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2529          break;          break;
2530    
2531          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2532          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2533               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2534                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2535            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2536          break;          break;
2537    
2538          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2539          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2540               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2541               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2542                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2543            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2544          break;          break;
2545    
2546          case PT_WORD:          case PT_WORD:
2547          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2548               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2549               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2550            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2551          break;          break;
2552    
2553          /* This should never occur */          /* This should never occur */
# Line 2370  for (;;) Line 2567  for (;;)
2567      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2568        {        {
2569        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2570        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2571        }        }
2572      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2573      if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);      if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
2574      while (eptr < md->end_subject)      while (eptr < md->end_subject)
2575        {        {
2576        int len = 1;        int len = 1;
2577        if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }        if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2578        if (UCD_CATEGORY(c) != ucp_M) break;        if (UCD_CATEGORY(c) != ucp_M) break;
2579        eptr += len;        eptr += len;
2580        }        }
# Line 2398  for (;;) Line 2595  for (;;)
2595      case OP_REFI:      case OP_REFI:
2596      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2597      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2598      ecode += 3;      ecode += 1 + IMM2_SIZE;
2599    
2600      /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2601    
# Line 2438  for (;;) Line 2635  for (;;)
2635        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2636        minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2637        min = GET2(ecode, 1);        min = GET2(ecode, 1);
2638        max = GET2(ecode, 3);        max = GET2(ecode, 1 + IMM2_SIZE);
2639        if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2640        ecode += 5;        ecode += 1 + 2 * IMM2_SIZE;
2641        break;        break;
2642    
2643        default:               /* No repeat follows */        default:               /* No repeat follows */
2644        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2645          {          {
2646          CHECK_PARTIAL();          CHECK_PARTIAL();
2647          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2648          }          }
2649        eptr += length;        eptr += length;
2650        continue;              /* With the main loop */        continue;              /* With the main loop */
2651        }        }
2652    
2653      /* Handle repeated back references. If the length of the reference is      /* Handle repeated back references. If the length of the reference is
2654      zero, just continue with the main loop. */      zero, just continue with the main loop. If the length is negative, it
2655        means the reference is unset in non-Java-compatible mode. If the minimum is
2656        zero, we can continue at the same level without recursion. For any other
2657        minimum, carrying on will result in NOMATCH. */
2658    
2659      if (length == 0) continue;      if (length == 0) continue;
2660        if (length < 0 && min == 0) continue;
2661    
2662      /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2663      the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
# Line 2468  for (;;) Line 2669  for (;;)
2669        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2670          {          {
2671          CHECK_PARTIAL();          CHECK_PARTIAL();
2672          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2673          }          }
2674        eptr += slength;        eptr += slength;
2675        }        }
# Line 2487  for (;;) Line 2688  for (;;)
2688          int slength;          int slength;
2689          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2690          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2691          if (fi >= max) MRRETURN(MATCH_NOMATCH);          if (fi >= max) RRETURN(MATCH_NOMATCH);
2692          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2693            {            {
2694            CHECK_PARTIAL();            CHECK_PARTIAL();
2695            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2696            }            }
2697          eptr += slength;          eptr += slength;
2698          }          }
# Line 2519  for (;;) Line 2720  for (;;)
2720          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2721          eptr -= length;          eptr -= length;
2722          }          }
2723        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2724        }        }
2725      /* Control never gets here */      /* Control never gets here */
2726    
# Line 2537  for (;;) Line 2738  for (;;)
2738      case OP_NCLASS:      case OP_NCLASS:
2739      case OP_CLASS:      case OP_CLASS:
2740        {        {
2741          /* The data variable is saved across frames, so the byte map needs to
2742          be stored there. */
2743    #define BYTE_MAP ((pcre_uint8 *)data)
2744        data = ecode + 1;                /* Save for matching */        data = ecode + 1;                /* Save for matching */
2745        ecode += 33;                     /* Advance past the item */        ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2746    
2747        switch (*ecode)        switch (*ecode)
2748          {          {
# Line 2559  for (;;) Line 2763  for (;;)
2763          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2764          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2765          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2766          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2767          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2768          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2769          break;          break;
2770    
2771          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2571  for (;;) Line 2775  for (;;)
2775    
2776        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
2777    
2778  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2779        /* UTF-8 mode */        if (utf)
       if (utf8)  
2780          {          {
2781          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2782            {            {
2783            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2784              {              {
2785              SCHECK_PARTIAL();              SCHECK_PARTIAL();
2786              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2787              }              }
2788            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2789            if (c > 255)            if (c > 255)
2790              {              {
2791              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2792              }              }
2793            else            else
2794              {              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
             }  
2795            }            }
2796          }          }
2797        else        else
2798  #endif  #endif
2799        /* Not UTF-8 mode */        /* Not UTF mode */
2800          {          {
2801          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2802            {            {
2803            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2804              {              {
2805              SCHECK_PARTIAL();              SCHECK_PARTIAL();
2806              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2807              }              }
2808            c = *eptr++;            c = *eptr++;
2809            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2810              if (c > 255)
2811                {
2812                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2813                }
2814              else
2815    #endif
2816                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2817            }            }
2818          }          }
2819    
# Line 2619  for (;;) Line 2827  for (;;)
2827    
2828        if (minimize)        if (minimize)
2829          {          {
2830  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2831          /* UTF-8 mode */          if (utf)
         if (utf8)  
2832            {            {
2833            for (fi = min;; fi++)            for (fi = min;; fi++)
2834              {              {
2835              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2836              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2837              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2838              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2839                {                {
2840                SCHECK_PARTIAL();                SCHECK_PARTIAL();
2841                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2842                }                }
2843              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2844              if (c > 255)              if (c > 255)
2845                {                {
2846                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2847                }                }
2848              else              else
2849                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
               }  
2850              }              }
2851            }            }
2852          else          else
2853  #endif  #endif
2854          /* Not UTF-8 mode */          /* Not UTF mode */
2855            {            {
2856            for (fi = min;; fi++)            for (fi = min;; fi++)
2857              {              {
2858              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2859              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2860              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2861              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2862                {                {
2863                SCHECK_PARTIAL();                SCHECK_PARTIAL();
2864                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2865                }                }
2866              c = *eptr++;              c = *eptr++;
2867              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2868                if (c > 255)
2869                  {
2870                  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2871                  }
2872                else
2873    #endif
2874                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2875              }              }
2876            }            }
2877          /* Control never gets here */          /* Control never gets here */
# Line 2671  for (;;) Line 2883  for (;;)
2883          {          {
2884          pp = eptr;          pp = eptr;
2885    
2886  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2887          /* UTF-8 mode */          if (utf)
         if (utf8)  
2888            {            {
2889            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2890              {              {
# Line 2689  for (;;) Line 2900  for (;;)
2900                if (op == OP_CLASS) break;                if (op == OP_CLASS) break;
2901                }                }
2902              else              else
2903                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
               if ((data[c/8] & (1 << (c&7))) == 0) break;  
               }  
2904              eptr += len;              eptr += len;
2905              }              }
2906            for (;;)            for (;;)
# Line 2704  for (;;) Line 2913  for (;;)
2913            }            }
2914          else          else
2915  #endif  #endif
2916            /* Not UTF-8 mode */            /* Not UTF mode */
2917            {            {
2918            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2919              {              {
# Line 2714  for (;;) Line 2923  for (;;)
2923                break;                break;
2924                }                }
2925              c = *eptr;              c = *eptr;
2926              if ((data[c/8] & (1 << (c&7))) == 0) break;  #ifndef COMPILE_PCRE8
2927                if (c > 255)
2928                  {
2929                  if (op == OP_CLASS) break;
2930                  }
2931                else
2932    #endif
2933                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2934              eptr++;              eptr++;
2935              }              }
2936            while (eptr >= pp)            while (eptr >= pp)
# Line 2725  for (;;) Line 2941  for (;;)
2941              }              }
2942            }            }
2943    
2944          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2945          }          }
2946    #undef BYTE_MAP
2947        }        }
2948      /* Control never gets here */      /* Control never gets here */
2949    
# Line 2735  for (;;) Line 2952  for (;;)
2952      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2953      mode, because Unicode properties are supported in non-UTF-8 mode. */      mode, because Unicode properties are supported in non-UTF-8 mode. */
2954    
2955  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2956      case OP_XCLASS:      case OP_XCLASS:
2957        {        {
2958        data = ecode + 1 + LINK_SIZE;                /* Save for matching */        data = ecode + 1 + LINK_SIZE;                /* Save for matching */
# Line 2760  for (;;) Line 2977  for (;;)
2977          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2978          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2979          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2980          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2981          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2982          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2983          break;          break;
2984    
2985          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2777  for (;;) Line 2994  for (;;)
2994          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
2995            {            {
2996            SCHECK_PARTIAL();            SCHECK_PARTIAL();
2997            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2998            }            }
2999          GETCHARINCTEST(c, eptr);          GETCHARINCTEST(c, eptr);
3000          if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3001          }          }
3002    
3003        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2797  for (;;) Line 3014  for (;;)
3014            {            {
3015            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3016            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3017            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3018            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3019              {              {
3020              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3021              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3022              }              }
3023            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3024            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);            if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3025            }            }
3026          /* Control never gets here */          /* Control never gets here */
3027          }          }
# Line 2822  for (;;) Line 3039  for (;;)
3039              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3040              break;              break;
3041              }              }
3042    #ifdef SUPPORT_UTF
3043            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
3044            if (!_pcre_xclass(c, data)) break;  #else
3045              c = *eptr;
3046    #endif
3047              if (!PRIV(xclass)(c, data, utf)) break;
3048            eptr += len;            eptr += len;
3049            }            }
3050          for(;;)          for(;;)
# Line 2831  for (;;) Line 3052  for (;;)
3052            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3053            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3054            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3055            if (utf8) BACKCHAR(eptr);  #ifdef SUPPORT_UTF
3056              if (utf) BACKCHAR(eptr);
3057    #endif
3058            }            }
3059          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3060          }          }
3061    
3062        /* Control never gets here */        /* Control never gets here */
# Line 2843  for (;;) Line 3066  for (;;)
3066      /* Match a single character, casefully */      /* Match a single character, casefully */
3067    
3068      case OP_CHAR:      case OP_CHAR:
3069  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3070      if (utf8)      if (utf)
3071        {        {
3072        length = 1;        length = 1;
3073        ecode++;        ecode++;
# Line 2852  for (;;) Line 3075  for (;;)
3075        if (length > md->end_subject - eptr)        if (length > md->end_subject - eptr)
3076          {          {
3077          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3078          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3079          }          }
3080        while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
3081        }        }
3082      else      else
3083  #endif  #endif
3084        /* Not UTF mode */
     /* Non-UTF-8 mode */  
3085        {        {
3086        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3087          {          {
3088          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3089          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3090          }          }
3091        if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3092        ecode += 2;        ecode += 2;
3093        }        }
3094      break;      break;
3095    
3096      /* Match a single character, caselessly */      /* Match a single character, caselessly. If we are at the end of the
3097        subject, give up immediately. */
3098    
3099      case OP_CHARI:      case OP_CHARI:
3100  #ifdef SUPPORT_UTF8      if (eptr >= md->end_subject)
3101      if (utf8)        {
3102          SCHECK_PARTIAL();
3103          RRETURN(MATCH_NOMATCH);
3104          }
3105    
3106    #ifdef SUPPORT_UTF
3107        if (utf)
3108        {        {
3109        length = 1;        length = 1;
3110        ecode++;        ecode++;
3111        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
3112    
       if (length > md->end_subject - eptr)  
         {  
         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */  
         MRRETURN(MATCH_NOMATCH);  
         }  
   
3113        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
3114        can use the fast lookup table. */        we know that its other case must also be one byte long, so we can use the
3115          fast lookup table. We know that there is at least one byte left in the
3116          subject. */
3117    
3118        if (fc < 128)        if (fc < 128)
3119          {          {
3120          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          if (md->lcc[fc]
3121                != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3122            ecode++;
3123            eptr++;
3124          }          }
3125    
3126        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character. Note that we cannot
3127          use the value of "length" to check for sufficient bytes left, because the
3128          other case of the character may have more or fewer bytes.  */
3129    
3130        else        else
3131          {          {
# Line 2911  for (;;) Line 3141  for (;;)
3141  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3142            if (dc != UCD_OTHERCASE(fc))            if (dc != UCD_OTHERCASE(fc))
3143  #endif  #endif
3144              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3145            }            }
3146          }          }
3147        }        }
3148      else      else
3149  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF */
3150    
3151      /* Non-UTF-8 mode */      /* Not UTF mode */
3152        {        {
3153        if (md->end_subject - eptr < 1)        if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3154          {            != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3155          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */        eptr++;
         MRRETURN(MATCH_NOMATCH);  
         }  
       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);  
3156        ecode += 2;        ecode += 2;
3157        }        }
3158      break;      break;
# Line 2935  for (;;) Line 3162  for (;;)
3162      case OP_EXACT:      case OP_EXACT:
3163      case OP_EXACTI:      case OP_EXACTI:
3164      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3165      ecode += 3;      ecode += 1 + IMM2_SIZE;
3166      goto REPEATCHAR;      goto REPEATCHAR;
3167    
3168      case OP_POSUPTO:      case OP_POSUPTO:
# Line 2950  for (;;) Line 3177  for (;;)
3177      min = 0;      min = 0;
3178      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3179      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3180      ecode += 3;      ecode += 1 + IMM2_SIZE;
3181      goto REPEATCHAR;      goto REPEATCHAR;
3182    
3183      case OP_POSSTAR:      case OP_POSSTAR:
# Line 2998  for (;;) Line 3225  for (;;)
3225      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. */
3226    
3227      REPEATCHAR:      REPEATCHAR:
3228  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3229      if (utf8)      if (utf)
3230        {        {
3231        length = 1;        length = 1;
3232        charptr = ecode;        charptr = ecode;
# Line 3015  for (;;) Line 3242  for (;;)
3242          unsigned int othercase;          unsigned int othercase;
3243          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3244              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3245            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
3246          else oclength = 0;          else oclength = 0;
3247  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3248    
3249          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3250            {            {
3251            if (eptr <= md->end_subject - length &&            if (eptr <= md->end_subject - length &&
3252              memcmp(eptr, charptr, length) == 0) eptr += length;              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3253  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3254            else if (oclength > 0 &&            else if (oclength > 0 &&
3255                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
3256                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3257  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3258            else            else
3259              {              {
3260              CHECK_PARTIAL();              CHECK_PARTIAL();
3261              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3262              }              }
3263            }            }
3264    
# Line 3043  for (;;) Line 3270  for (;;)
3270              {              {
3271              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3272              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3273              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3274              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3275                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3276  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3277              else if (oclength > 0 &&              else if (oclength > 0 &&
3278                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3279                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3280  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3281              else              else
3282                {                {
3283                CHECK_PARTIAL();                CHECK_PARTIAL();
3284                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3285                }                }
3286              }              }
3287            /* Control never gets here */            /* Control never gets here */
# Line 3066  for (;;) Line 3293  for (;;)
3293            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3294              {              {
3295              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3296                  memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3297  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3298              else if (oclength > 0 &&              else if (oclength > 0 &&
3299                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3300                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3301  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3302              else              else
3303                {                {
# Line 3085  for (;;) Line 3312  for (;;)
3312              {              {
3313              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3314              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3315              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }              if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3316  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3317              eptr--;              eptr--;
3318              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3102  for (;;) Line 3329  for (;;)
3329        value of fc will always be < 128. */        value of fc will always be < 128. */
3330        }        }
3331      else      else
3332  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3333          /* When not in UTF-8 mode, load a single-byte character. */
3334      /* When not in UTF-8 mode, load a single-byte character. */        fc = *ecode++;
3335    
3336      fc = *ecode++;      /* The value of fc at this point is always one character, though we may
3337        or may not be in UTF mode. The code is duplicated for the caseless and
     /* The value of fc at this point is always less than 256, though we may or  
     may not be in UTF-8 mode. The code is duplicated for the caseless and  
3338      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
3339      common. First, ensure the minimum number of matches are present. If min =      common. First, ensure the minimum number of matches are present. If min =
3340      max, continue at the same level without recursing. Otherwise, if      max, continue at the same level without recursing. Otherwise, if
# Line 3122  for (;;) Line 3347  for (;;)
3347    
3348      if (op >= OP_STARI)  /* Caseless */      if (op >= OP_STARI)  /* Caseless */
3349        {        {
3350        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3351          /* fc must be < 128 if UTF is enabled. */
3352          foc = md->fcc[fc];
3353    #else
3354    #ifdef SUPPORT_UTF
3355    #ifdef SUPPORT_UCP
3356          if (utf && fc > 127)
3357            foc = UCD_OTHERCASE(fc);
3358    #else
3359          if (utf && fc > 127)
3360            foc = fc;
3361    #endif /* SUPPORT_UCP */
3362          else
3363    #endif /* SUPPORT_UTF */
3364            foc = TABLE_GET(fc, md->fcc, fc);
3365    #endif /* COMPILE_PCRE8 */
3366    
3367        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3368          {          {
3369          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3370            {            {
3371            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3372            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3373            }            }
3374          if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3375            eptr++;
3376          }          }
3377        if (min == max) continue;        if (min == max) continue;
3378        if (minimize)        if (minimize)
# Line 3139  for (;;) Line 3381  for (;;)
3381            {            {
3382            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3383            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3384            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3385            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3386              {              {
3387              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3388              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3389              }              }
3390            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3391              eptr++;
3392            }            }
3393          /* Control never gets here */          /* Control never gets here */
3394          }          }
# Line 3159  for (;;) Line 3402  for (;;)
3402              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3403              break;              break;
3404              }              }
3405            if (fc != md->lcc[*eptr]) break;            if (fc != *eptr && foc != *eptr) break;
3406            eptr++;            eptr++;
3407            }            }
3408    
# Line 3171  for (;;) Line 3414  for (;;)
3414            eptr--;            eptr--;
3415            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3416            }            }
3417          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3418          }          }
3419        /* Control never gets here */        /* Control never gets here */
3420        }        }
# Line 3185  for (;;) Line 3428  for (;;)
3428          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3429            {            {
3430            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3431            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3432            }            }
3433          if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3434          }          }
3435    
3436        if (min == max) continue;        if (min == max) continue;
# Line 3198  for (;;) Line 3441  for (;;)
3441            {            {
3442            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3443            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3444            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3445            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3446              {              {
3447              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3448              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3449              }              }
3450            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3451            }            }
3452          /* Control never gets here */          /* Control never gets here */
3453          }          }
# Line 3229  for (;;) Line 3472  for (;;)
3472            eptr--;            eptr--;
3473            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3474            }            }
3475          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3476          }          }
3477        }        }
3478      /* Control never gets here */      /* Control never gets here */
# Line 3242  for (;;) Line 3485  for (;;)
3485      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3486        {        {
3487        SCHECK_PARTIAL();        SCHECK_PARTIAL();
3488        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3489        }        }
3490      ecode++;      ecode++;
3491      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3492      if (op == OP_NOTI)         /* The caseless case */      if (op == OP_NOTI)         /* The caseless case */
3493        {        {
3494  #ifdef SUPPORT_UTF8        register int ch, och;
3495        if (c < 256)        ch = *ecode++;
3496  #endif  #ifdef COMPILE_PCRE8
3497        c = md->lcc[c];        /* ch must be < 128 if UTF is enabled. */
3498        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);        och = md->fcc[ch];
3499    #else
3500    #ifdef SUPPORT_UTF
3501    #ifdef SUPPORT_UCP
3502          if (utf && ch > 127)
3503            och = UCD_OTHERCASE(ch);
3504    #else
3505          if (utf && ch > 127)
3506            och = ch;
3507    #endif /* SUPPORT_UCP */
3508          else
3509    #endif /* SUPPORT_UTF */
3510            och = TABLE_GET(ch, md->fcc, ch);
3511    #endif /* COMPILE_PCRE8 */
3512          if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3513        }        }
3514      else    /* Caseful */      else    /* Caseful */
3515        {        {
3516        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
3517        }        }
3518      break;      break;
3519    
# Line 3270  for (;;) Line 3527  for (;;)
3527      case OP_NOTEXACT:      case OP_NOTEXACT:
3528      case OP_NOTEXACTI:      case OP_NOTEXACTI:
3529      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3530      ecode += 3;      ecode += 1 + IMM2_SIZE;
3531      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3532    
3533      case OP_NOTUPTO:      case OP_NOTUPTO:
# Line 3280  for (;;) Line 3537  for (;;)
3537      min = 0;      min = 0;
3538      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3539      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3540      ecode += 3;      ecode += 1 + IMM2_SIZE;
3541      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3542    
3543      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
# Line 3312  for (;;) Line 3569  for (;;)
3569      possessive = TRUE;      possessive = TRUE;
3570      min = 0;      min = 0;
3571      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3572      ecode += 3;      ecode += 1 + IMM2_SIZE;
3573      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3574    
3575      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 3351  for (;;) Line 3608  for (;;)
3608    
3609      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3610        {        {
3611        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3612          /* fc must be < 128 if UTF is enabled. */
3613          foc = md->fcc[fc];
3614    #else
3615    #ifdef SUPPORT_UTF
3616    #ifdef SUPPORT_UCP
3617          if (utf && fc > 127)
3618            foc = UCD_OTHERCASE(fc);
3619    #else
3620          if (utf && fc > 127)
3621            foc = fc;
3622    #endif /* SUPPORT_UCP */
3623          else
3624    #endif /* SUPPORT_UTF */
3625            foc = TABLE_GET(fc, md->fcc, fc);
3626    #endif /* COMPILE_PCRE8 */
3627    
3628  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3629        /* UTF-8 mode */        if (utf)
       if (utf8)  
3630          {          {
3631          register unsigned int d;          register unsigned int d;
3632          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3363  for (;;) Line 3634  for (;;)
3634            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3635              {              {
3636              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3637              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3638              }              }
3639            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3640            if (d < 256) d = md->lcc[d];            if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);
           if (fc == d) MRRETURN(MATCH_NOMATCH);  
3641            }            }
3642          }          }
3643        else        else
3644  #endif  #endif
3645          /* Not UTF mode */
       /* Not UTF-8 mode */  
3646          {          {
3647          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3648            {            {
3649            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3650              {              {
3651              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3652              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3653              }              }
3654            if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3655              eptr++;
3656            }            }
3657          }          }
3658    
# Line 3390  for (;;) Line 3660  for (;;)
3660    
3661        if (minimize)        if (minimize)
3662          {          {
3663  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3664          /* UTF-8 mode */          if (utf)
         if (utf8)  
3665            {            {
3666            register unsigned int d;            register unsigned int d;
3667            for (fi = min;; fi++)            for (fi = min;; fi++)
3668              {              {
3669              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3670              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3671              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3672              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3673                {                {
3674                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3675                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3676                }                }
3677              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3678              if (d < 256) d = md->lcc[d];              if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);
             if (fc == d) MRRETURN(MATCH_NOMATCH);  
3679              }              }
3680            }            }
3681          else          else
3682  #endif  #endif
3683          /* Not UTF-8 mode */          /* Not UTF mode */
3684            {            {
3685            for (fi = min;; fi++)            for (fi = min;; fi++)
3686              {              {
3687              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3688              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3689              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3690              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3691                {                {
3692                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3693                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3694                }                }
3695              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);              if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3696                eptr++;
3697              }              }
3698            }            }
3699          /* Control never gets here */          /* Control never gets here */
# Line 3436  for (;;) Line 3705  for (;;)
3705          {          {
3706          pp = eptr;          pp = eptr;
3707    
3708  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3709          /* UTF-8 mode */          if (utf)
         if (utf8)  
3710            {            {
3711            register unsigned int d;            register unsigned int d;
3712            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3450  for (;;) Line 3718  for (;;)
3718                break;                break;
3719                }                }
3720              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3721              if (d < 256) d = md->lcc[d];              if (fc == d || foc == d) break;
             if (fc == d) break;  
3722              eptr += len;              eptr += len;
3723              }              }
3724          if (possessive) continue;            if (possessive) continue;
3725          for(;;)            for(;;)
3726              {              {
3727              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3728              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 3465  for (;;) Line 3732  for (;;)
3732            }            }
3733          else          else
3734  #endif  #endif
3735          /* Not UTF-8 mode */          /* Not UTF mode */
3736            {            {
3737            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3738              {              {
# Line 3474  for (;;) Line 3741  for (;;)
3741                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3742                break;                break;
3743                }                }
3744              if (fc == md->lcc[*eptr]) break;              if (fc == *eptr || foc == *eptr) break;
3745              eptr++;              eptr++;
3746              }              }
3747            if (possessive) continue;            if (possessive) continue;
# Line 3486  for (;;) Line 3753  for (;;)
3753              }              }
3754            }            }
3755    
3756          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3757          }          }
3758        /* Control never gets here */        /* Control never gets here */
3759        }        }
# Line 3495  for (;;) Line 3762  for (;;)
3762    
3763      else      else
3764        {        {
3765  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3766        /* UTF-8 mode */        if (utf)
       if (utf8)  
3767          {          {
3768          register unsigned int d;          register unsigned int d;
3769          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3505  for (;;) Line 3771  for (;;)
3771            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3772              {              {
3773              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3774              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3775              }              }
3776            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3777            if (fc == d) MRRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
3778            }            }
3779          }          }
3780        else        else
3781  #endif  #endif
3782        /* Not UTF-8 mode */        /* Not UTF mode */
3783          {          {
3784          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3785            {            {
3786            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3787              {              {
3788              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3789              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3790              }              }
3791            if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3792            }            }
3793          }          }
3794    
# Line 3530  for (;;) Line 3796  for (;;)
3796    
3797        if (minimize)        if (minimize)
3798          {          {
3799  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3800          /* UTF-8 mode */          if (utf)
         if (utf8)  
3801            {            {
3802            register unsigned int d;            register unsigned int d;
3803            for (fi = min;; fi++)            for (fi = min;; fi++)
3804              {              {
3805              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3806              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3807              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3808              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3809                {                {
3810                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3811                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3812                }                }
3813              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3814              if (fc == d) MRRETURN(MATCH_NOMATCH);              if (fc == d) RRETURN(MATCH_NOMATCH);
3815              }              }
3816            }            }
3817          else          else
3818  #endif  #endif
3819          /* Not UTF-8 mode */          /* Not UTF mode */
3820            {            {
3821            for (fi = min;; fi++)            for (fi = min;; fi++)
3822              {              {
3823              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3824              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3825              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3826              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3827                {                {
3828                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3829                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3830                }                }
3831              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);              if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3832              }              }
3833            }            }
3834          /* Control never gets here */          /* Control never gets here */
# Line 3575  for (;;) Line 3840  for (;;)
3840          {          {
3841          pp = eptr;          pp = eptr;
3842    
3843  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3844          /* UTF-8 mode */          if (utf)
         if (utf8)  
3845            {            {
3846            register unsigned int d;            register unsigned int d;
3847            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3603  for (;;) Line 3867  for (;;)
3867            }            }
3868          else          else
3869  #endif  #endif
3870          /* Not UTF-8 mode */          /* Not UTF mode */
3871            {            {
3872            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3873              {              {
# Line 3624  for (;;) Line 3888  for (;;)
3888              }              }
3889            }            }
3890    
3891          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3892          }          }
3893        }        }
3894      /* Control never gets here */      /* Control never gets here */
# Line 3636  for (;;) Line 3900  for (;;)
3900      case OP_TYPEEXACT:      case OP_TYPEEXACT:
3901      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3902      minimize = TRUE;      minimize = TRUE;
3903      ecode += 3;      ecode += 1 + IMM2_SIZE;
3904      goto REPEATTYPE;      goto REPEATTYPE;
3905    
3906      case OP_TYPEUPTO:      case OP_TYPEUPTO:
# Line 3644  for (;;) Line 3908  for (;;)
3908      min = 0;      min = 0;
3909      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3910      minimize = *ecode == OP_TYPEMINUPTO;      minimize = *ecode == OP_TYPEMINUPTO;
3911      ecode += 3;      ecode += 1 + IMM2_SIZE;
3912      goto REPEATTYPE;      goto REPEATTYPE;
3913    
3914      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
# Line 3672  for (;;) Line 3936  for (;;)
3936      possessive = TRUE;      possessive = TRUE;
3937      min = 0;      min = 0;
3938      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3939      ecode += 3;      ecode += 1 + IMM2_SIZE;
3940      goto REPEATTYPE;      goto REPEATTYPE;
3941    
3942      case OP_TYPESTAR:      case OP_TYPESTAR:
# Line 3718  for (;;) Line 3982  for (;;)
3982          switch(prop_type)          switch(prop_type)
3983            {            {
3984            case PT_ANY:            case PT_ANY:
3985            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3986            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3987              {              {
3988              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3989                {                {
3990                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3991                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3992                }                }
3993              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3994              }              }
# Line 3737  for (;;) Line 4001  for (;;)
4001              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4002                {                {
4003                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4004                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4005                }                }
4006              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4007              chartype = UCD_CHARTYPE(c);              chartype = UCD_CHARTYPE(c);
4008              if ((chartype == ucp_Lu ||              if ((chartype == ucp_Lu ||
4009                   chartype == ucp_Ll ||                   chartype == ucp_Ll ||
4010                   chartype == ucp_Lt) == prop_fail_result)                   chartype == ucp_Lt) == prop_fail_result)
4011                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4012              }              }
4013            break;            break;
4014    
# Line 3754  for (;;) Line 4018  for (;;)
4018              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4019                {                {
4020                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4021                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4022                }                }
4023              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4024              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4025                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4026              }              }
4027            break;            break;
4028    
# Line 3768  for (;;) Line 4032  for (;;)
4032              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4033                {                {
4034                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4035                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4036                }                }
4037              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4038              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4039                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4040              }              }
4041            break;            break;
4042    
# Line 3782  for (;;) Line 4046  for (;;)
4046              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4047                {                {
4048                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4049                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4050                }                }
4051              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4052              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4053                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4054              }              }
4055            break;            break;
4056    
# Line 3797  for (;;) Line 4061  for (;;)
4061              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4062                {                {
4063                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4064                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4065                }                }
4066              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4067              category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4068              if ((category == ucp_L || category == ucp_N) == prop_fail_result)              if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4069                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4070              }              }
4071            break;            break;
4072    
# Line 3812  for (;;) Line 4076  for (;;)
4076              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4077                {                {
4078                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4079                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4080                }                }
4081              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4082              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4083                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4084                     == prop_fail_result)                     == prop_fail_result)
4085                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4086              }              }
4087            break;            break;
4088    
# Line 3828  for (;;) Line 4092  for (;;)
4092              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4093                {                {
4094                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4095                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4096                }                }
4097              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4098              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4099                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4100                     == prop_fail_result)                     == prop_fail_result)
4101                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4102              }              }
4103            break;            break;
4104    
# Line 3845  for (;;) Line 4109  for (;;)
4109              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4110                {                {
4111                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4112                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4113                }                }
4114              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4115              category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4116              if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)              if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4117                     == prop_fail_result)                     == prop_fail_result)
4118                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4119              }              }
4120            break;            break;
4121    
# Line 3872  for (;;) Line 4136  for (;;)
4136            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4137              {              {
4138              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4139              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4140              }              }
4141            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4142            if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);            if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
4143            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4144              {              {
4145              int len = 1;              int len = 1;
4146              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4147              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4148              eptr += len;              eptr += len;
4149              }              }
# Line 3891  for (;;) Line 4155  for (;;)
4155    
4156  /* Handle all other cases when the coding is UTF-8 */  /* Handle all other cases when the coding is UTF-8 */
4157    
4158  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4159        if (utf8) switch(ctype)        if (utf) switch(ctype)
4160          {          {
4161          case OP_ANY:          case OP_ANY:
4162          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3900  for (;;) Line 4164  for (;;)
4164            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4165              {              {
4166              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4167              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4168              }              }
4169            if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4170            eptr++;            eptr++;
4171            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4172            }            }
4173          break;          break;
4174    
# Line 3914  for (;;) Line 4178  for (;;)
4178            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4179              {              {
4180              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4181              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4182              }              }
4183            eptr++;            eptr++;
4184            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4185            }            }
4186          break;          break;
4187    
4188          case OP_ANYBYTE:          case OP_ANYBYTE:
4189          if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);          if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4190          eptr += min;          eptr += min;
4191          break;          break;
4192    
# Line 3932  for (;;) Line 4196  for (;;)
4196            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4197              {              {
4198              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4199              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4200              }              }
4201            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4202            switch(c)            switch(c)
4203              {              {
4204              default: MRRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4205    
4206              case 0x000d:              case 0x000d:
4207              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
# Line 3951  for (;;) Line 4215  for (;;)
4215              case 0x0085:              case 0x0085:
4216              case 0x2028:              case 0x2028:
4217              case 0x2029:              case 0x2029:
4218              if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4219              break;              break;
4220              }              }
4221            }            }
# Line 3963  for (;;) Line 4227  for (;;)
4227            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4228              {              {
4229              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4230              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4231              }              }
4232            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4233            switch(c)            switch(c)
# Line 3988  for (;;) Line 4252  for (;;)
4252              case 0x202f:    /* NARROW NO-BREAK SPACE */              case 0x202f:    /* NARROW NO-BREAK SPACE */
4253              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4254              case 0x3000:    /* IDEOGRAPHIC SPACE */              case 0x3000:    /* IDEOGRAPHIC SPACE */
4255              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4256              }              }
4257            }            }
4258          break;          break;
# Line 3999  for (;;) Line 4263  for (;;)
4263            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4264              {              {
4265              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4266              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4267              }              }
4268            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4269            switch(c)            switch(c)
4270              {              {
4271              default: MRRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4272              case 0x09:      /* HT */              case 0x09:      /* HT */
4273              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4274              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
# Line 4035  for (;;) Line 4299  for (;;)
4299            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4300              {              {
4301              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4302              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4303              }              }
4304            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4305            switch(c)            switch(c)
# Line 4048  for (;;) Line 4312  for (;;)
4312              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4313              case 0x2028:    /* LINE SEPARATOR */              case 0x2028:    /* LINE SEPARATOR */
4314              case 0x2029:    /* PARAGRAPH SEPARATOR */              case 0x2029:    /* PARAGRAPH SEPARATOR */
4315              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4316              }              }
4317            }            }
4318          break;          break;
# Line 4059  for (;;) Line 4323  for (;;)
4323            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4324              {              {
4325              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4326              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4327              }              }
4328            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4329            switch(c)            switch(c)
4330              {              {
4331              default: MRRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4332              case 0x0a:      /* LF */              case 0x0a:      /* LF */
4333              case 0x0b:      /* VT */              case 0x0b:      /* VT */
4334              case 0x0c:      /* FF */              case 0x0c:      /* FF */
# Line 4083  for (;;) Line 4347  for (;;)
4347            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4348              {              {
4349              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4350              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4351              }              }
4352            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4353            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4354              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4355            }            }
4356          break;          break;
4357    
# Line 4097  for (;;) Line 4361  for (;;)
4361            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4362              {              {
4363              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4364              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4365              }              }
4366            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
4367              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4368              eptr++;
4369            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4370            }            }
4371          break;          break;
# Line 4111  for (;;) Line 4376  for (;;)
4376            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4377              {              {
4378              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4379              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4380              }              }
4381            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4382              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4383            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4384              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4385            }            }
4386          break;          break;
4387    
# Line 4125  for (;;) Line 4391  for (;;)
4391            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4392              {              {
4393              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4394              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4395              }              }
4396            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
4397              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4398              eptr++;
4399            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4400            }            }
4401          break;          break;
# Line 4139  for (;;) Line 4406  for (;;)
4406            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4407              {              {
4408              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4409              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4410              }              }
4411            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4412              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4413            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4414              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4415            }            }
4416          break;          break;
4417    
# Line 4153  for (;;) Line 4421  for (;;)
4421            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4422              {              {
4423              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4424              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4425              }              }
4426            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
4427              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4428              eptr++;
4429            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4430            }            }
4431          break;          break;
# Line 4166  for (;;) Line 4435  for (;;)
4435          }  /* End switch(ctype) */          }  /* End switch(ctype) */
4436    
4437        else        else
4438  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF */
4439    
4440        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
4441        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. */
# Line 4179  for (;;) Line 4448  for (;;)
4448            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4449              {              {
4450              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4451              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4452              }              }
4453            if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4454            eptr++;            eptr++;
4455            }            }
4456          break;          break;
# Line 4190  for (;;) Line 4459  for (;;)
4459          if (eptr > md->end_subject - min)          if (eptr > md->end_subject - min)
4460            {            {
4461            SCHECK_PARTIAL();            SCHECK_PARTIAL();
4462            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
4463            }            }
4464          eptr += min;          eptr += min;
4465          break;          break;
# Line 4199  for (;;) Line 4468  for (;;)
4468          if (eptr > md->end_subject - min)          if (eptr > md->end_subject - min)
4469            {            {
4470            SCHECK_PARTIAL();            SCHECK_PARTIAL();
4471            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
4472            }            }
4473          eptr += min;          eptr += min;
4474          break;          break;
# Line 4210  for (;;) Line 4479  for (;;)
4479            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4480              {              {
4481              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4482              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4483              }              }
4484            switch(*eptr++)            switch(*eptr++)
4485              {              {
4486              default: MRRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4487    
4488              case 0x000d:              case 0x000d:
4489              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
# Line 4226  for (;;) Line 4495  for (;;)
4495              case 0x000b:              case 0x000b:
4496              case 0x000c:              case 0x000c:
4497              case 0x0085:              case 0x0085:
4498              if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);  #ifdef COMPILE_PCRE16
4499                case 0x2028:
4500                case 0x2029:
4501    #endif
4502                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4503              break;              break;
4504              }              }
4505            }            }
# Line 4238  for (;;) Line 4511  for (;;)
4511            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4512              {              {
4513              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4514              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4515              }              }
4516            switch(*eptr++)            switch(*eptr++)
4517              {              {
# Line 4246  for (;;) Line 4519  for (;;)
4519              case 0x09:      /* HT */              case 0x09:      /* HT */
4520              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4521              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4522              MRRETURN(MATCH_NOMATCH);  #ifdef COMPILE_PCRE16
4523                case 0x1680:    /* OGHAM SPACE MARK */
4524                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4525                case 0x2000:    /* EN QUAD */
4526                case 0x2001:    /* EM QUAD */
4527                case 0x2002:    /* EN SPACE */
4528                case 0x2003:    /* EM SPACE */
4529                case 0x2004:    /* THREE-PER-EM SPACE */
4530                case 0x2005:    /* FOUR-PER-EM SPACE */
4531                case 0x2006:    /* SIX-PER-EM SPACE */
4532                case 0x2007:    /* FIGURE SPACE */
4533                case 0x2008:    /* PUNCTUATION SPACE */
4534                case 0x2009:    /* THIN SPACE */
4535                case 0x200A:    /* HAIR SPACE */
4536                case 0x202f:    /* NARROW NO-BREAK SPACE */
4537                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4538                case 0x3000:    /* IDEOGRAPHIC SPACE */
4539    #endif
4540                RRETURN(MATCH_NOMATCH);
4541              }              }
4542            }            }
4543          break;          break;
# Line 4257  for (;;) Line 4548  for (;;)
4548            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4549              {              {
4550              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4551              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4552              }              }
4553            switch(*eptr++)            switch(*eptr++)
4554              {              {
4555              default: MRRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4556              case 0x09:      /* HT */              case 0x09:      /* HT */
4557              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4558              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4559    #ifdef COMPILE_PCRE16
4560                case 0x1680:    /* OGHAM SPACE MARK */
4561                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4562                case 0x2000:    /* EN QUAD */
4563                case 0x2001:    /* EM QUAD */
4564                case 0x2002:    /* EN SPACE */
4565                case 0x2003:    /* EM SPACE */
4566                case 0x2004:    /* THREE-PER-EM SPACE */
4567                case 0x2005:    /* FOUR-PER-EM SPACE */
4568                case 0x2006:    /* SIX-PER-EM SPACE */
4569                case 0x2007:    /* FIGURE SPACE */
4570                case 0x2008:    /* PUNCTUATION SPACE */
4571                case 0x2009:    /* THIN SPACE */
4572                case 0x200A:    /* HAIR SPACE */
4573                case 0x202f:    /* NARROW NO-BREAK SPACE */
4574                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4575                case 0x3000:    /* IDEOGRAPHIC SPACE */
4576    #endif
4577              break;              break;
4578              }              }
4579            }            }
# Line 4276  for (;;) Line 4585  for (;;)
4585            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4586              {              {
4587              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4588              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4589              }              }
4590            switch(*eptr++)            switch(*eptr++)
4591              {              {
# Line 4286  for (;;) Line 4595  for (;;)
4595              case 0x0c:      /* FF */              case 0x0c:      /* FF */
4596              case 0x0d:      /* CR */              case 0x0d:      /* CR */
4597              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4598              MRRETURN(MATCH_NOMATCH);  #ifdef COMPILE_PCRE16
4599                case 0x2028:    /* LINE SEPARATOR */
4600                case 0x2029:    /* PARAGRAPH SEPARATOR */
4601    #endif
4602                RRETURN(MATCH_NOMATCH);
4603              }              }
4604            }            }
4605          break;          break;
# Line 4297  for (;;) Line 4610  for (;;)
4610            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4611              {              {
4612              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4613              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4614              }              }
4615            switch(*eptr++)            switch(*eptr++)
4616              {              {
4617              default: MRRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4618              case 0x0a:      /* LF */              case 0x0a:      /* LF */
4619              case 0x0b:      /* VT */              case 0x0b:      /* VT */
4620              case 0x0c:      /* FF */              case 0x0c:      /* FF */
4621              case 0x0d:      /* CR */              case 0x0d:      /* CR */
4622              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4623    #ifdef COMPILE_PCRE16
4624                case 0x2028:    /* LINE SEPARATOR */
4625                case 0x2029:    /* PARAGRAPH SEPARATOR */
4626    #endif
4627              break;              break;
4628              }              }
4629            }            }
# Line 4318  for (;;) Line 4635  for (;;)
4635            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4636              {              {
4637              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4638              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4639              }              }
4640            if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4641                RRETURN(MATCH_NOMATCH);
4642              eptr++;
4643            }            }
4644          break;          break;
4645    
# Line 4330  for (;;) Line 4649  for (;;)
4649            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4650              {              {
4651              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4652              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4653              }              }
4654            if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4655                RRETURN(MATCH_NOMATCH);
4656              eptr++;
4657            }            }
4658          break;          break;
4659    
# Line 4342  for (;;) Line 4663  for (;;)
4663            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4664              {              {
4665              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4666              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4667              }              }
4668            if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4669                RRETURN(MATCH_NOMATCH);
4670              eptr++;
4671            }            }
4672          break;          break;
4673    
# Line 4354  for (;;) Line 4677  for (;;)
4677            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4678              {              {
4679              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4680              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4681              }              }
4682            if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4683                RRETURN(MATCH_NOMATCH);
4684              eptr++;
4685            }            }
4686          break;          break;
4687    
# Line 4366  for (;;) Line 4691  for (;;)
4691            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4692              {              {
4693              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4694              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4695              }              }
4696            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4697              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4698              eptr++;
4699            }            }
4700          break;          break;
4701    
# Line 4379  for (;;) Line 4705  for (;;)
4705            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4706              {              {
4707              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4708              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4709              }              }
4710            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4711              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4712              eptr++;
4713            }            }
4714          break;          break;
4715    
# Line 4411  for (;;) Line 4738  for (;;)
4738              {              {
4739              RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4740              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4741              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
4742              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4743                {                {
4744                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4745                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4746                }                }
4747              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4748              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4749              }              }
4750            /* Control never gets here */            /* Control never gets here */
4751    
# Line 4428  for (;;) Line 4755  for (;;)
4755              int chartype;              int chartype;
4756              RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4757              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4758              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
4759              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4760                {                {
4761                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4762                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4763                }                }
4764              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4765              chartype = UCD_CHARTYPE(c);              chartype = UCD_CHARTYPE(c);
4766              if ((chartype == ucp_Lu ||              if ((chartype == ucp_Lu ||
4767                   chartype == ucp_Ll ||                   chartype == ucp_Ll ||
4768                   chartype == ucp_Lt) == prop_fail_result)                   chartype == ucp_Lt) == prop_fail_result)
4769                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4770              }              }
4771            /* Control never gets here */            /* Control never gets here */
4772    
# Line 4448  for (;;) Line 4775  for (;;)
4775              {              {
4776              RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4777              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4778              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
4779              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4780                {                {
4781                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4782                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4783                }                }
4784              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4785              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4786                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4787              }              }
4788            /* Control never gets here */            /* Control never gets here */
4789    
# Line 4465  for (;;) Line 4792  for (;;)
4792              {              {
4793              RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4794              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4795              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
4796              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4797                {                {
4798                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4799                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4800                }                }
4801              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4802              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4803                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4804              }              }
4805            /* Control never gets here */            /* Control never gets here */
4806    
# Line 4482  for (;;) Line 4809  for (;;)
4809              {              {
4810              RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4811              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4812              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
4813              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4814                {                {
4815                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4816                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4817                }                }
4818              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4819              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)