/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 608 by ph10, Sun Jun 12 16:25:55 2011 UTC | revision 892 by ph10, Wed Jan 18 17:23:20 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 57  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
60  /* Values for setting in md->match_function_type to indicate two special types  /* Values for setting in md->match_function_type to indicate two special types
61  of call to match(). We do it this way to save on using another stack variable,  of call to match(). We do it this way to save on using another stack variable,
62  as stack usage is to be discouraged. */  as stack usage is to be discouraged. */
63    
64  #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */  #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
# Line 76  negative to avoid the external error cod Line 76  negative to avoid the external error cod
76  #define MATCH_ACCEPT       (-999)  #define MATCH_ACCEPT       (-999)
77  #define MATCH_COMMIT       (-998)  #define MATCH_COMMIT       (-998)
78  #define MATCH_KETRPOS      (-997)  #define MATCH_KETRPOS      (-997)
79  #define MATCH_PRUNE        (-996)  #define MATCH_ONCE         (-996)
80  #define MATCH_SKIP         (-995)  #define MATCH_PRUNE        (-995)
81  #define MATCH_SKIP_ARG     (-994)  #define MATCH_SKIP         (-994)
82  #define MATCH_THEN         (-993)  #define MATCH_SKIP_ARG     (-993)
83    #define MATCH_THEN         (-992)
 /* This is a convenience macro for code that occurs many times. */  
   
 #define MRRETURN(ra) \  
   { \  
   md->mark = markptr; \  
   RRETURN(ra); \  
   }  
84    
85  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
86  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
# Line 120  Returns:     nothing Line 113  Returns:     nothing
113  */  */
114    
115  static void  static void
116  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
117  {  {
118  unsigned int c;  unsigned int c;
119  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 137  while (length-- > 0) Line 130  while (length-- > 0)
130    
131  /* Normally, if a back reference hasn't been set, the length that is passed is  /* Normally, if a back reference hasn't been set, the length that is passed is
132  negative, so the match always fails. However, in JavaScript compatibility mode,  negative, so the match always fails. However, in JavaScript compatibility mode,
133  the length passed is zero. Note that in caseless UTF-8 mode, the number of  the length passed is zero. Note that in caseless UTF-8 mode, the number of
134  subject bytes matched may be different to the number of reference bytes.  subject bytes matched may be different to the number of reference bytes.
135    
136  Arguments:  Arguments:
# Line 151  Returns:      < 0 if not matched, otherw Line 144  Returns:      < 0 if not matched, otherw
144  */  */
145    
146  static int  static int
147  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
148    BOOL caseless)    BOOL caseless)
149  {  {
150  USPTR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
151  register USPTR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
152    
153  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
154  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 180  ASCII characters. */ Line 173  ASCII characters. */
173    
174  if (caseless)  if (caseless)
175    {    {
176  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
177  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
178    if (md->utf8)    if (md->utf)
179      {      {
180      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
181      bytes matched may differ, because there are some characters whose upper and      bytes matched may differ, because there are some characters whose upper and
182      lower case versions code as different numbers of bytes. For example, U+023A      lower case versions code as different numbers of bytes. For example, U+023A
183      (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);      (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
184      a sequence of 3 of the former uses 6 bytes, as does a sequence of two of      a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
185      the latter. It is important, therefore, to check the length along the      the latter. It is important, therefore, to check the length along the
186      reference, not along the subject (earlier code did this wrong). */      reference, not along the subject (earlier code did this wrong). */
187    
188      USPTR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
189      while (p < endptr)      while (p < endptr)
190        {        {
191        int c, d;        int c, d;
# Line 209  if (caseless) Line 202  if (caseless)
202    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
203    is no UCP support. */    is no UCP support. */
204      {      {
205      if (eptr + length > md->end_subject) return -1;      if (eptr + length > md->end_subject) return -1;
206      while (length-- > 0)      while (length-- > 0)
207        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }        {
208      }        if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
209          p++;
210          eptr++;
211          }
212        }
213    }    }
214    
215  /* In the caseful case, we can just compare the bytes, whether or not we  /* In the caseful case, we can just compare the bytes, whether or not we
216  are in UTF-8 mode. */  are in UTF-8 mode. */
217    
218  else  else
219    {    {
220    if (eptr + length > md->end_subject) return -1;    if (eptr + length > md->end_subject) return -1;
221    while (length-- > 0) if (*p++ != *eptr++) return -1;    while (length-- > 0) if (*p++ != *eptr++) return -1;
222    }    }
223    
224  return eptr - eptr_start;  return (int)(eptr - eptr_start);
225  }  }
226    
227    
# Line 276  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 273  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
273         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
274         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
275         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
276         RM61,  RM62, RM63, RM64 };         RM61,  RM62, RM63, RM64, RM65, RM66 };
277    
278  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
279  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 289  actually used in this definition. */ Line 286  actually used in this definition. */
286  #define RMATCH(ra,rb,rc,rd,re,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
287    { \    { \
288    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
289    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
290    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
291    }    }
292  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 299  actually used in this definition. */ Line 296  actually used in this definition. */
296    }    }
297  #else  #else
298  #define RMATCH(ra,rb,rc,rd,re,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
299    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
300  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
301  #endif  #endif
302    
# Line 314  argument of match(), which never changes Line 311  argument of match(), which never changes
311    
312  #define RMATCH(ra,rb,rc,rd,re,rw)\  #define RMATCH(ra,rb,rc,rd,re,rw)\
313    {\    {\
314    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
315    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316    frame->Xwhere = rw; \    frame->Xwhere = rw; \
317    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
318    newframe->Xecode = rb;\    newframe->Xecode = rb;\
319    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
   newframe->Xmarkptr = markptr;\  
320    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
321    newframe->Xeptrb = re;\    newframe->Xeptrb = re;\
322    newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
# Line 336  argument of match(), which never changes Line 332  argument of match(), which never changes
332    {\    {\
333    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
334    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
335    (pcre_stack_free)(oldframe);\    if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
336    if (frame != NULL)\    if (frame != NULL)\
337      {\      {\
338      rrc = ra;\      rrc = ra;\
# Line 353  typedef struct heapframe { Line 349  typedef struct heapframe {
349    
350    /* Function arguments that may change */    /* Function arguments that may change */
351    
352    USPTR Xeptr;    PCRE_PUCHAR Xeptr;
353    const uschar *Xecode;    const pcre_uchar *Xecode;
354    USPTR Xmstart;    PCRE_PUCHAR Xmstart;
   USPTR Xmarkptr;  
355    int Xoffset_top;    int Xoffset_top;
356    eptrblock *Xeptrb;    eptrblock *Xeptrb;
357    unsigned int Xrdepth;    unsigned int Xrdepth;
358    
359    /* Function local variables */    /* Function local variables */
360    
361    USPTR Xcallpat;    PCRE_PUCHAR Xcallpat;
362  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
363    USPTR Xcharptr;    PCRE_PUCHAR Xcharptr;
364  #endif  #endif
365    USPTR Xdata;    PCRE_PUCHAR Xdata;
366    USPTR Xnext;    PCRE_PUCHAR Xnext;
367    USPTR Xpp;    PCRE_PUCHAR Xpp;
368    USPTR Xprev;    PCRE_PUCHAR Xprev;
369    USPTR Xsaved_eptr;    PCRE_PUCHAR Xsaved_eptr;
370    
371    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
372    
# Line 383  typedef struct heapframe { Line 378  typedef struct heapframe {
378    int Xprop_type;    int Xprop_type;
379    int Xprop_value;    int Xprop_value;
380    int Xprop_fail_result;    int Xprop_fail_result;
   int Xprop_category;  
   int Xprop_chartype;  
   int Xprop_script;  
381    int Xoclength;    int Xoclength;
382    uschar Xocchars[8];    pcre_uchar Xocchars[6];
383  #endif  #endif
384    
385    int Xcodelink;    int Xcodelink;
# Line 429  returns a negative (error) response, the Line 421  returns a negative (error) response, the
421  same response. */  same response. */
422    
423  /* These macros pack up tests that are used for partial matching, and which  /* These macros pack up tests that are used for partial matching, and which
424  appears several times in the code. We set the "hit end" flag if the pointer is  appear several times in the code. We set the "hit end" flag if the pointer is
425  at the end of the subject and also past the start of the subject (i.e.  at the end of the subject and also past the start of the subject (i.e.
426  something has been matched). For hard partial matching, we then return  something has been matched). For hard partial matching, we then return
427  immediately. The second one is used when we already know we are past the end of  immediately. The second one is used when we already know we are past the end of
# Line 440  the subject. */ Line 432  the subject. */
432        eptr > md->start_used_ptr) \        eptr > md->start_used_ptr) \
433      { \      { \
434      md->hitend = TRUE; \      md->hitend = TRUE; \
435      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
436      }      }
437    
438  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
439    if (md->partial != 0 && eptr > md->start_used_ptr) \    if (md->partial != 0 && eptr > md->start_used_ptr) \
440      { \      { \
441      md->hitend = TRUE; \      md->hitend = TRUE; \
442      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
443      }      }
444    
445    
446  /* Performance note: It might be tempting to extract commonly used fields from  /* Performance note: It might be tempting to extract commonly used fields from
447  the md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf, end_subject) into individual variables to improve
448  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
449  made performance worse.  made performance worse.
450    
# Line 461  Arguments: Line 453  Arguments:
453     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
454     mstart      pointer to the current match start position (can be modified     mstart      pointer to the current match start position (can be modified
455                   by encountering \K)                   by encountering \K)
    markptr     pointer to the most recent MARK name, or NULL  
456     offset_top  current top pointer     offset_top  current top pointer
457     md          pointer to "static" info for the match     md          pointer to "static" info for the match
458     eptrb       pointer to chain of blocks containing eptr at start of     eptrb       pointer to chain of blocks containing eptr at start of
# Line 476  Returns:       MATCH_MATCH if matched Line 467  Returns:       MATCH_MATCH if matched
467  */  */
468    
469  static int  static int
470  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
471    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,    PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
472    unsigned int rdepth)    unsigned int rdepth)
473  {  {
474  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
# Line 487  so they can be ordinary variables in all Line 478  so they can be ordinary variables in all
478  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
479  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
480  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
481  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
482    
483  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
484  BOOL caseless;  BOOL caseless;
485  int condcode;  int condcode;
486    
487  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
488  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
489  heap storage. Set up the top-level frame here; others are obtained from the  frame on the stack here; subsequent instantiations are obtained from the heap
490  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
491    the top-level on the stack rather than malloc-ing them all gives a performance
492    boost in many cases where there is not much "recursion". */
493    
494  #ifdef NO_RECURSE  #ifdef NO_RECURSE
495  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));  heapframe frame_zero;
496  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  heapframe *frame = &frame_zero;
497  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
498    
499  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 508  frame->Xprevframe = NULL;            /* Line 501  frame->Xprevframe = NULL;            /*
501  frame->Xeptr = eptr;  frame->Xeptr = eptr;
502  frame->Xecode = ecode;  frame->Xecode = ecode;
503  frame->Xmstart = mstart;  frame->Xmstart = mstart;
 frame->Xmarkptr = markptr;  
504  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
505  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
506  frame->Xrdepth = rdepth;  frame->Xrdepth = rdepth;
# Line 522  HEAP_RECURSE: Line 514  HEAP_RECURSE:
514  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
515  #define ecode              frame->Xecode  #define ecode              frame->Xecode
516  #define mstart             frame->Xmstart  #define mstart             frame->Xmstart
 #define markptr            frame->Xmarkptr  
517  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
518  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
519  #define rdepth             frame->Xrdepth  #define rdepth             frame->Xrdepth
520    
521  /* Ditto for the local variables */  /* Ditto for the local variables */
522    
523  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
524  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
525  #endif  #endif
526  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
# Line 550  HEAP_RECURSE: Line 541  HEAP_RECURSE:
541  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
542  #define prop_value         frame->Xprop_value  #define prop_value         frame->Xprop_value
543  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
 #define prop_category      frame->Xprop_category  
 #define prop_chartype      frame->Xprop_chartype  
 #define prop_script        frame->Xprop_script  
544  #define oclength           frame->Xoclength  #define oclength           frame->Xoclength
545  #define occhars            frame->Xocchars  #define occhars            frame->Xocchars
546  #endif  #endif
# Line 590  declarations can be cut out in a block. Line 578  declarations can be cut out in a block.
578  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
579  to RMATCH(). */  to RMATCH(). */
580    
581  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
582  const uschar *charptr;  const pcre_uchar *charptr;
583  #endif  #endif
584  const uschar *callpat;  const pcre_uchar *callpat;
585  const uschar *data;  const pcre_uchar *data;
586  const uschar *next;  const pcre_uchar *next;
587  USPTR         pp;  PCRE_PUCHAR       pp;
588  const uschar *prev;  const pcre_uchar *prev;
589  USPTR         saved_eptr;  PCRE_PUCHAR       saved_eptr;
590    
591  recursion_info new_recursive;  recursion_info new_recursive;
592    
593  BOOL cur_is_word;  BOOL cur_is_word;
594  BOOL condition;  BOOL condition;
595  BOOL prev_is_word;  BOOL prev_is_word;
596    
# Line 610  BOOL prev_is_word; Line 598  BOOL prev_is_word;
598  int prop_type;  int prop_type;
599  int prop_value;  int prop_value;
600  int prop_fail_result;  int prop_fail_result;
 int prop_category;  
 int prop_chartype;  
 int prop_script;  
601  int oclength;  int oclength;
602  uschar occhars[8];  pcre_uchar occhars[6];
603  #endif  #endif
604    
605  int codelink;  int codelink;
# Line 632  int stacksave[REC_STACK_SAVE_MAX]; Line 617  int stacksave[REC_STACK_SAVE_MAX];
617  eptrblock newptrb;  eptrblock newptrb;
618  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
619    
620  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
621  of the local variables that are used only in localised parts of the code, but  of the local variables that are used only in localised parts of the code, but
622  still need to be preserved over recursive calls of match(). These macros define  still need to be preserved over recursive calls of match(). These macros define
623  the alternative names that are used. */  the alternative names that are used. */
624    
625  #define allow_zero    cur_is_word  #define allow_zero    cur_is_word
# Line 642  the alternative names that are used. */ Line 627  the alternative names that are used. */
627  #define code_offset   codelink  #define code_offset   codelink
628  #define condassert    condition  #define condassert    condition
629  #define matched_once  prev_is_word  #define matched_once  prev_is_word
630    #define foc           number
631    #define save_mark     data
632    
633  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
634  variables. */  variables. */
# Line 667  defined). However, RMATCH isn't like a f Line 654  defined). However, RMATCH isn't like a f
654  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
655  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
656    
657  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
658  utf8 = md->utf8;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
659  #else  #else
660  utf8 = FALSE;  utf = FALSE;
661  #endif  #endif
662    
663  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
# Line 680  if (md->match_call_count++ >= md->match_ Line 667  if (md->match_call_count++ >= md->match_
667  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
668    
669  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
670  string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is  string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
671  done this way to save having to use another function argument, which would take  done this way to save having to use another function argument, which would take
672  up space on the stack. See also MATCH_CONDASSERT below.  up space on the stack. See also MATCH_CONDASSERT below.
673    
674  When MATCH_CBEGROUP is set, add the current subject pointer to the chain of  When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
# Line 705  for (;;) Line 692  for (;;)
692    {    {
693    minimize = possessive = FALSE;    minimize = possessive = FALSE;
694    op = *ecode;    op = *ecode;
695    
696    switch(op)    switch(op)
697      {      {
698      case OP_MARK:      case OP_MARK:
699      markptr = ecode + 2;      md->nomatch_mark = ecode + 2;
700      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      md->mark = NULL;    /* In case previously set by assertion */
701        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
702        eptrb, RM55);        eptrb, RM55);
703        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
704             md->mark == NULL) md->mark = ecode + 2;
705    
706      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
707      argument, and we must check whether that argument matches this MARK's      argument, and we must check whether that argument matches this MARK's
# Line 720  for (;;) Line 710  for (;;)
710      position and return MATCH_SKIP. Otherwise, pass back the return code      position and return MATCH_SKIP. Otherwise, pass back the return code
711      unaltered. */      unaltered. */
712    
713      if (rrc == MATCH_SKIP_ARG &&      else if (rrc == MATCH_SKIP_ARG &&
714          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)          STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
715        {        {
716        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
717        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
718        }        }
   
     if (md->mark == NULL) md->mark = markptr;  
719      RRETURN(rrc);      RRETURN(rrc);
720    
721      case OP_FAIL:      case OP_FAIL:
722      MRRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
723    
724      /* COMMIT overrides PRUNE, SKIP, and THEN */      /* COMMIT overrides PRUNE, SKIP, and THEN */
725    
726      case OP_COMMIT:      case OP_COMMIT:
727      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
728        eptrb, RM52);        eptrb, RM52);
729      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
730          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
731          rrc != MATCH_THEN)          rrc != MATCH_THEN)
732        RRETURN(rrc);        RRETURN(rrc);
733      MRRETURN(MATCH_COMMIT);      RRETURN(MATCH_COMMIT);
734    
735      /* PRUNE overrides THEN */      /* PRUNE overrides THEN */
736    
737      case OP_PRUNE:      case OP_PRUNE:
738      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
739        eptrb, RM51);        eptrb, RM51);
740      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
741      MRRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
742    
743      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
744      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      md->nomatch_mark = ecode + 2;
745        md->mark = NULL;    /* In case previously set by assertion */
746        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
747        eptrb, RM56);        eptrb, RM56);
748        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
749             md->mark == NULL) md->mark = ecode + 2;
750      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
     md->mark = ecode + 2;  
751      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
752    
753      /* SKIP overrides PRUNE and THEN */      /* SKIP overrides PRUNE and THEN */
754    
755      case OP_SKIP:      case OP_SKIP:
756      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
757        eptrb, RM53);        eptrb, RM53);
758      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
759        RRETURN(rrc);        RRETURN(rrc);
760      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
761      MRRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
762    
763        /* Note that, for Perl compatibility, SKIP with an argument does NOT set
764        nomatch_mark. There is a flag that disables this opcode when re-matching a
765        pattern that ended with a SKIP for which there was not a matching MARK. */
766    
767      case OP_SKIP_ARG:      case OP_SKIP_ARG:
768      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      if (md->ignore_skip_arg)
769          {
770          ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
771          break;
772          }
773        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
774        eptrb, RM57);        eptrb, RM57);
775      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
776        RRETURN(rrc);        RRETURN(rrc);
777    
778      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
779      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
780      caught by a matching MARK, or get to the top, where it is treated the same      caught by a matching MARK, or get to the top, where it causes a rematch
781      as PRUNE. */      with the md->ignore_skip_arg flag set. */
782    
783      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
784      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
785    
786      /* For THEN (and THEN_ARG) we pass back the address of the bracket or      /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
787      the alt that is at the start of the current branch. This makes it possible      the branch in which it occurs can be determined. Overload the start of
788      to skip back past alternatives that precede the THEN within the current      match pointer to do this. */
     branch. */  
789    
790      case OP_THEN:      case OP_THEN:
791      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
792        eptrb, RM54);        eptrb, RM54);
793      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
794      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
795      MRRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
796    
797      case OP_THEN_ARG:      case OP_THEN_ARG:
798      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],      md->nomatch_mark = ecode + 2;
799        offset_top, md, eptrb, RM58);      md->mark = NULL;    /* In case previously set by assertion */
800        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
801          md, eptrb, RM58);
802        if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
803             md->mark == NULL) md->mark = ecode + 2;
804      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
805      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
     md->mark = ecode + LINK_SIZE + 2;  
806      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
807    
808        /* Handle an atomic group that does not contain any capturing parentheses.
809        This can be handled like an assertion. Prior to 8.13, all atomic groups
810        were handled this way. In 8.13, the code was changed as below for ONCE, so
811        that backups pass through the group and thereby reset captured values.
812        However, this uses a lot more stack, so in 8.20, atomic groups that do not
813        contain any captures generate OP_ONCE_NC, which can be handled in the old,
814        less stack intensive way.
815    
816        Check the alternative branches in turn - the matching won't pass the KET
817        for this kind of subpattern. If any one branch matches, we carry on as at
818        the end of a normal bracket, leaving the subject pointer, but resetting
819        the start-of-match value in case it was changed by \K. */
820    
821        case OP_ONCE_NC:
822        prev = ecode;
823        saved_eptr = eptr;
824        save_mark = md->mark;
825        do
826          {
827          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
828          if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
829            {
830            mstart = md->start_match_ptr;
831            break;
832            }
833          if (rrc == MATCH_THEN)
834            {
835            next = ecode + GET(ecode,1);
836            if (md->start_match_ptr < next &&
837                (*ecode == OP_ALT || *next == OP_ALT))
838              rrc = MATCH_NOMATCH;
839            }
840    
841          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
842          ecode += GET(ecode,1);
843          md->mark = save_mark;
844          }
845        while (*ecode == OP_ALT);
846    
847        /* If we hit the end of the group (which could be repeated), fail */
848    
849        if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
850    
851        /* Continue as from after the group, updating the offsets high water
852        mark, since extracts may have been taken. */
853    
854        do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
855    
856        offset_top = md->end_offset_top;
857        eptr = md->end_match_ptr;
858    
859        /* For a non-repeating ket, just continue at this level. This also
860        happens for a repeating ket if no characters were matched in the group.
861        This is the forcible breaking of infinite loops as implemented in Perl
862        5.005. */
863    
864        if (*ecode == OP_KET || eptr == saved_eptr)
865          {
866          ecode += 1+LINK_SIZE;
867          break;
868          }
869    
870        /* The repeating kets try the rest of the pattern or restart from the
871        preceding bracket, in the appropriate order. The second "call" of match()
872        uses tail recursion, to avoid using another stack frame. */
873    
874        if (*ecode == OP_KETRMIN)
875          {
876          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
877          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
878          ecode = prev;
879          goto TAIL_RECURSE;
880          }
881        else  /* OP_KETRMAX */
882          {
883          md->match_function_type = MATCH_CBEGROUP;
884          RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
885          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
886          ecode += 1 + LINK_SIZE;
887          goto TAIL_RECURSE;
888          }
889        /* Control never gets here */
890    
891      /* Handle a capturing bracket, other than those that are possessive with an      /* Handle a capturing bracket, other than those that are possessive with an
892      unlimited repeat. If there is space in the offset vector, save the current      unlimited repeat. If there is space in the offset vector, save the current
893      subject position in the working slot at the top of the vector. We mustn't      subject position in the working slot at the top of the vector. We mustn't
894      change the current values of the data slot, because they may be set from a      change the current values of the data slot, because they may be set from a
895      previous iteration of this group, and be referred to by a reference inside      previous iteration of this group, and be referred to by a reference inside
896      the group. If we fail to match, we need to restore this value and also the      the group. A failure to match might occur after the group has succeeded,
897      values of the final offsets, in case they were set by a previous iteration      if something later on doesn't match. For this reason, we need to restore
898      of the same bracket.      the working value and also the values of the final offsets, in case they
899        were set by a previous iteration of the same bracket.
900    
901      If there isn't enough space in the offset vector, treat this as if it were      If there isn't enough space in the offset vector, treat this as if it were
902      a non-capturing bracket. Don't worry about setting the flag for the error      a non-capturing bracket. Don't worry about setting the flag for the error
# Line 820  for (;;) Line 906  for (;;)
906      case OP_SCBRA:      case OP_SCBRA:
907      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
908      offset = number << 1;      offset = number << 1;
909    
910  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
911      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
912      printf("subject=");      printf("subject=");
# Line 834  for (;;) Line 920  for (;;)
920        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
921        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
922        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
923          save_mark = md->mark;
924    
925        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
926        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 841  for (;;) Line 928  for (;;)
928    
929        for (;;)        for (;;)
930          {          {
931          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
932          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
933            eptrb, RM1);            eptrb, RM1);
934          if (rrc != MATCH_NOMATCH &&          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
935              (rrc != MATCH_THEN || md->start_match_ptr != ecode))  
936            RRETURN(rrc);          /* If we backed up to a THEN, check whether it is within the current
937            branch by comparing the address of the THEN that is passed back with
938            the end of the branch. If it is within the current branch, and the
939            branch is one of two or more alternatives (it either starts or ends
940            with OP_ALT), we have reached the limit of THEN's action, so convert
941            the return code to NOMATCH, which will cause normal backtracking to
942            happen from now on. Otherwise, THEN is passed back to an outer
943            alternative. This implements Perl's treatment of parenthesized groups,
944            where a group not containing | does not affect the current alternative,
945            that is, (X) is NOT the same as (X|(*F)). */
946    
947            if (rrc == MATCH_THEN)
948              {
949              next = ecode + GET(ecode,1);
950              if (md->start_match_ptr < next &&
951                  (*ecode == OP_ALT || *next == OP_ALT))
952                rrc = MATCH_NOMATCH;
953              }
954    
955            /* Anything other than NOMATCH is passed back. */
956    
957            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
958          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
959          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
960          if (*ecode != OP_ALT) break;          md->mark = save_mark;
961            if (*ecode != OP_ALT) break;
962          }          }
963    
964        DPRINTF(("bracket %d failed\n", number));        DPRINTF(("bracket %d failed\n", number));
   
965        md->offset_vector[offset] = save_offset1;        md->offset_vector[offset] = save_offset1;
966        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
967        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
968    
969        if (rrc != MATCH_THEN) md->mark = markptr;        /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
970        RRETURN(MATCH_NOMATCH);  
971          RRETURN(rrc);
972        }        }
973    
974      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
# Line 873  for (;;) Line 982  for (;;)
982      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
983      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
984    
985      /* Non-capturing bracket, except for possessive with unlimited repeat. Loop      /* Non-capturing or atomic group, except for possessive with unlimited
986      for all the alternatives. When we get to the final alternative within the      repeat and ONCE group with no captures. Loop for all the alternatives.
987      brackets, we would return the result of a recursive call to match()  
988      whatever happened. We can reduce stack usage by turning this into a tail      When we get to the final alternative within the brackets, we used to return
989      recursion, except in the case of a possibly empty group.*/      the result of a recursive call to match() whatever happened so it was
990        possible to reduce stack usage by turning this into a tail recursion,
991        except in the case of a possibly empty group. However, now that there is
992        the possibility of (*THEN) occurring in the final alternative, this
993        optimization is no longer always possible.
994    
995        We can optimize if we know there are no (*THEN)s in the pattern; at present
996        this is the best that can be done.
997    
998        MATCH_ONCE is returned when the end of an atomic group is successfully
999        reached, but subsequent matching fails. It passes back up the tree (causing
1000        captured values to be reset) until the original atomic group level is
1001        reached. This is tested by comparing md->once_target with the start of the
1002        group. At this point, the return is converted into MATCH_NOMATCH so that
1003        previous backup points can be taken. */
1004    
1005        case OP_ONCE:
1006      case OP_BRA:      case OP_BRA:
1007      case OP_SBRA:      case OP_SBRA:
1008      DPRINTF(("start non-capturing bracket\n"));      DPRINTF(("start non-capturing bracket\n"));
1009    
1010      for (;;)      for (;;)
1011        {        {
1012        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */        if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
1013    
1014          /* If this is not a possibly empty group, and there are no (*THEN)s in
1015          the pattern, and this is the final alternative, optimize as described
1016          above. */
1017    
1018          else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1019          {          {
1020          if (op >= OP_SBRA)   /* Possibly empty group */          ecode += PRIV(OP_lengths)[*ecode];
           {  
           md->match_function_type = MATCH_CBEGROUP;  
           RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,  
             RM48);  
           if (rrc == MATCH_NOMATCH) md->mark = markptr;  
           RRETURN(rrc);  
           }  
         /* Not a possibly empty group; use tail recursion */  
         ecode += _pcre_OP_lengths[*ecode];  
         DPRINTF(("bracket 0 tail recursion\n"));  
1021          goto TAIL_RECURSE;          goto TAIL_RECURSE;
1022          }          }
1023    
1024        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* In all other cases, we have to make another call to match(). */
       otherwise return. */  
1025    
1026        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        save_mark = md->mark;
1027        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1028          RM2);          RM2);
1029        if (rrc != MATCH_NOMATCH &&  
1030            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1031          THEN. */
1032    
1033          if (rrc == MATCH_THEN)
1034            {
1035            next = ecode + GET(ecode,1);
1036            if (md->start_match_ptr < next &&
1037                (*ecode == OP_ALT || *next == OP_ALT))
1038              rrc = MATCH_NOMATCH;
1039            }
1040    
1041          if (rrc != MATCH_NOMATCH)
1042            {
1043            if (rrc == MATCH_ONCE)
1044              {
1045              const pcre_uchar *scode = ecode;
1046              if (*scode != OP_ONCE)           /* If not at start, find it */
1047                {
1048                while (*scode == OP_ALT) scode += GET(scode, 1);
1049                scode -= GET(scode, 1);
1050                }
1051              if (md->once_target == scode) rrc = MATCH_NOMATCH;
1052              }
1053          RRETURN(rrc);          RRETURN(rrc);
1054            }
1055        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1056          md->mark = save_mark;
1057          if (*ecode != OP_ALT) break;
1058        }        }
     /* Control never reaches here. */  
1059    
1060      /* Handle possessive capturing brackets with an unlimited repeat. We come      RRETURN(MATCH_NOMATCH);
1061    
1062        /* Handle possessive capturing brackets with an unlimited repeat. We come
1063      here from BRAZERO with allow_zero set TRUE. The offset_vector values are      here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1064      handled similarly to the normal case above. However, the matching is      handled similarly to the normal case above. However, the matching is
1065      different. The end of these brackets will always be OP_KETRPOS, which      different. The end of these brackets will always be OP_KETRPOS, which
1066      returns MATCH_KETRPOS without going further in the pattern. By this means      returns MATCH_KETRPOS without going further in the pattern. By this means
1067      we can handle the group by iteration rather than recursion, thereby      we can handle the group by iteration rather than recursion, thereby
1068      reducing the amount of stack needed. */      reducing the amount of stack needed. */
1069    
1070      case OP_CBRAPOS:      case OP_CBRAPOS:
1071      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1072      allow_zero = FALSE;      allow_zero = FALSE;
1073    
1074      POSSESSIVE_CAPTURE:      POSSESSIVE_CAPTURE:
1075      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
1076      offset = number << 1;      offset = number << 1;
# Line 939  for (;;) Line 1085  for (;;)
1085      if (offset < md->offset_max)      if (offset < md->offset_max)
1086        {        {
1087        matched_once = FALSE;        matched_once = FALSE;
1088        code_offset = ecode - md->start_code;        code_offset = (int)(ecode - md->start_code);
1089    
1090        save_offset1 = md->offset_vector[offset];        save_offset1 = md->offset_vector[offset];
1091        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
# Line 947  for (;;) Line 1093  for (;;)
1093        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
1094    
1095        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1096    
1097        /* Each time round the loop, save the current subject position for use        /* Each time round the loop, save the current subject position for use
1098        when the group matches. For MATCH_MATCH, the group has matched, so we        when the group matches. For MATCH_MATCH, the group has matched, so we
1099        restart it with a new subject starting position, remembering that we had        restart it with a new subject starting position, remembering that we had
1100        at least one match. For MATCH_NOMATCH, carry on with the alternatives, as        at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1101        usual. If we haven't matched any alternatives in any iteration, check to        usual. If we haven't matched any alternatives in any iteration, check to
1102        see if a previous iteration matched. If so, the group has matched;        see if a previous iteration matched. If so, the group has matched;
1103        continue from afterwards. Otherwise it has failed; restore the previous        continue from afterwards. Otherwise it has failed; restore the previous
1104        capture values before returning NOMATCH. */        capture values before returning NOMATCH. */
1105    
1106        for (;;)        for (;;)
1107          {          {
1108          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1109            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1110          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1111          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1112            eptrb, RM63);            eptrb, RM63);
1113          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1114            {            {
1115            offset_top = md->end_offset_top;            offset_top = md->end_offset_top;
1116            eptr = md->end_match_ptr;            eptr = md->end_match_ptr;
1117            ecode = md->start_code + code_offset;            ecode = md->start_code + code_offset;
1118            save_capture_last = md->capture_last;            save_capture_last = md->capture_last;
1119            matched_once = TRUE;            matched_once = TRUE;
1120            continue;            continue;
1121            }            }
1122          if (rrc != MATCH_NOMATCH &&  
1123              (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* See comment in the code for capturing groups above about handling
1124            RRETURN(rrc);          THEN. */
1125    
1126            if (rrc == MATCH_THEN)
1127              {
1128              next = ecode + GET(ecode,1);
1129              if (md->start_match_ptr < next &&
1130                  (*ecode == OP_ALT || *next == OP_ALT))
1131                rrc = MATCH_NOMATCH;
1132              }
1133    
1134            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1135          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
1136          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
1137          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
1138          }          }
1139    
1140        if (!matched_once)        if (!matched_once)
1141          {          {
1142          md->offset_vector[offset] = save_offset1;          md->offset_vector[offset] = save_offset1;
1143          md->offset_vector[offset+1] = save_offset2;          md->offset_vector[offset+1] = save_offset2;
1144          md->offset_vector[md->offset_end - number] = save_offset3;          md->offset_vector[md->offset_end - number] = save_offset3;
1145          }          }
1146    
       if (rrc != MATCH_THEN) md->mark = markptr;  
1147        if (allow_zero || matched_once)        if (allow_zero || matched_once)
1148          {          {
1149          ecode += 1 + LINK_SIZE;          ecode += 1 + LINK_SIZE;
1150          break;          break;
1151          }          }
1152    
1153        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1154        }        }
1155    
1156      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1157      as a non-capturing bracket. */      as a non-capturing bracket. */
1158    
# Line 1009  for (;;) Line 1164  for (;;)
1164      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1165      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1166    
1167      /* Non-capturing possessive bracket with unlimited repeat. We come here      /* Non-capturing possessive bracket with unlimited repeat. We come here
1168      from BRAZERO with allow_zero = TRUE. The code is similar to the above,      from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1169      without the capturing complication. It is written out separately for speed      without the capturing complication. It is written out separately for speed
1170      and cleanliness. */      and cleanliness. */
1171    
1172      case OP_BRAPOS:      case OP_BRAPOS:
1173      case OP_SBRAPOS:      case OP_SBRAPOS:
1174      allow_zero = FALSE;      allow_zero = FALSE;
1175    
1176      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1177      matched_once = FALSE;      matched_once = FALSE;
1178      code_offset = ecode - md->start_code;      code_offset = (int)(ecode - md->start_code);
1179    
1180      for (;;)      for (;;)
1181        {        {
1182        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1183        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1184          eptrb, RM64);          eptrb, RM48);
1185        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1186          {          {
1187            offset_top = md->end_offset_top;
1188          eptr = md->end_match_ptr;          eptr = md->end_match_ptr;
1189          ecode = md->start_code + code_offset;          ecode = md->start_code + code_offset;
1190          matched_once = TRUE;          matched_once = TRUE;
1191          continue;          continue;
1192          }          }
1193        if (rrc != MATCH_NOMATCH &&  
1194            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1195          RRETURN(rrc);        THEN. */
1196    
1197          if (rrc == MATCH_THEN)
1198            {
1199            next = ecode + GET(ecode,1);
1200            if (md->start_match_ptr < next &&
1201                (*ecode == OP_ALT || *next == OP_ALT))
1202              rrc = MATCH_NOMATCH;
1203            }
1204    
1205          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1206        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1207        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1208        }        }
1209    
1210      if (matched_once || allow_zero)      if (matched_once || allow_zero)
1211        {        {
1212        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1213        break;        break;
1214        }        }
1215      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
1216    
1217      /* Control never reaches here. */      /* Control never reaches here. */
# Line 1053  for (;;) Line 1219  for (;;)
1219      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
1220      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
1221      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
1222      exactly what going to the ket would do. As there is only one branch to be      exactly what going to the ket would do. */
     obeyed, we can use tail recursion to avoid using another stack frame. */  
1223    
1224      case OP_COND:      case OP_COND:
1225      case OP_SCOND:      case OP_SCOND:
# Line 1065  for (;;) Line 1230  for (;;)
1230    
1231      if (ecode[LINK_SIZE+1] == OP_CALLOUT)      if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1232        {        {
1233        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
1234          {          {
1235          pcre_callout_block cb;          PUBL(callout_block) cb;
1236          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
1237          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1238          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1239    #ifdef COMPILE_PCRE8
1240          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1241    #else
1242            cb.subject          = (PCRE_SPTR16)md->start_subject;
1243    #endif
1244          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1245          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
1246          cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
# Line 1080  for (;;) Line 1249  for (;;)
1249          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1250          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1251          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1252          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          cb.mark             = md->nomatch_mark;
1253            if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1254          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1255          }          }
1256        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1257        }        }
1258    
1259      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1100  for (;;) Line 1270  for (;;)
1270        else        else
1271          {          {
1272          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1273          condition =  (recno == RREF_ANY || recno == md->recursive->group_num);          condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1274    
1275          /* If the test is for recursion into a specific subpattern, and it is          /* If the test is for recursion into a specific subpattern, and it is
1276          false, but the test was set up by name, scan the table to see if the          false, but the test was set up by name, scan the table to see if the
1277          name refers to any other numbers, and test them. The condition is true          name refers to any other numbers, and test them. The condition is true
1278          if any one is set. */          if any one is set. */
1279    
1280          if (!condition && condcode == OP_NRREF && recno != RREF_ANY)          if (!condition && condcode == OP_NRREF)
1281            {            {
1282            uschar *slotA = md->name_table;            pcre_uchar *slotA = md->name_table;
1283            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
1284              {              {
1285              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
# Line 1122  for (;;) Line 1292  for (;;)
1292    
1293            if (i < md->name_count)            if (i < md->name_count)
1294              {              {
1295              uschar *slotB = slotA;              pcre_uchar *slotB = slotA;
1296              while (slotB > md->name_table)              while (slotB > md->name_table)
1297                {                {
1298                slotB -= md->name_entry_size;                slotB -= md->name_entry_size;
1299                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1300                  {                  {
1301                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
1302                  if (condition) break;                  if (condition) break;
# Line 1142  for (;;) Line 1312  for (;;)
1312                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
1313                  {                  {
1314                  slotB += md->name_entry_size;                  slotB += md->name_entry_size;
1315                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                  if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1316                    {                    {
1317                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
1318                    if (condition) break;                    if (condition) break;
# Line 1155  for (;;) Line 1325  for (;;)
1325    
1326          /* Choose branch according to the condition */          /* Choose branch according to the condition */
1327    
1328          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1329          }          }
1330        }        }
1331    
# Line 1172  for (;;) Line 1342  for (;;)
1342        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1343          {          {
1344          int refno = offset >> 1;          int refno = offset >> 1;
1345          uschar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1346    
1347          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
1348            {            {
# Line 1186  for (;;) Line 1356  for (;;)
1356    
1357          if (i < md->name_count)          if (i < md->name_count)
1358            {            {
1359            uschar *slotB = slotA;            pcre_uchar *slotB = slotA;
1360            while (slotB > md->name_table)            while (slotB > md->name_table)
1361              {              {
1362              slotB -= md->name_entry_size;              slotB -= md->name_entry_size;
1363              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1364                {                {
1365                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
1366                condition = offset < offset_top &&                condition = offset < offset_top &&
# Line 1208  for (;;) Line 1378  for (;;)
1378              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
1379                {                {
1380                slotB += md->name_entry_size;                slotB += md->name_entry_size;
1381                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1382                  {                  {
1383                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
1384                  condition = offset < offset_top &&                  condition = offset < offset_top &&
# Line 1223  for (;;) Line 1393  for (;;)
1393    
1394        /* Choose branch according to the condition */        /* Choose branch according to the condition */
1395    
1396        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1397        }        }
1398    
1399      else if (condcode == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
# Line 1238  for (;;) Line 1408  for (;;)
1408    
1409      else      else
1410        {        {
1411        md->match_function_type = MATCH_CONDASSERT;        md->match_function_type = MATCH_CONDASSERT;
1412        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1413        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1414          {          {
1415            if (md->end_offset_top > offset_top)
1416              offset_top = md->end_offset_top;  /* Captures may have happened */
1417          condition = TRUE;          condition = TRUE;
1418          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1419          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1420          }          }
1421        else if (rrc != MATCH_NOMATCH &&  
1422                (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1423          assertion; it is therefore treated as NOMATCH. */
1424    
1425          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1426          {          {
1427          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1428          }          }
# Line 1258  for (;;) Line 1433  for (;;)
1433          }          }
1434        }        }
1435    
1436      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one, we can
1437      we can use tail recursion to avoid using another stack frame, except when      use tail recursion to avoid using another stack frame, except when there is an
1438      we have an unlimited repeat of a possibly empty group. If the second      unlimited repeat of a possibly empty group. In the latter case, a recursive
1439      alternative doesn't exist, we can just plough on. */      call to match() is always required, unless the second alternative doesn't
1440        exist, in which case we can just plough on. Note that, for compatibility
1441        with Perl, the | in a conditional group is NOT treated as creating two
1442        alternatives. If a THEN is encountered in the branch, it propagates out to
1443        the enclosing alternative (unless nested in a deeper set of alternatives,
1444        of course). */
1445    
1446      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1447        {        {
1448        ecode += 1 + LINK_SIZE;        if (op != OP_SCOND)
       if (op == OP_SCOND)        /* Possibly empty group */  
1449          {          {
1450          md->match_function_type = MATCH_CBEGROUP;          ecode += 1 + LINK_SIZE;
1451          RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);          goto TAIL_RECURSE;
         RRETURN(rrc);  
1452          }          }
1453        else goto TAIL_RECURSE;  
1454          md->match_function_type = MATCH_CBEGROUP;
1455          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1456          RRETURN(rrc);
1457        }        }
1458      else                         /* Condition false & no alternative */  
1459         /* Condition false & no alternative; continue after the group. */
1460    
1461        else
1462        {        {
1463        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1464        }        }
# Line 1301  for (;;) Line 1485  for (;;)
1485        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1486        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1487        }        }
1488      ecode += 3;      ecode += 1 + IMM2_SIZE;
1489      break;      break;
1490    
1491    
1492      /* End of the pattern, either real or forced. If we are in a recursion, we      /* End of the pattern, either real or forced. */
     should restore the offsets appropriately, and if it's a top-level  
     recursion, continue from after the call. */  
1493    
     case OP_ACCEPT:  
1494      case OP_END:      case OP_END:
1495      if (md->recursive != NULL)      case OP_ACCEPT:
1496        {      case OP_ASSERT_ACCEPT:
1497        recursion_info *rec = md->recursive;  
1498        md->recursive = rec->prevrec;      /* If we have matched an empty string, fail if not in an assertion and not
1499        memmove(md->offset_vector, rec->offset_save,      in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1500          rec->saved_max * sizeof(int));      is set and we have matched at the start of the subject. In both cases,
1501        offset_top = rec->save_offset_top;      backtracking will then try other alternatives, if any. */
1502        if (rec->group_num == 0)  
1503          {      if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1504          ecode = rec->after_call;           md->recursive == NULL &&
1505          break;           (md->notempty ||
1506          }             (md->notempty_atstart &&
1507        }               mstart == md->start_subject + md->start_offset)))
1508          RRETURN(MATCH_NOMATCH);
     /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is  
     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of  
     the subject. In both cases, backtracking will then try other alternatives,  
     if any. */  
   
     else if (eptr == mstart &&  
         (md->notempty ||  
           (md->notempty_atstart &&  
             mstart == md->start_subject + md->start_offset)))  
       MRRETURN(MATCH_NOMATCH);  
1509    
1510      /* Otherwise, we have a match. */      /* Otherwise, we have a match. */
1511    
1512      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1513      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1514      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1515    
1516      /* For some reason, the macros don't work properly if an expression is      /* For some reason, the macros don't work properly if an expression is
1517      given as the argument to MRRETURN when the heap is in use. */      given as the argument to RRETURN when the heap is in use. */
1518    
1519      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1520      MRRETURN(rrc);      RRETURN(rrc);
1521    
1522      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
1523      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
1524      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1525      start of each branch to move the current point backwards, so the code at      start of each branch to move the current point backwards, so the code at
1526      this level is identical to the lookahead case. When the assertion is part      this level is identical to the lookahead case. When the assertion is part
1527      of a condition, we want to return immediately afterwards. The caller of      of a condition, we want to return immediately afterwards. The caller of
1528      this incarnation of the match() function will have set MATCH_CONDASSERT in      this incarnation of the match() function will have set MATCH_CONDASSERT in
1529      md->match_function_type, and one of these opcodes will be the first opcode      md->match_function_type, and one of these opcodes will be the first opcode
1530      that is processed. We use a local variable that is preserved over calls to      that is processed. We use a local variable that is preserved over calls to
1531      match() to remember this case. */      match() to remember this case. */
1532    
1533      case OP_ASSERT:      case OP_ASSERT:
1534      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1535        save_mark = md->mark;
1536      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1537        {        {
1538        condassert = TRUE;        condassert = TRUE;
1539        md->match_function_type = 0;        md->match_function_type = 0;
1540        }        }
1541      else condassert = FALSE;      else condassert = FALSE;
1542    
1543      do      do
1544        {        {
1545        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
# Line 1376  for (;;) Line 1548  for (;;)
1548          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1549          break;          break;
1550          }          }
1551        if (rrc != MATCH_NOMATCH &&  
1552            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1553          RRETURN(rrc);        as NOMATCH. */
1554    
1555          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1556        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1557          md->mark = save_mark;
1558        }        }
1559      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1560    
1561      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1562    
1563      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1564    
# Line 1403  for (;;) Line 1578  for (;;)
1578    
1579      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1580      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1581        save_mark = md->mark;
1582      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1583        {        {
1584        condassert = TRUE;        condassert = TRUE;
1585        md->match_function_type = 0;        md->match_function_type = 0;
1586        }        }
1587      else condassert = FALSE;      else condassert = FALSE;
1588    
1589      do      do
1590        {        {
1591        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1592        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);        md->mark = save_mark;
1593          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1594        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1595          {          {
1596          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1597          break;          break;
1598          }          }
1599        if (rrc != MATCH_NOMATCH &&  
1600            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1601          RRETURN(rrc);        as NOMATCH. */
1602    
1603          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1604        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1605        }        }
1606      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1607    
1608      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1609    
1610      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1611      continue;      continue;
1612    
# Line 1437  for (;;) Line 1616  for (;;)
1616      back a number of characters, not bytes. */      back a number of characters, not bytes. */
1617    
1618      case OP_REVERSE:      case OP_REVERSE:
1619  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1620      if (utf8)      if (utf)
1621        {        {
1622        i = GET(ecode, 1);        i = GET(ecode, 1);
1623        while (i-- > 0)        while (i-- > 0)
1624          {          {
1625          eptr--;          eptr--;
1626          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1627          BACKCHAR(eptr);          BACKCHAR(eptr);
1628          }          }
1629        }        }
# Line 1455  for (;;) Line 1634  for (;;)
1634    
1635        {        {
1636        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1637        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1638        }        }
1639    
1640      /* Save the earliest consulted character, then skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
# Line 1469  for (;;) Line 1648  for (;;)
1648      function is able to force a failure. */      function is able to force a failure. */
1649    
1650      case OP_CALLOUT:      case OP_CALLOUT:
1651      if (pcre_callout != NULL)      if (PUBL(callout) != NULL)
1652        {        {
1653        pcre_callout_block cb;        PUBL(callout_block) cb;
1654        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 2;   /* Version 2 of the callout block */
1655        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1656        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1657    #ifdef COMPILE_PCRE8
1658        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1659    #else
1660          cb.subject          = (PCRE_SPTR16)md->start_subject;
1661    #endif
1662        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1663        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
1664        cb.current_position = (int)(eptr - md->start_subject);        cb.current_position = (int)(eptr - md->start_subject);
# Line 1484  for (;;) Line 1667  for (;;)
1667        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1668        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1669        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1670        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        cb.mark             = md->nomatch_mark;
1671          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1672        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1673        }        }
1674      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 1494  for (;;) Line 1678  for (;;)
1678      offset data is the offset to the starting bracket from the start of the      offset data is the offset to the starting bracket from the start of the
1679      whole pattern. (This is so that it works from duplicated subpatterns.)      whole pattern. (This is so that it works from duplicated subpatterns.)
1680    
1681      If there are any capturing brackets started but not finished, we have to      The state of the capturing groups is preserved over recursion, and
1682      save their starting points and reinstate them after the recursion. However,      re-instated afterwards. We don't know how many are started and not yet
1683      we don't know how many such there are (offset_top records the completed      finished (offset_top records the completed total) so we just have to save
1684      total) so we just have to save all the potential data. There may be up to      all the potential data. There may be up to 65535 such values, which is too
1685      65535 such values, which is too large to put on the stack, but using malloc      large to put on the stack, but using malloc for small numbers seems
1686      for small numbers seems expensive. As a compromise, the stack is used when      expensive. As a compromise, the stack is used when there are no more than
1687      there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc      REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
     is used. A problem is what to do if the malloc fails ... there is no way of  
     returning to the top level with an error. Save the top REC_STACK_SAVE_MAX  
     values on the stack, and accept that the rest may be wrong.  
1688    
1689      There are also other values that have to be saved. We use a chained      There are also other values that have to be saved. We use a chained
1690      sequence of blocks that actually live on the stack. Thanks to Robin Houston      sequence of blocks that actually live on the stack. Thanks to Robin Houston
1691      for the original version of this logic. */      for the original version of this logic. It has, however, been hacked around
1692        a lot, so he is not to blame for the current way it works. */
1693    
1694      case OP_RECURSE:      case OP_RECURSE:
1695        {        {
1696          recursion_info *ri;
1697          int recno;
1698    
1699        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1700        new_recursive.group_num = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
1701          GET2(callpat, 1 + LINK_SIZE);          GET2(callpat, 1 + LINK_SIZE);
1702    
1703          /* Check for repeating a recursion without advancing the subject pointer.
1704          This should catch convoluted mutual recursions. (Some simple cases are
1705          caught at compile time.) */
1706    
1707          for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1708            if (recno == ri->group_num && eptr == ri->subject_position)
1709              RRETURN(PCRE_ERROR_RECURSELOOP);
1710    
1711        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1712    
1713          new_recursive.group_num = recno;
1714          new_recursive.subject_position = eptr;
1715        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1716        md->recursive = &new_recursive;        md->recursive = &new_recursive;
1717    
1718        /* Find where to continue from afterwards */        /* Where to continue from afterwards */
1719    
1720        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
       new_recursive.after_call = ecode;  
1721    
1722        /* Now save the offset data. */        /* Now save the offset data */
1723    
1724        new_recursive.saved_max = md->offset_end;        new_recursive.saved_max = md->offset_end;
1725        if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)        if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
# Line 1533  for (;;) Line 1727  for (;;)
1727        else        else
1728          {          {
1729          new_recursive.offset_save =          new_recursive.offset_save =
1730            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));            (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1731          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1732          }          }
   
1733        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1734              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1735        new_recursive.save_offset_top = offset_top;  
1736          /* OK, now we can do the recursion. After processing each alternative,
1737        /* OK, now we can do the recursion. For each top-level alternative we        restore the offset data. If there were nested recursions, md->recursive
1738        restore the offset and recursion data. */        might be changed, so reset it before looping. */
1739    
1740        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1741        cbegroup = (*callpat >= OP_SBRA);        cbegroup = (*callpat >= OP_SBRA);
1742        do        do
1743          {          {
1744          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1745          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1746            md, eptrb, RM6);            md, eptrb, RM6);
1747            memcpy(md->offset_vector, new_recursive.offset_save,
1748                new_recursive.saved_max * sizeof(int));
1749            md->recursive = new_recursive.prevrec;
1750          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1751            {            {
1752            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
           md->recursive = new_recursive.prevrec;  
1753            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1754              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1755            MRRETURN(MATCH_MATCH);  
1756              /* Set where we got to in the subject, and reset the start in case
1757              it was changed by \K. This *is* propagated back out of a recursion,
1758              for Perl compatibility. */
1759    
1760              eptr = md->end_match_ptr;
1761              mstart = md->start_match_ptr;
1762              goto RECURSION_MATCHED;        /* Exit loop; end processing */
1763            }            }
1764          else if (rrc != MATCH_NOMATCH &&  
1765                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* PCRE does not allow THEN to escape beyond a recursion; it is treated
1766            as NOMATCH. */
1767    
1768            else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1769            {            {
1770            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1771            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1772              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1773            RRETURN(rrc);            RRETURN(rrc);
1774            }            }
1775    
1776          md->recursive = &new_recursive;          md->recursive = &new_recursive;
         memcpy(md->offset_vector, new_recursive.offset_save,  
             new_recursive.saved_max * sizeof(int));  
1777          callpat += GET(callpat, 1);          callpat += GET(callpat, 1);
1778          }          }
1779        while (*callpat == OP_ALT);        while (*callpat == OP_ALT);
# Line 1578  for (;;) Line 1781  for (;;)
1781        DPRINTF(("Recursion didn't match\n"));        DPRINTF(("Recursion didn't match\n"));
1782        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1783        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1784          (pcre_free)(new_recursive.offset_save);          (PUBL(free))(new_recursive.offset_save);
1785        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
       }  
     /* Control never reaches here */  
   
     /* "Once" brackets are like assertion brackets except that after a match,  
     the point in the subject string is not moved back. Thus there can never be  
     a move back into the brackets. Friedl calls these "atomic" subpatterns.  
     Check the alternative branches in turn - the matching won't pass the KET  
     for this kind of subpattern. If any one branch matches, we carry on as at  
     the end of a normal bracket, leaving the subject pointer, but resetting  
     the start-of-match value in case it was changed by \K. */  
   
     case OP_ONCE:  
     prev = ecode;  
     saved_eptr = eptr;  
   
     do  
       {  
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);  
       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */  
         {  
         mstart = md->start_match_ptr;  
         break;  
         }  
       if (rrc != MATCH_NOMATCH &&  
           (rrc != MATCH_THEN || md->start_match_ptr != ecode))  
         RRETURN(rrc);  
       ecode += GET(ecode,1);  
       }  
     while (*ecode == OP_ALT);  
   
     /* If hit the end of the group (which could be repeated), fail */  
   
     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);  
   
     /* Continue as from after the assertion, updating the offsets high water  
     mark, since extracts may have been taken. */  
   
     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);  
   
     offset_top = md->end_offset_top;  
     eptr = md->end_match_ptr;  
   
     /* For a non-repeating ket, just continue at this level. This also  
     happens for a repeating ket if no characters were matched in the group.  
     This is the forcible breaking of infinite loops as implemented in Perl  
     5.005. If there is an options reset, it will get obeyed in the normal  
     course of events. */  
   
     if (*ecode == OP_KET || eptr == saved_eptr)  
       {  
       ecode += 1+LINK_SIZE;  
       break;  
1786        }        }
1787    
1788      /* The repeating kets try the rest of the pattern or restart from the      RECURSION_MATCHED:
1789      preceding bracket, in the appropriate order. The second "call" of match()      break;
     uses tail recursion, to avoid using another stack frame. */  
   
     if (*ecode == OP_KETRMIN)  
       {  
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM8);  
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
       ecode = prev;  
       goto TAIL_RECURSE;  
       }  
     else  /* OP_KETRMAX */  
       {  
       md->match_function_type = MATCH_CBEGROUP;  
       RMATCH(eptr, prev, offset_top, md, eptrb, RM9);  
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
       ecode += 1 + LINK_SIZE;  
       goto TAIL_RECURSE;  
       }  
     /* Control never gets here */  
1790    
1791      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1792      bracketed group and go to there. */      bracketed group and go to there. */
# Line 1667  for (;;) Line 1800  for (;;)
1800      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1801      with fixed upper repeat limits are compiled as a number of copies, with the      with fixed upper repeat limits are compiled as a number of copies, with the
1802      optional ones preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1803    
1804      case OP_BRAZERO:      case OP_BRAZERO:
1805      next = ecode + 1;      next = ecode + 1;
1806      RMATCH(eptr, next, offset_top, md, eptrb, RM10);      RMATCH(eptr, next, offset_top, md, eptrb, RM10);
# Line 1675  for (;;) Line 1808  for (;;)
1808      do next += GET(next, 1); while (*next == OP_ALT);      do next += GET(next, 1); while (*next == OP_ALT);
1809      ecode = next + 1 + LINK_SIZE;      ecode = next + 1 + LINK_SIZE;
1810      break;      break;
1811    
1812      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1813      next = ecode + 1;      next = ecode + 1;
1814      do next += GET(next, 1); while (*next == OP_ALT);      do next += GET(next, 1); while (*next == OP_ALT);
# Line 1689  for (;;) Line 1822  for (;;)
1822      do next += GET(next,1); while (*next == OP_ALT);      do next += GET(next,1); while (*next == OP_ALT);
1823      ecode = next + 1 + LINK_SIZE;      ecode = next + 1 + LINK_SIZE;
1824      break;      break;
1825    
1826      /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything      /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1827      here; just jump to the group, with allow_zero set TRUE. */      here; just jump to the group, with allow_zero set TRUE. */
1828    
1829      case OP_BRAPOSZERO:      case OP_BRAPOSZERO:
1830      op = *(++ecode);      op = *(++ecode);
1831      allow_zero = TRUE;      allow_zero = TRUE;
1832      if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;      if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1833        goto POSSESSIVE_NON_CAPTURE;        goto POSSESSIVE_NON_CAPTURE;
# Line 1704  for (;;) Line 1837  for (;;)
1837      case OP_KET:      case OP_KET:
1838      case OP_KETRMIN:      case OP_KETRMIN:
1839      case OP_KETRMAX:      case OP_KETRMAX:
1840      case OP_KETRPOS:      case OP_KETRPOS:
1841      prev = ecode - GET(ecode, 1);      prev = ecode - GET(ecode, 1);
1842    
1843      /* If this was a group that remembered the subject start, in order to break      /* If this was a group that remembered the subject start, in order to break
1844      infinite repeats of empty string matches, retrieve the subject start from      infinite repeats of empty string matches, retrieve the subject start from
1845      the chain. Otherwise, set it NULL. */      the chain. Otherwise, set it NULL. */
1846    
1847      if (*prev >= OP_SBRA)      if (*prev >= OP_SBRA || *prev == OP_ONCE)
1848        {        {
1849        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1850        eptrb = eptrb->epb_prev;              /* Backup to previous group */        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1851        }        }
1852      else saved_eptr = NULL;      else saved_eptr = NULL;
1853    
1854      /* If we are at the end of an assertion group or an atomic group, stop      /* If we are at the end of an assertion group or a non-capturing atomic
1855      matching and return MATCH_MATCH, but record the current high water mark for      group, stop matching and return MATCH_MATCH, but record the current high
1856      use by positive assertions. We also need to record the match start in case      water mark for use by positive assertions. We also need to record the match
1857      it was changed by \K. */      start in case it was changed by \K. */
1858    
1859      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1860          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||           *prev == OP_ONCE_NC)
         *prev == OP_ONCE)  
1861        {        {
1862        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE_NC */
1863        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1864        md->start_match_ptr = mstart;        md->start_match_ptr = mstart;
1865        MRRETURN(MATCH_MATCH);        RRETURN(MATCH_MATCH);         /* Sets md->mark */
1866        }        }
1867    
1868      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
1869      and if necessary complete handling an extraction by setting the offsets and      and if necessary complete handling an extraction by setting the offsets and
1870      bumping the high water mark. Note that whole-pattern recursion is coded as      bumping the high water mark. Whole-pattern recursion is coded as a recurse
1871      a recurse into group 0, so it won't be picked up here. Instead, we catch it      into group 0, so it won't be picked up here. Instead, we catch it when the
1872      when the OP_END is reached. Other recursion is handled here. */      OP_END is reached. Other recursion is handled here. We just have to record
1873        the current subject position and start match pointer and give a MATCH
1874        return. */
1875    
1876      if (*prev == OP_CBRA || *prev == OP_SCBRA ||      if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1877          *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)          *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
# Line 1750  for (;;) Line 1884  for (;;)
1884        printf("\n");        printf("\n");
1885  #endif  #endif
1886    
1887          /* Handle a recursively called group. */
1888    
1889          if (md->recursive != NULL && md->recursive->group_num == number)
1890            {
1891            md->end_match_ptr = eptr;
1892            md->start_match_ptr = mstart;
1893            RRETURN(MATCH_MATCH);
1894            }
1895    
1896          /* Deal with capturing */
1897    
1898        md->capture_last = number;        md->capture_last = number;
1899        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1900          {          {
1901            /* If offset is greater than offset_top, it means that we are
1902            "skipping" a capturing group, and that group's offsets must be marked
1903            unset. In earlier versions of PCRE, all the offsets were unset at the
1904            start of matching, but this doesn't work because atomic groups and
1905            assertions can cause a value to be set that should later be unset.
1906            Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1907            part of the atomic group, but this is not on the final matching path,
1908            so must be unset when 2 is set. (If there is no group 2, there is no
1909            problem, because offset_top will then be 2, indicating no capture.) */
1910    
1911            if (offset > offset_top)
1912              {
1913              register int *iptr = md->offset_vector + offset_top;
1914              register int *iend = md->offset_vector + offset;
1915              while (iptr < iend) *iptr++ = -1;
1916              }
1917    
1918            /* Now make the extraction */
1919    
1920          md->offset_vector[offset] =          md->offset_vector[offset] =
1921            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1922          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1923          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1924          }          }
   
       /* Handle a recursively called group. Restore the offsets  
       appropriately and continue from after the call. */  
   
       if (md->recursive != NULL && md->recursive->group_num == number)  
         {  
         recursion_info *rec = md->recursive;  
         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));  
         md->recursive = rec->prevrec;  
         memcpy(md->offset_vector, rec->offset_save,  
           rec->saved_max * sizeof(int));  
         offset_top = rec->save_offset_top;  
         ecode = rec->after_call;  
         break;  
         }  
1925        }        }
1926    
1927      /* For a non-repeating ket, just continue at this level. This also      /* For an ordinary non-repeating ket, just continue at this level. This
1928      happens for a repeating ket if no characters were matched in the group.      also happens for a repeating ket if no characters were matched in the
1929      This is the forcible breaking of infinite loops as implemented in Perl      group. This is the forcible breaking of infinite loops as implemented in
1930      5.005. If there is an options reset, it will get obeyed in the normal      Perl 5.005. For a non-repeating atomic group that includes captures,
1931      course of events. */      establish a backup point by processing the rest of the pattern at a lower
1932        level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1933        original OP_ONCE level, thereby bypassing intermediate backup points, but
1934        resetting any captures that happened along the way. */
1935    
1936      if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1937        {        {
1938        ecode += 1 + LINK_SIZE;        if (*prev == OP_ONCE)
1939            {
1940            RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1941            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1942            md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1943            RRETURN(MATCH_ONCE);
1944            }
1945          ecode += 1 + LINK_SIZE;    /* Carry on at this level */
1946        break;        break;
1947        }        }
1948    
1949      /* OP_KETRPOS is a possessive repeating ket. Remember the current position,      /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1950      and return the MATCH_KETRPOS. This makes it possible to do the repeats one      and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1951      at a time from the outer level, thus saving stack. */      at a time from the outer level, thus saving stack. */
1952    
1953      if (*ecode == OP_KETRPOS)      if (*ecode == OP_KETRPOS)
1954        {        {
1955        md->end_match_ptr = eptr;        md->end_match_ptr = eptr;
1956        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1957        RRETURN(MATCH_KETRPOS);        RRETURN(MATCH_KETRPOS);
1958        }        }
1959    
1960      /* The normal repeating kets try the rest of the pattern or restart from      /* The normal repeating kets try the rest of the pattern or restart from
1961      the preceding bracket, in the appropriate order. In the second case, we can      the preceding bracket, in the appropriate order. In the second case, we can
1962      use tail recursion to avoid using another stack frame, unless we have an      use tail recursion to avoid using another stack frame, unless we have an
1963      unlimited repeat of a group that can match an empty string. */      atomic group or an unlimited repeat of a group that can match an empty
1964        string. */
1965    
1966      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1967        {        {
1968        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1969        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1970          if (*prev == OP_ONCE)
1971            {
1972            RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
1973            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1974            md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1975            RRETURN(MATCH_ONCE);
1976            }
1977        if (*prev >= OP_SBRA)    /* Could match an empty string */        if (*prev >= OP_SBRA)    /* Could match an empty string */
1978          {          {
1979          md->match_function_type = MATCH_CBEGROUP;          md->match_function_type = MATCH_CBEGROUP;
1980          RMATCH(eptr, prev, offset_top, md, eptrb, RM50);          RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
1981          RRETURN(rrc);          RRETURN(rrc);
1982          }          }
# Line 1818  for (;;) Line 1985  for (;;)
1985        }        }
1986      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1987        {        {
1988        if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1989        RMATCH(eptr, prev, offset_top, md, eptrb, RM13);        RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
1990          if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
1991        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1992          if (*prev == OP_ONCE)
1993            {
1994            RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
1995            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1996            md->once_target = prev;
1997            RRETURN(MATCH_ONCE);
1998            }
1999        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
2000        goto TAIL_RECURSE;        goto TAIL_RECURSE;
2001        }        }
# Line 1829  for (;;) Line 2004  for (;;)
2004      /* Not multiline mode: start of subject assertion, unless notbol. */      /* Not multiline mode: start of subject assertion, unless notbol. */
2005    
2006      case OP_CIRC:      case OP_CIRC:
2007      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2008    
2009      /* Start of subject assertion */      /* Start of subject assertion */
2010    
2011      case OP_SOD:      case OP_SOD:
2012      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2013      ecode++;      ecode++;
2014      break;      break;
2015    
2016      /* Multiline mode: start of subject unless notbol, or after any newline. */      /* Multiline mode: start of subject unless notbol, or after any newline. */
2017    
2018      case OP_CIRCM:      case OP_CIRCM:
2019      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2020      if (eptr != md->start_subject &&      if (eptr != md->start_subject &&
2021          (eptr == md->end_subject || !WAS_NEWLINE(eptr)))          (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2022        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2023      ecode++;      ecode++;
2024      break;      break;
2025    
2026      /* Start of match assertion */      /* Start of match assertion */
2027    
2028      case OP_SOM:      case OP_SOM:
2029      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2030      ecode++;      ecode++;
2031      break;      break;
2032    
# Line 1867  for (;;) Line 2042  for (;;)
2042    
2043      case OP_DOLLM:      case OP_DOLLM:
2044      if (eptr < md->end_subject)      if (eptr < md->end_subject)
2045        { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }        { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
2046      else      else
2047        {        {
2048        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
2049        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2050        }        }
2051      ecode++;      ecode++;
2052      break;      break;
2053    
2054      /* Not multiline mode: assert before a terminating newline or before end of      /* Not multiline mode: assert before a terminating newline or before end of
2055      subject unless noteol is set. */      subject unless noteol is set. */
2056    
2057      case OP_DOLL:      case OP_DOLL:
2058      if (md->noteol) MRRETURN(MATCH_NOMATCH);      if (md->noteol) RRETURN(MATCH_NOMATCH);
2059      if (!md->endonly) goto ASSERT_NL_OR_EOS;      if (!md->endonly) goto ASSERT_NL_OR_EOS;
2060    
2061      /* ... else fall through for endonly */      /* ... else fall through for endonly */
# Line 1888  for (;;) Line 2063  for (;;)
2063      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
2064    
2065      case OP_EOD:      case OP_EOD:
2066      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2067      SCHECK_PARTIAL();      SCHECK_PARTIAL();
2068      ecode++;      ecode++;
2069      break;      break;
# Line 1899  for (;;) Line 2074  for (;;)
2074      ASSERT_NL_OR_EOS:      ASSERT_NL_OR_EOS:
2075      if (eptr < md->end_subject &&      if (eptr < md->end_subject &&
2076          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2077        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2078    
2079      /* Either at end of string or \n before end. */      /* Either at end of string or \n before end. */
2080    
# Line 1918  for (;;) Line 2093  for (;;)
2093        be "non-word" characters. Remember the earliest consulted character for        be "non-word" characters. Remember the earliest consulted character for
2094        partial matching. */        partial matching. */
2095    
2096  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2097        if (utf8)        if (utf)
2098          {          {
2099          /* Get status of previous character */          /* Get status of previous character */
2100    
2101          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
2102            {            {
2103            USPTR lastptr = eptr - 1;            PCRE_PUCHAR lastptr = eptr - 1;
2104            while((*lastptr & 0xc0) == 0x80) lastptr--;            BACKCHAR(lastptr);
2105            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2106            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
2107  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 1991  for (;;) Line 2166  for (;;)
2166              }              }
2167            else            else
2168  #endif  #endif
2169            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = MAX_255(eptr[-1])
2170                && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2171            }            }
2172    
2173          /* Get status of next character */          /* Get status of next character */
# Line 2014  for (;;) Line 2190  for (;;)
2190            }            }
2191          else          else
2192  #endif  #endif
2193          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = MAX_255(*eptr)
2194              && ((md->ctypes[*eptr] & ctype_word) != 0);
2195          }          }
2196    
2197        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
2198    
2199        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
2200             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2201          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2202        }        }
2203      break;      break;
2204    
2205      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
2206    
2207      case OP_ANY:      case OP_ANY:
2208      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2209      /* Fall through */      /* Fall through */
2210    
2211      case OP_ALLANY:      case OP_ALLANY:
2212      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2213        {        {                            /* not be updated before SCHECK_PARTIAL. */
2214        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2215        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2216        }        }
2217      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      eptr++;
2218    #ifdef SUPPORT_UTF
2219        if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2220    #endif
2221      ecode++;      ecode++;
2222      break;      break;
2223    
# Line 2045  for (;;) Line 2225  for (;;)
2225      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
2226    
2227      case OP_ANYBYTE:      case OP_ANYBYTE:
2228      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2229        {        {                            /* not be updated before SCHECK_PARTIAL. */
2230        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2231        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2232        }        }
2233        eptr++;
2234      ecode++;      ecode++;
2235      break;      break;
2236    
# Line 2057  for (;;) Line 2238  for (;;)
2238      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2239        {        {
2240        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2241        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2242        }        }
2243      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2244      if (      if (
2245  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2246         c < 256 &&         c < 256 &&
2247  #endif  #endif
2248         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
2249         )         )
2250        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2251      ecode++;      ecode++;
2252      break;      break;
2253    
# Line 2074  for (;;) Line 2255  for (;;)
2255      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2256        {        {
2257        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2258        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2259        }        }
2260      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2261      if (      if (
2262  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2263         c >= 256 ||         c > 255 ||
2264  #endif  #endif
2265         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
2266         )         )
2267        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2268      ecode++;      ecode++;
2269      break;      break;
2270    
# Line 2091  for (;;) Line 2272  for (;;)
2272      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2273        {        {
2274        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2275        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2276        }        }
2277      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2278      if (      if (
2279  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2280         c < 256 &&         c < 256 &&
2281  #endif  #endif
2282         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
2283         )         )
2284        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2285      ecode++;      ecode++;
2286      break;      break;
2287    
# Line 2108  for (;;) Line 2289  for (;;)
2289      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2290        {        {
2291        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2292        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2293        }        }
2294      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2295      if (      if (
2296  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2297         c >= 256 ||         c > 255 ||
2298  #endif  #endif
2299         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
2300         )         )
2301        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2302      ecode++;      ecode++;
2303      break;      break;
2304    
# Line 2125  for (;;) Line 2306  for (;;)
2306      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2307        {        {
2308        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2309        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2310        }        }
2311      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2312      if (      if (
2313  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2314         c < 256 &&         c < 256 &&
2315  #endif  #endif
2316         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
2317         )         )
2318        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2319      ecode++;      ecode++;
2320      break;      break;
2321    
# Line 2142  for (;;) Line 2323  for (;;)
2323      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2324        {        {
2325        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2326        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2327        }        }
2328      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2329      if (      if (
2330  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2331         c >= 256 ||         c > 255 ||
2332  #endif  #endif
2333         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2334         )         )
2335        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2336      ecode++;      ecode++;
2337      break;      break;
2338    
# Line 2159  for (;;) Line 2340  for (;;)
2340      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2341        {        {
2342        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2343        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2344        }        }
2345      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2346      switch(c)      switch(c)
2347        {        {
2348        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2349    
2350        case 0x000d:        case 0x000d:
2351        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2352        break;        break;
# Line 2178  for (;;) Line 2359  for (;;)
2359        case 0x0085:        case 0x0085:
2360        case 0x2028:        case 0x2028:
2361        case 0x2029:        case 0x2029:
2362        if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);        if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2363        break;        break;
2364        }        }
2365      ecode++;      ecode++;
# Line 2188  for (;;) Line 2369  for (;;)
2369      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2370        {        {
2371        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2372        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2373        }        }
2374      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2375      switch(c)      switch(c)
# Line 2213  for (;;) Line 2394  for (;;)
2394        case 0x202f:    /* NARROW NO-BREAK SPACE */        case 0x202f:    /* NARROW NO-BREAK SPACE */
2395        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2396        case 0x3000:    /* IDEOGRAPHIC SPACE */        case 0x3000:    /* IDEOGRAPHIC SPACE */
2397        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2398        }        }
2399      ecode++;      ecode++;
2400      break;      break;
# Line 2222  for (;;) Line 2403  for (;;)
2403      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2404        {        {
2405        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2406        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2407        }        }
2408      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2409      switch(c)      switch(c)
2410        {        {
2411        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2412        case 0x09:      /* HT */        case 0x09:      /* HT */
2413        case 0x20:      /* SPACE */        case 0x20:      /* SPACE */
2414        case 0xa0:      /* NBSP */        case 0xa0:      /* NBSP */
# Line 2256  for (;;) Line 2437  for (;;)
2437      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2438        {        {
2439        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2440        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2441        }        }
2442      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2443      switch(c)      switch(c)
# Line 2269  for (;;) Line 2450  for (;;)
2450        case 0x85:      /* NEL */        case 0x85:      /* NEL */
2451        case 0x2028:    /* LINE SEPARATOR */        case 0x2028:    /* LINE SEPARATOR */
2452        case 0x2029:    /* PARAGRAPH SEPARATOR */        case 0x2029:    /* PARAGRAPH SEPARATOR */
2453        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2454        }        }
2455      ecode++;      ecode++;
2456      break;      break;
# Line 2278  for (;;) Line 2459  for (;;)
2459      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2460        {        {
2461        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2462        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2463        }        }
2464      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2465      switch(c)      switch(c)
2466        {        {
2467        default: MRRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2468        case 0x0a:      /* LF */        case 0x0a:      /* LF */
2469        case 0x0b:      /* VT */        case 0x0b:      /* VT */
2470        case 0x0c:      /* FF */        case 0x0c:      /* FF */
# Line 2305  for (;;) Line 2486  for (;;)
2486      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2487        {        {
2488        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2489        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2490        }        }
2491      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2492        {        {
# Line 2314  for (;;) Line 2495  for (;;)
2495        switch(ecode[1])        switch(ecode[1])
2496          {          {
2497          case PT_ANY:          case PT_ANY:
2498          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2499          break;          break;
2500    
2501          case PT_LAMP:          case PT_LAMP:
2502          if ((prop->chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2503               prop->chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2504               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2505            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2506          break;          break;
2507    
2508          case PT_GC:          case PT_GC:
2509          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2510            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2511          break;          break;
2512    
2513          case PT_PC:          case PT_PC:
2514          if ((ecode[2] != prop->chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2515            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2516          break;          break;
2517    
2518          case PT_SC:          case PT_SC:
2519          if ((ecode[2] != prop->script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2520            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2521          break;          break;
2522    
2523          /* These are specials */          /* These are specials */
2524    
2525          case PT_ALNUM:          case PT_ALNUM:
2526          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2527               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2528            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2529          break;          break;
2530    
2531          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2532          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2533               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2534                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2535            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2536          break;          break;
2537    
2538          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2539          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2540               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2541               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2542                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2543            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2544          break;          break;
2545    
2546          case PT_WORD:          case PT_WORD:
2547          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2548               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2549               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2550            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2551          break;          break;
2552    
2553          /* This should never occur */          /* This should never occur */
# Line 2386  for (;;) Line 2567  for (;;)
2567      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2568        {        {
2569        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2570        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2571        }        }
2572      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2573        if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
2574        while (eptr < md->end_subject)
2575        {        {
2576        int category = UCD_CATEGORY(c);        int len = 1;
2577        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);        if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2578        while (eptr < md->end_subject)        if (UCD_CATEGORY(c) != ucp_M) break;
2579          {        eptr += len;
         int len = 1;  
         if (!utf8) c = *eptr; else  
           {  
           GETCHARLEN(c, eptr, len);  
           }  
         category = UCD_CATEGORY(c);  
         if (category != ucp_M) break;  
         eptr += len;  
         }  
2580        }        }
2581      ecode++;      ecode++;
2582      break;      break;
# Line 2418  for (;;) Line 2592  for (;;)
2592      loops). */      loops). */
2593    
2594      case OP_REF:      case OP_REF:
2595      case OP_REFI:      case OP_REFI:
2596      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2597      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2598      ecode += 3;      ecode += 1 + IMM2_SIZE;
2599    
2600      /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2601    
# Line 2461  for (;;) Line 2635  for (;;)
2635        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2636        minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2637        min = GET2(ecode, 1);        min = GET2(ecode, 1);
2638        max = GET2(ecode, 3);        max = GET2(ecode, 1 + IMM2_SIZE);
2639        if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2640        ecode += 5;        ecode += 1 + 2 * IMM2_SIZE;
2641        break;        break;
2642    
2643        default:               /* No repeat follows */        default:               /* No repeat follows */
2644        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2645          {          {
2646          CHECK_PARTIAL();          CHECK_PARTIAL();
2647          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2648          }          }
2649        eptr += length;        eptr += length;
2650        continue;              /* With the main loop */        continue;              /* With the main loop */
2651        }        }
2652    
2653      /* Handle repeated back references. If the length of the reference is      /* Handle repeated back references. If the length of the reference is
2654      zero, just continue with the main loop. */      zero, just continue with the main loop. If the length is negative, it
2655        means the reference is unset in non-Java-compatible mode. If the minimum is
2656        zero, we can continue at the same level without recursion. For any other
2657        minimum, carrying on will result in NOMATCH. */
2658    
2659      if (length == 0) continue;      if (length == 0) continue;
2660        if (length < 0 && min == 0) continue;
2661    
2662      /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2663      the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
# Line 2487  for (;;) Line 2665  for (;;)
2665    
2666      for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2667        {        {
2668        int slength;        int slength;
2669        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2670          {          {
2671          CHECK_PARTIAL();          CHECK_PARTIAL();
2672          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2673          }          }
2674        eptr += slength;        eptr += slength;
2675        }        }
# Line 2507  for (;;) Line 2685  for (;;)
2685        {        {
2686        for (fi = min;; fi++)        for (fi = min;; fi++)
2687          {          {
2688          int slength;          int slength;
2689          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2690          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2691          if (fi >= max) MRRETURN(MATCH_NOMATCH);          if (fi >= max) RRETURN(MATCH_NOMATCH);
2692          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2693            {            {
2694            CHECK_PARTIAL();            CHECK_PARTIAL();
2695            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2696            }            }
2697          eptr += slength;          eptr += slength;
2698          }          }
# Line 2528  for (;;) Line 2706  for (;;)
2706        pp = eptr;        pp = eptr;
2707        for (i = min; i < max; i++)        for (i = min; i < max; i++)
2708          {          {
2709          int slength;          int slength;
2710          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2711            {            {
2712            CHECK_PARTIAL();            CHECK_PARTIAL();
# Line 2542  for (;;) Line 2720  for (;;)
2720          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2721          eptr -= length;          eptr -= length;
2722          }          }
2723        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2724        }        }
2725      /* Control never gets here */      /* Control never gets here */
2726    
# Line 2560  for (;;) Line 2738  for (;;)
2738      case OP_NCLASS:      case OP_NCLASS:
2739      case OP_CLASS:      case OP_CLASS:
2740        {        {
2741          /* The data variable is saved across frames, so the byte map needs to
2742          be stored there. */
2743    #define BYTE_MAP ((pcre_uint8 *)data)
2744        data = ecode + 1;                /* Save for matching */        data = ecode + 1;                /* Save for matching */
2745        ecode += 33;                     /* Advance past the item */        ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2746    
2747        switch (*ecode)        switch (*ecode)
2748          {          {
# Line 2582  for (;;) Line 2763  for (;;)
2763          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2764          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2765          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2766          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2767          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2768          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2769          break;          break;
2770    
2771          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2594  for (;;) Line 2775  for (;;)
2775    
2776        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
2777    
2778  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2779        /* UTF-8 mode */        if (utf)
       if (utf8)  
2780          {          {
2781          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2782            {            {
2783            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2784              {              {
2785              SCHECK_PARTIAL();              SCHECK_PARTIAL();
2786              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2787              }              }
2788            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2789            if (c > 255)            if (c > 255)
2790              {              {
2791              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2792              }              }
2793            else            else
2794              {              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
             }  
2795            }            }
2796          }          }
2797        else        else
2798  #endif  #endif
2799        /* Not UTF-8 mode */        /* Not UTF mode */
2800          {          {
2801          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2802            {            {
2803            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2804              {              {
2805              SCHECK_PARTIAL();              SCHECK_PARTIAL();
2806              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2807              }              }
2808            c = *eptr++;            c = *eptr++;
2809            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2810              if (c > 255)
2811                {
2812                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2813                }
2814              else
2815    #endif
2816                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2817            }            }
2818          }          }
2819    
# Line 2642  for (;;) Line 2827  for (;;)
2827    
2828        if (minimize)        if (minimize)
2829          {          {
2830  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2831          /* UTF-8 mode */          if (utf)
         if (utf8)  
2832            {            {
2833            for (fi = min;; fi++)            for (fi = min;; fi++)
2834              {              {
2835              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2836              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2837              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2838              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2839                {                {
2840                SCHECK_PARTIAL();                SCHECK_PARTIAL();
2841                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2842                }                }
2843              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2844              if (c > 255)              if (c > 255)
2845                {                {
2846                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2847                }                }
2848              else              else
2849                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
               }  
2850              }              }
2851            }            }
2852          else          else
2853  #endif  #endif
2854          /* Not UTF-8 mode */          /* Not UTF mode */
2855            {            {
2856            for (fi = min;; fi++)            for (fi = min;; fi++)
2857              {              {
2858              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2859              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2860              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2861              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2862                {                {
2863                SCHECK_PARTIAL();                SCHECK_PARTIAL();
2864                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2865                }                }
2866              c = *eptr++;              c = *eptr++;
2867              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2868                if (c > 255)
2869                  {
2870                  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2871                  }
2872                else
2873    #endif
2874                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2875              }              }
2876            }            }
2877          /* Control never gets here */          /* Control never gets here */
# Line 2694  for (;;) Line 2883  for (;;)
2883          {          {
2884          pp = eptr;          pp = eptr;
2885    
2886  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2887          /* UTF-8 mode */          if (utf)
         if (utf8)  
2888            {            {
2889            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2890              {              {
# Line 2712  for (;;) Line 2900  for (;;)
2900                if (op == OP_CLASS) break;                if (op == OP_CLASS) break;
2901                }                }
2902              else              else
2903                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
               if ((data[c/8] & (1 << (c&7))) == 0) break;  
               }  
2904              eptr += len;              eptr += len;
2905              }              }
2906            for (;;)            for (;;)
# Line 2727  for (;;) Line 2913  for (;;)
2913            }            }
2914          else          else
2915  #endif  #endif
2916            /* Not UTF-8 mode */            /* Not UTF mode */
2917            {            {
2918            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2919              {              {
# Line 2737  for (;;) Line 2923  for (;;)
2923                break;                break;
2924                }                }
2925              c = *eptr;              c = *eptr;
2926              if ((data[c/8] & (1 << (c&7))) == 0) break;  #ifndef COMPILE_PCRE8
2927                if (c > 255)
2928                  {
2929                  if (op == OP_CLASS) break;
2930                  }
2931                else
2932    #endif
2933                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2934              eptr++;              eptr++;
2935              }              }
2936            while (eptr >= pp)            while (eptr >= pp)
# Line 2748  for (;;) Line 2941  for (;;)
2941              }              }
2942            }            }
2943    
2944          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2945          }          }
2946    #undef BYTE_MAP
2947        }        }
2948      /* Control never gets here */      /* Control never gets here */
2949    
# Line 2758  for (;;) Line 2952  for (;;)
2952      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2953      mode, because Unicode properties are supported in non-UTF-8 mode. */      mode, because Unicode properties are supported in non-UTF-8 mode. */
2954    
2955  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2956      case OP_XCLASS:      case OP_XCLASS:
2957        {        {
2958        data = ecode + 1 + LINK_SIZE;                /* Save for matching */        data = ecode + 1 + LINK_SIZE;                /* Save for matching */
# Line 2783  for (;;) Line 2977  for (;;)
2977          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2978          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2979          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2980          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2981          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2982          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2983          break;          break;
2984    
2985          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2800  for (;;) Line 2994  for (;;)
2994          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
2995            {            {
2996            SCHECK_PARTIAL();            SCHECK_PARTIAL();
2997            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2998            }            }
2999          GETCHARINCTEST(c, eptr);          GETCHARINCTEST(c, eptr);
3000          if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3001          }          }
3002    
3003        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2820  for (;;) Line 3014  for (;;)
3014            {            {
3015            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3016            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3017            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3018            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3019              {              {
3020              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3021              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3022              }              }
3023            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3024            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);            if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3025            }            }
3026          /* Control never gets here */          /* Control never gets here */
3027          }          }
# Line 2845  for (;;) Line 3039  for (;;)
3039              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3040              break;              break;
3041              }              }
3042    #ifdef SUPPORT_UTF
3043            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
3044            if (!_pcre_xclass(c, data)) break;  #else
3045              c = *eptr;
3046    #endif
3047              if (!PRIV(xclass)(c, data, utf)) break;
3048            eptr += len;            eptr += len;
3049            }            }
3050          for(;;)          for(;;)
# Line 2854  for (;;) Line 3052  for (;;)
3052            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3053            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3054            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3055            if (utf8) BACKCHAR(eptr);  #ifdef SUPPORT_UTF
3056              if (utf) BACKCHAR(eptr);
3057    #endif
3058            }            }
3059          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3060          }          }
3061    
3062        /* Control never gets here */        /* Control never gets here */
# Line 2866  for (;;) Line 3066  for (;;)
3066      /* Match a single character, casefully */      /* Match a single character, casefully */
3067    
3068      case OP_CHAR:      case OP_CHAR:
3069  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3070      if (utf8)      if (utf)
3071        {        {
3072        length = 1;        length = 1;
3073        ecode++;        ecode++;
# Line 2875  for (;;) Line 3075  for (;;)
3075        if (length > md->end_subject - eptr)        if (length > md->end_subject - eptr)
3076          {          {
3077          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3078          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3079          }          }
3080        while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
3081        }        }
3082      else      else
3083  #endif  #endif
3084        /* Not UTF mode */
     /* Non-UTF-8 mode */  
3085        {        {
3086        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3087          {          {
3088          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3089          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3090          }          }
3091        if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3092        ecode += 2;        ecode += 2;
3093        }        }
3094      break;      break;
3095    
3096      /* Match a single character, caselessly */      /* Match a single character, caselessly. If we are at the end of the
3097        subject, give up immediately. */
3098    
3099      case OP_CHARI:      case OP_CHARI:
3100  #ifdef SUPPORT_UTF8      if (eptr >= md->end_subject)
     if (utf8)  
3101        {        {
3102        length = 1;        SCHECK_PARTIAL();
3103        ecode++;        RRETURN(MATCH_NOMATCH);
3104        GETCHARLEN(fc, ecode, length);        }
3105    
3106        if (length > md->end_subject - eptr)  #ifdef SUPPORT_UTF
3107          {      if (utf)
3108          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */        {
3109          MRRETURN(MATCH_NOMATCH);        length = 1;
3110          }        ecode++;
3111          GETCHARLEN(fc, ecode, length);
3112    
3113        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
3114        can use the fast lookup table. */        we know that its other case must also be one byte long, so we can use the
3115          fast lookup table. We know that there is at least one byte left in the
3116          subject. */
3117    
3118        if (fc < 128)        if (fc < 128)
3119          {          {
3120          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          if (md->lcc[fc]
3121                != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3122            ecode++;
3123            eptr++;
3124          }          }
3125    
3126        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character. Note that we cannot
3127          use the value of "length" to check for sufficient bytes left, because the
3128          other case of the character may have more or fewer bytes.  */
3129    
3130        else        else
3131          {          {
# Line 2934  for (;;) Line 3141  for (;;)
3141  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3142            if (dc != UCD_OTHERCASE(fc))            if (dc != UCD_OTHERCASE(fc))
3143  #endif  #endif
3144              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3145            }            }
3146          }          }
3147        }        }
3148      else      else
3149  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF */
3150    
3151      /* Non-UTF-8 mode */      /* Not UTF mode */
3152        {        {
3153        if (md->end_subject - eptr < 1)        if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3154          {            != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3155          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */        eptr++;
         MRRETURN(MATCH_NOMATCH);  
         }  
       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);  
3156        ecode += 2;        ecode += 2;
3157        }        }
3158      break;      break;
# Line 2958  for (;;) Line 3162  for (;;)
3162      case OP_EXACT:      case OP_EXACT:
3163      case OP_EXACTI:      case OP_EXACTI:
3164      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3165      ecode += 3;      ecode += 1 + IMM2_SIZE;
3166      goto REPEATCHAR;      goto REPEATCHAR;
3167    
3168      case OP_POSUPTO:      case OP_POSUPTO:
# Line 2973  for (;;) Line 3177  for (;;)
3177      min = 0;      min = 0;
3178      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3179      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3180      ecode += 3;      ecode += 1 + IMM2_SIZE;
3181      goto REPEATCHAR;      goto REPEATCHAR;
3182    
3183      case OP_POSSTAR:      case OP_POSSTAR:
# Line 3021  for (;;) Line 3225  for (;;)
3225      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. */
3226    
3227      REPEATCHAR:      REPEATCHAR:
3228  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3229      if (utf8)      if (utf)
3230        {        {
3231        length = 1;        length = 1;
3232        charptr = ecode;        charptr = ecode;
# Line 3038  for (;;) Line 3242  for (;;)
3242          unsigned int othercase;          unsigned int othercase;
3243          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3244              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3245            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
3246          else oclength = 0;          else oclength = 0;
3247  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3248    
3249          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3250            {            {
3251            if (eptr <= md->end_subject - length &&            if (eptr <= md->end_subject - length &&
3252              memcmp(eptr, charptr, length) == 0) eptr += length;              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3253  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3254            else if (oclength > 0 &&            else if (oclength > 0 &&
3255                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
3256                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3257  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3258            else            else
3259              {              {
3260              CHECK_PARTIAL();              CHECK_PARTIAL();
3261              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3262              }              }
3263            }            }
3264    
# Line 3066  for (;;) Line 3270  for (;;)
3270              {              {
3271              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3272              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3273              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3274              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3275                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3276  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3277              else if (oclength > 0 &&              else if (oclength > 0 &&
3278                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3279                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3280  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3281              else              else
3282                {                {
3283                CHECK_PARTIAL();                CHECK_PARTIAL();
3284                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3285                }                }
3286              }              }
3287            /* Control never gets here */            /* Control never gets here */
# Line 3089  for (;;) Line 3293  for (;;)
3293            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3294              {              {
3295              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3296                  memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3297  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3298              else if (oclength > 0 &&              else if (oclength > 0 &&
3299                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3300                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3301  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3302              else              else
3303                {                {
# Line 3108  for (;;) Line 3312  for (;;)
3312              {              {
3313              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3314              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3315              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }              if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3316  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3317              eptr--;              eptr--;
3318              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3125  for (;;) Line 3329  for (;;)
3329        value of fc will always be < 128. */        value of fc will always be < 128. */
3330        }        }
3331      else      else
3332  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3333          /* When not in UTF-8 mode, load a single-byte character. */
3334      /* When not in UTF-8 mode, load a single-byte character. */        fc = *ecode++;
3335    
3336      fc = *ecode++;      /* The value of fc at this point is always one character, though we may
3337        or may not be in UTF mode. The code is duplicated for the caseless and
     /* The value of fc at this point is always less than 256, though we may or  
     may not be in UTF-8 mode. The code is duplicated for the caseless and  
3338      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
3339      common. First, ensure the minimum number of matches are present. If min =      common. First, ensure the minimum number of matches are present. If min =
3340      max, continue at the same level without recursing. Otherwise, if      max, continue at the same level without recursing. Otherwise, if
# Line 3145  for (;;) Line 3347  for (;;)
3347    
3348      if (op >= OP_STARI)  /* Caseless */      if (op >= OP_STARI)  /* Caseless */
3349        {        {
3350        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3351          /* fc must be < 128 if UTF is enabled. */
3352          foc = md->fcc[fc];
3353    #else
3354    #ifdef SUPPORT_UTF
3355    #ifdef SUPPORT_UCP
3356          if (utf && fc > 127)
3357            foc = UCD_OTHERCASE(fc);
3358    #else
3359          if (utf && fc > 127)
3360            foc = fc;
3361    #endif /* SUPPORT_UCP */
3362          else
3363    #endif /* SUPPORT_UTF */
3364            foc = TABLE_GET(fc, md->fcc, fc);
3365    #endif /* COMPILE_PCRE8 */
3366    
3367        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3368          {          {
3369          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3370            {            {
3371            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3372            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3373            }            }
3374          if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3375            eptr++;
3376          }          }
3377        if (min == max) continue;        if (min == max) continue;
3378        if (minimize)        if (minimize)
# Line 3162  for (;;) Line 3381  for (;;)
3381            {            {
3382            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3383            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3384            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3385            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3386              {              {
3387              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3388              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3389              }              }
3390            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3391              eptr++;
3392            }            }
3393          /* Control never gets here */          /* Control never gets here */
3394          }          }
# Line 3182  for (;;) Line 3402  for (;;)
3402              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3403              break;              break;
3404              }              }
3405            if (fc != md->lcc[*eptr]) break;            if (fc != *eptr && foc != *eptr) break;
3406            eptr++;            eptr++;
3407            }            }
3408    
# Line 3194  for (;;) Line 3414  for (;;)
3414            eptr--;            eptr--;
3415            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3416            }            }
3417          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3418          }          }
3419        /* Control never gets here */        /* Control never gets here */
3420        }        }
# Line 3208  for (;;) Line 3428  for (;;)
3428          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3429            {            {
3430            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3431            MRRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3432            }            }
3433          if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3434          }          }
3435    
3436        if (min == max) continue;        if (min == max) continue;
# Line 3221  for (;;) Line 3441  for (;;)
3441            {            {
3442            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3443            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3444            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3445            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3446              {              {
3447              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3448              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3449              }              }
3450            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3451            }            }
3452          /* Control never gets here */          /* Control never gets here */
3453          }          }
# Line 3252  for (;;) Line 3472  for (;;)
3472            eptr--;            eptr--;
3473            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3474            }            }
3475          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3476          }          }
3477        }        }
3478      /* Control never gets here */      /* Control never gets here */
# Line 3261  for (;;) Line 3481  for (;;)
3481      checking can be multibyte. */      checking can be multibyte. */
3482    
3483      case OP_NOT:      case OP_NOT:
3484      case OP_NOTI:      case OP_NOTI:
3485      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3486        {        {
3487        SCHECK_PARTIAL();        SCHECK_PARTIAL();
3488        MRRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3489        }        }
3490      ecode++;      ecode++;
3491      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3492      if (op == OP_NOTI)         /* The caseless case */      if (op == OP_NOTI)         /* The caseless case */
3493        {        {
3494  #ifdef SUPPORT_UTF8        register int ch, och;
3495        if (c < 256)        ch = *ecode++;
3496  #endif  #ifdef COMPILE_PCRE8
3497        c = md->lcc[c];        /* ch must be < 128 if UTF is enabled. */
3498        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);        och = md->fcc[ch];
3499    #else
3500    #ifdef SUPPORT_UTF
3501    #ifdef SUPPORT_UCP
3502          if (utf && ch > 127)
3503            och = UCD_OTHERCASE(ch);
3504    #else
3505          if (utf && ch > 127)
3506            och = ch;
3507    #endif /* SUPPORT_UCP */
3508          else
3509    #endif /* SUPPORT_UTF */
3510            och = TABLE_GET(ch, md->fcc, ch);
3511    #endif /* COMPILE_PCRE8 */
3512          if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3513        }        }
3514      else    /* Caseful */      else    /* Caseful */
3515        {        {
3516        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
3517        }        }
3518      break;      break;
3519    
# Line 3293  for (;;) Line 3527  for (;;)
3527      case OP_NOTEXACT:      case OP_NOTEXACT:
3528      case OP_NOTEXACTI:      case OP_NOTEXACTI:
3529      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3530      ecode += 3;      ecode += 1 + IMM2_SIZE;
3531      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3532    
3533      case OP_NOTUPTO:      case OP_NOTUPTO:
# Line 3303  for (;;) Line 3537  for (;;)
3537      min = 0;      min = 0;
3538      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3539      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3540      ecode += 3;      ecode += 1 + IMM2_SIZE;
3541      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3542    
3543      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
# Line 3335  for (;;) Line 3569  for (;;)
3569      possessive = TRUE;      possessive = TRUE;
3570      min = 0;      min = 0;
3571      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3572      ecode += 3;      ecode += 1 + IMM2_SIZE;
3573      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3574    
3575      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 3374  for (;;) Line 3608  for (;;)
3608    
3609      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3610        {        {
3611        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3612          /* fc must be < 128 if UTF is enabled. */
3613          foc = md->fcc[fc];
3614    #else
3615    #ifdef SUPPORT_UTF
3616    #ifdef SUPPORT_UCP
3617          if (utf && fc > 127)
3618            foc = UCD_OTHERCASE(fc);
3619    #else
3620          if (utf && fc > 127)
3621            foc = fc;
3622    #endif /* SUPPORT_UCP */
3623          else
3624    #endif /* SUPPORT_UTF */
3625            foc = TABLE_GET(fc, md->fcc, fc);
3626    #endif /* COMPILE_PCRE8 */
3627    
3628  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3629        /* UTF-8 mode */        if (utf)
       if (utf8)  
3630          {          {
3631          register unsigned int d;          register unsigned int d;
3632          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3386  for (;;) Line 3634  for (;;)
3634            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3635              {              {
3636              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3637              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3638              }              }
3639            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3640            if (d < 256) d = md->lcc[d];            if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);
           if (fc == d) MRRETURN(MATCH_NOMATCH);  
3641            }            }
3642          }          }
3643        else        else
3644  #endif  #endif
3645          /* Not UTF mode */
       /* Not UTF-8 mode */  
3646          {          {
3647          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3648            {            {
3649            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3650              {              {
3651              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3652              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3653              }              }
3654            if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3655              eptr++;
3656            }            }
3657          }          }
3658    
# Line 3413  for (;;) Line 3660  for (;;)
3660    
3661        if (minimize)        if (minimize)
3662          {          {
3663  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3664          /* UTF-8 mode */          if (utf)
         if (utf8)  
3665            {            {
3666            register unsigned int d;            register unsigned int d;
3667            for (fi = min;; fi++)            for (fi = min;; fi++)
3668              {              {
3669              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3670              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3671              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3672              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3673                {                {
3674                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3675                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3676                }                }
3677              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3678              if (d < 256) d = md->lcc[d];              if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);
             if (fc == d) MRRETURN(MATCH_NOMATCH);  
3679              }              }
3680            }            }
3681          else          else
3682  #endif  #endif
3683          /* Not UTF-8 mode */          /* Not UTF mode */
3684            {            {
3685            for (fi = min;; fi++)            for (fi = min;; fi++)
3686              {              {
3687              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3688              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3689              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3690              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3691                {                {
3692                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3693                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3694                }                }
3695              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);              if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3696                eptr++;
3697              }              }
3698            }            }
3699          /* Control never gets here */          /* Control never gets here */
# Line 3459  for (;;) Line 3705  for (;;)
3705          {          {
3706          pp = eptr;          pp = eptr;
3707    
3708  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3709          /* UTF-8 mode */          if (utf)
         if (utf8)  
3710            {            {
3711            register unsigned int d;            register unsigned int d;
3712            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3473  for (;;) Line 3718  for (;;)
3718                break;                break;
3719                }                }
3720              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3721              if (d < 256) d = md->lcc[d];              if (fc == d || foc == d) break;
             if (fc == d) break;  
3722              eptr += len;              eptr += len;
3723              }              }
3724          if (possessive) continue;            if (possessive) continue;
3725          for(;;)            for(;;)
3726              {              {
3727              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3728              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 3488  for (;;) Line 3732  for (;;)
3732            }            }
3733          else          else
3734  #endif  #endif
3735          /* Not UTF-8 mode */          /* Not UTF mode */
3736            {            {
3737            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3738              {              {
# Line 3497  for (;;) Line 3741  for (;;)
3741                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3742                break;                break;
3743                }                }
3744              if (fc == md->lcc[*eptr]) break;              if (fc == *eptr || foc == *eptr) break;
3745              eptr++;              eptr++;
3746              }              }
3747            if (possessive) continue;            if (possessive) continue;
# Line 3509  for (;;) Line 3753  for (;;)
3753              }              }
3754            }            }
3755    
3756          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3757          }          }
3758        /* Control never gets here */        /* Control never gets here */
3759        }        }
# Line 3518  for (;;) Line 3762  for (;;)
3762    
3763      else      else
3764        {        {
3765  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3766        /* UTF-8 mode */        if (utf)
       if (utf8)  
3767          {          {
3768          register unsigned int d;          register unsigned int d;
3769          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3528  for (;;) Line 3771  for (;;)
3771            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3772              {              {
3773              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3774              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3775              }              }
3776            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3777            if (fc == d) MRRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
3778            }            }
3779          }          }
3780        else        else
3781  #endif  #endif
3782        /* Not UTF-8 mode */        /* Not UTF mode */
3783          {          {
3784          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3785            {            {
3786            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3787              {              {
3788              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3789              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3790              }              }
3791            if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3792            }            }
3793          }          }
3794    
# Line 3553  for (;;) Line 3796  for (;;)
3796    
3797        if (minimize)        if (minimize)
3798          {          {
3799  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3800          /* UTF-8 mode */          if (utf)
         if (utf8)  
3801            {            {
3802            register unsigned int d;            register unsigned int d;
3803            for (fi = min;; fi++)            for (fi = min;; fi++)
3804              {              {
3805              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3806              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3807              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3808              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3809                {                {
3810                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3811                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3812                }                }
3813              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3814              if (fc == d) MRRETURN(MATCH_NOMATCH);              if (fc == d) RRETURN(MATCH_NOMATCH);
3815              }              }
3816            }            }
3817          else          else
3818  #endif  #endif
3819          /* Not UTF-8 mode */          /* Not UTF mode */
3820            {            {
3821            for (fi = min;; fi++)            for (fi = min;; fi++)
3822              {              {
3823              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3824              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3825              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3826              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3827                {                {
3828                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3829                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3830                }                }
3831              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);              if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3832              }              }
3833            }            }
3834          /* Control never gets here */          /* Control never gets here */
# Line 3598  for (;;) Line 3840  for (;;)
3840          {          {
3841          pp = eptr;          pp = eptr;
3842    
3843  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3844          /* UTF-8 mode */          if (utf)
         if (utf8)  
3845            {            {
3846            register unsigned int d;            register unsigned int d;
3847            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3626  for (;;) Line 3867  for (;;)
3867            }            }
3868          else          else
3869  #endif  #endif
3870          /* Not UTF-8 mode */          /* Not UTF mode */
3871            {            {
3872            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3873              {              {
# Line 3647  for (;;) Line 3888  for (;;)
3888              }              }
3889            }            }
3890    
3891          MRRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3892          }          }
3893        }        }
3894      /* Control never gets here */      /* Control never gets here */
# Line 3659  for (;;) Line 3900  for (;;)
3900      case OP_TYPEEXACT:      case OP_TYPEEXACT:
3901      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3902      minimize = TRUE;      minimize = TRUE;
3903      ecode += 3;      ecode += 1 + IMM2_SIZE;
3904      goto REPEATTYPE;      goto REPEATTYPE;
3905    
3906      case OP_TYPEUPTO:      case OP_TYPEUPTO:
# Line 3667  for (;;) Line 3908  for (;;)
3908      min = 0;      min = 0;
3909      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3910      minimize = *ecode == OP_TYPEMINUPTO;      minimize = *ecode == OP_TYPEMINUPTO;
3911      ecode += 3;      ecode += 1 + IMM2_SIZE;
3912      goto REPEATTYPE;      goto REPEATTYPE;
3913    
3914      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
# Line 3695  for (;;) Line 3936  for (;;)
3936      possessive = TRUE;      possessive = TRUE;
3937      min = 0;      min = 0;
3938      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3939      ecode += 3;      ecode += 1 + IMM2_SIZE;
3940      goto REPEATTYPE;      goto REPEATTYPE;
3941    
3942      case OP_TYPESTAR:      case OP_TYPESTAR:
# Line 3741  for (;;) Line 3982  for (;;)
3982          switch(prop_type)          switch(prop_type)
3983            {            {
3984            case PT_ANY:            case PT_ANY:
3985            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3986            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3987              {              {
3988              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3989                {                {
3990                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3991                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3992                }                }
3993              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3994              }              }
# Line 3756  for (;;) Line 3997  for (;;)
3997            case PT_LAMP:            case PT_LAMP:
3998            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3999              {              {
4000                int chartype;
4001              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4002                {                {
4003                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4004                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4005                }                }
4006              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4007              prop_chartype = UCD_CHARTYPE(c);              chartype = UCD_CHARTYPE(c);
4008              if ((prop_chartype == ucp_Lu ||              if ((chartype == ucp_Lu ||
4009                   prop_chartype == ucp_Ll ||                   chartype == ucp_Ll ||
4010                   prop_chartype == ucp_Lt) == prop_fail_result)                   chartype == ucp_Lt) == prop_fail_result)
4011                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4012              }              }
4013            break;            break;
4014    
# Line 3776  for (;;) Line 4018  for (;;)
4018              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4019                {                {
4020                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4021                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4022                }                }
4023              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4024              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4025              if ((prop_category == prop_value) == prop_fail_result)                RRETURN(MATCH_NOMATCH);
               MRRETURN(MATCH_NOMATCH);  
4026              }              }
4027            break;            break;
4028    
# Line 3791  for (;;) Line 4032  for (;;)
4032              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4033                {                {
4034                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4035                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4036                }                }
4037              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4038              prop_chartype = UCD_CHARTYPE(c);              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4039              if ((prop_chartype == prop_value) == prop_fail_result)                RRETURN(MATCH_NOMATCH);
               MRRETURN(MATCH_NOMATCH);  
4040              }              }
4041            break;            break;
4042    
# Line 3806  for (;;) Line 4046  for (;;)
4046              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4047                {                {
4048                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4049                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4050                }                }
4051              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4052              prop_script = UCD_SCRIPT(c);              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4053              if ((prop_script == prop_value) == prop_fail_result)                RRETURN(MATCH_NOMATCH);
               MRRETURN(MATCH_NOMATCH);  
4054              }              }
4055            break;            break;
4056    
4057            case PT_ALNUM:            case PT_ALNUM:
4058            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4059              {              {
4060                int category;
4061              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4062                {                {
4063                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4064                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4065                }                }
4066              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4067              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4068              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4069                     == prop_fail_result)                RRETURN(MATCH_NOMATCH);
               MRRETURN(MATCH_NOMATCH);  
4070              }              }
4071            break;            break;
4072    
# Line 3837  for (;;) Line 4076  for (;;)
4076              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4077                {                {
4078                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4079                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4080                }                }
4081              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4082              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
4083                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4084                     == prop_fail_result)                     == prop_fail_result)
4085                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4086              }              }
4087            break;            break;
4088    
# Line 3854  for (;;) Line 4092  for (;;)
4092              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4093                {                {
4094                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4095                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4096                }                }
4097              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4098              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
4099                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4100                     == prop_fail_result)                     == prop_fail_result)
4101                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4102              }              }
4103            break;            break;
4104    
4105            case PT_WORD:            case PT_WORD:
4106            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4107              {              {
4108                int category;
4109              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4110                {                {
4111                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4112                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4113                }                }
4114              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4115              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4116              if ((prop_category == ucp_L || prop_category == ucp_N ||              if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
                  c == CHAR_UNDERSCORE)  
4117                     == prop_fail_result)                     == prop_fail_result)
4118                MRRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4119              }              }
4120            break;            break;
4121    
# Line 3899  for (;;) Line 4136  for (;;)
4136            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4137              {              {
4138              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4139              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4140              }              }
4141            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4142            prop_category = UCD_CATEGORY(c);            if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);  
4143            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4144              {              {
4145              int len = 1;              int len = 1;
4146              if (!utf8) c = *eptr;              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4147                else { GETCHARLEN(c, eptr, len); }              if (UCD_CATEGORY(c) != ucp_M) break;
             prop_category = UCD_CATEGORY(c);  
             if (prop_category != ucp_M) break;  
4148              eptr += len;              eptr += len;
4149              }              }
4150            }            }
# Line 3921  for (;;) Line 4155  for (;;)
4155    
4156  /* Handle all other cases when the coding is UTF-8 */  /* Handle all other cases when the coding is UTF-8 */
4157    
4158  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4159        if (utf8) switch(ctype)        if (utf) switch(ctype)
4160          {          {
4161          case OP_ANY:          case OP_ANY:
4162          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3930  for (;;) Line 4164  for (;;)
4164            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4165              {              {
4166              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4167              MRRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4168              }              }
4169            if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4170            eptr++;       &nbs