/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 598 by ph10, Sat May 7 15:37:31 2011 UTC revision 733 by ph10, Tue Oct 11 10:29:36 2011 UTC
# Line 57  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
60  /* Flag bits for the match() function */  /* Values for setting in md->match_function_type to indicate two special types
61    of call to match(). We do it this way to save on using another stack variable,
62    as stack usage is to be discouraged. */
63    
64  #define match_condassert     0x01  /* Called to check a condition assertion */  #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
65  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
66    
67  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
68  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 73  negative to avoid the external error cod Line 75  negative to avoid the external error cod
75    
76  #define MATCH_ACCEPT       (-999)  #define MATCH_ACCEPT       (-999)
77  #define MATCH_COMMIT       (-998)  #define MATCH_COMMIT       (-998)
78  #define MATCH_PRUNE        (-997)  #define MATCH_KETRPOS      (-997)
79  #define MATCH_SKIP         (-996)  #define MATCH_ONCE         (-996)
80  #define MATCH_SKIP_ARG     (-995)  #define MATCH_PRUNE        (-995)
81  #define MATCH_THEN         (-994)  #define MATCH_SKIP         (-994)
82    #define MATCH_SKIP_ARG     (-993)
83    #define MATCH_THEN         (-992)
84    
85  /* This is a convenience macro for code that occurs many times. */  /* This is a convenience macro for code that occurs many times. */
86    
# Line 134  while (length-- > 0) Line 138  while (length-- > 0)
138    
139  /* Normally, if a back reference hasn't been set, the length that is passed is  /* Normally, if a back reference hasn't been set, the length that is passed is
140  negative, so the match always fails. However, in JavaScript compatibility mode,  negative, so the match always fails. However, in JavaScript compatibility mode,
141  the length passed is zero. Note that in caseless UTF-8 mode, the number of  the length passed is zero. Note that in caseless UTF-8 mode, the number of
142  subject bytes matched may be different to the number of reference bytes.  subject bytes matched may be different to the number of reference bytes.
143    
144  Arguments:  Arguments:
# Line 142  Arguments: Line 146  Arguments:
146    eptr        pointer into the subject    eptr        pointer into the subject
147    length      length of reference to be matched (number of bytes)    length      length of reference to be matched (number of bytes)
148    md          points to match data block    md          points to match data block
149    ims         the ims flags    caseless    TRUE if caseless
150    
151  Returns:      < 0 if not matched, otherwise the number of subject bytes matched  Returns:      < 0 if not matched, otherwise the number of subject bytes matched
152  */  */
153    
154  static int  static int
155  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
156    unsigned long int ims)    BOOL caseless)
157  {  {
158  USPTR eptr_start = eptr;  USPTR eptr_start = eptr;
159  register USPTR p = md->start_subject + md->offset_vector[offset];  register USPTR p = md->start_subject + md->offset_vector[offset];
# Line 175  if (length < 0) return -1; Line 179  if (length < 0) return -1;
179  properly if Unicode properties are supported. Otherwise, we can check only  properly if Unicode properties are supported. Otherwise, we can check only
180  ASCII characters. */  ASCII characters. */
181    
182  if ((ims & PCRE_CASELESS) != 0)  if (caseless)
183    {    {
184  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
185  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
186    if (md->utf8)    if (md->utf8)
187      {      {
188      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
189      bytes matched may differ, because there are some characters whose upper and      bytes matched may differ, because there are some characters whose upper and
190      lower case versions code as different numbers of bytes. For example, U+023A      lower case versions code as different numbers of bytes. For example, U+023A
191      (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);      (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
192      a sequence of 3 of the former uses 6 bytes, as does a sequence of two of      a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
193      the latter. It is important, therefore, to check the length along the      the latter. It is important, therefore, to check the length along the
194      reference, not along the subject (earlier code did this wrong). */      reference, not along the subject (earlier code did this wrong). */
195    
196      USPTR endptr = p + length;      USPTR endptr = p + length;
197      while (p < endptr)      while (p < endptr)
198        {        {
# Line 206  if ((ims & PCRE_CASELESS) != 0) Line 210  if ((ims & PCRE_CASELESS) != 0)
210    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
211    is no UCP support. */    is no UCP support. */
212      {      {
213      if (eptr + length > md->end_subject) return -1;      if (eptr + length > md->end_subject) return -1;
214      while (length-- > 0)      while (length-- > 0)
215        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
216      }      }
217    }    }
218    
219  /* In the caseful case, we can just compare the bytes, whether or not we  /* In the caseful case, we can just compare the bytes, whether or not we
220  are in UTF-8 mode. */  are in UTF-8 mode. */
221    
222  else  else
223    {    {
224    if (eptr + length > md->end_subject) return -1;    if (eptr + length > md->end_subject) return -1;
225    while (length-- > 0) if (*p++ != *eptr++) return -1;    while (length-- > 0) if (*p++ != *eptr++) return -1;
226    }    }
227    
228  return eptr - eptr_start;  return eptr - eptr_start;
# Line 273  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 277  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
277         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
278         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
279         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
280         RM61,  RM62 };         RM61,  RM62, RM63, RM64, RM65, RM66 };
281    
282  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
283  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 283  actually used in this definition. */ Line 287  actually used in this definition. */
287  #define REGISTER register  #define REGISTER register
288    
289  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
290  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
291    { \    { \
292    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
293    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1); \
294    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
295    }    }
296  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 295  actually used in this definition. */ Line 299  actually used in this definition. */
299    return ra; \    return ra; \
300    }    }
301  #else  #else
302  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
303    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1)
304  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
305  #endif  #endif
306    
# Line 309  argument of match(), which never changes Line 313  argument of match(), which never changes
313    
314  #define REGISTER  #define REGISTER
315    
316  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rw)\
317    {\    {\
318    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
319    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
# Line 319  argument of match(), which never changes Line 323  argument of match(), which never changes
323    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
324    newframe->Xmarkptr = markptr;\    newframe->Xmarkptr = markptr;\
325    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
326    newframe->Xims = re;\    newframe->Xeptrb = re;\
   newframe->Xeptrb = rf;\  
   newframe->Xflags = rg;\  
327    newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
328    newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
329    frame = newframe;\    frame = newframe;\
# Line 357  typedef struct heapframe { Line 359  typedef struct heapframe {
359    USPTR Xmstart;    USPTR Xmstart;
360    USPTR Xmarkptr;    USPTR Xmarkptr;
361    int Xoffset_top;    int Xoffset_top;
   long int Xims;  
362    eptrblock *Xeptrb;    eptrblock *Xeptrb;
   int Xflags;  
363    unsigned int Xrdepth;    unsigned int Xrdepth;
364    
365    /* Function local variables */    /* Function local variables */
# Line 380  typedef struct heapframe { Line 380  typedef struct heapframe {
380    BOOL Xcondition;    BOOL Xcondition;
381    BOOL Xprev_is_word;    BOOL Xprev_is_word;
382    
   unsigned long int Xoriginal_ims;  
   
383  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
384    int Xprop_type;    int Xprop_type;
385    int Xprop_value;    int Xprop_value;
386    int Xprop_fail_result;    int Xprop_fail_result;
   int Xprop_category;  
   int Xprop_chartype;  
   int Xprop_script;  
387    int Xoclength;    int Xoclength;
388    uschar Xocchars[8];    uschar Xocchars[8];
389  #endif  #endif
# Line 467  Arguments: Line 462  Arguments:
462     markptr     pointer to the most recent MARK name, or NULL     markptr     pointer to the most recent MARK name, or NULL
463     offset_top  current top pointer     offset_top  current top pointer
464     md          pointer to "static" info for the match     md          pointer to "static" info for the match
    ims         current /i, /m, and /s options  
465     eptrb       pointer to chain of blocks containing eptr at start of     eptrb       pointer to chain of blocks containing eptr at start of
466                   brackets - for testing for empty matches                   brackets - for testing for empty matches
    flags       can contain  
                  match_condassert - this is an assertion condition  
                  match_cbegroup - this is the start of an unlimited repeat  
                    group that can match an empty string  
467     rdepth      the recursion depth     rdepth      the recursion depth
468    
469  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 485  Returns:       MATCH_MATCH if matched Line 475  Returns:       MATCH_MATCH if matched
475    
476  static int  static int
477  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
478    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,
479    eptrblock *eptrb, int flags, unsigned int rdepth)    unsigned int rdepth)
480  {  {
481  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
482  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 498  register unsigned int c;   /* Character Line 488  register unsigned int c;   /* Character
488  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
489    
490  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
491    BOOL caseless;
492  int condcode;  int condcode;
493    
494  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
# Line 517  frame->Xecode = ecode; Line 508  frame->Xecode = ecode;
508  frame->Xmstart = mstart;  frame->Xmstart = mstart;
509  frame->Xmarkptr = markptr;  frame->Xmarkptr = markptr;
510  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
 frame->Xims = ims;  
511  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
 frame->Xflags = flags;  
512  frame->Xrdepth = rdepth;  frame->Xrdepth = rdepth;
513    
514  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
# Line 533  HEAP_RECURSE: Line 522  HEAP_RECURSE:
522  #define mstart             frame->Xmstart  #define mstart             frame->Xmstart
523  #define markptr            frame->Xmarkptr  #define markptr            frame->Xmarkptr
524  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
 #define ims                frame->Xims  
525  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
 #define flags              frame->Xflags  
526  #define rdepth             frame->Xrdepth  #define rdepth             frame->Xrdepth
527    
528  /* Ditto for the local variables */  /* Ditto for the local variables */
# Line 557  HEAP_RECURSE: Line 544  HEAP_RECURSE:
544  #define condition          frame->Xcondition  #define condition          frame->Xcondition
545  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
546    
 #define original_ims       frame->Xoriginal_ims  
   
547  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
548  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
549  #define prop_value         frame->Xprop_value  #define prop_value         frame->Xprop_value
550  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
 #define prop_category      frame->Xprop_category  
 #define prop_chartype      frame->Xprop_chartype  
 #define prop_script        frame->Xprop_script  
551  #define oclength           frame->Xoclength  #define oclength           frame->Xoclength
552  #define occhars            frame->Xocchars  #define occhars            frame->Xocchars
553  #endif  #endif
# Line 595  i, and fc and c, can be the same variabl Line 577  i, and fc and c, can be the same variabl
577  #define fi i  #define fi i
578  #define fc c  #define fc c
579    
580    /* Many of the following variables are used only in small blocks of the code.
581    My normal style of coding would have declared them within each of those blocks.
582    However, in order to accommodate the version of this code that uses an external
583    "stack" implemented on the heap, it is easier to declare them all here, so the
584    declarations can be cut out in a block. The only declarations within blocks
585    below are for variables that do not have to be preserved over a recursive call
586    to RMATCH(). */
587    
588    #ifdef SUPPORT_UTF8
589    const uschar *charptr;
590    #endif
591    const uschar *callpat;
592    const uschar *data;
593    const uschar *next;
594    USPTR         pp;
595    const uschar *prev;
596    USPTR         saved_eptr;
597    
598    recursion_info new_recursive;
599    
600  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */  BOOL cur_is_word;
 const uschar *charptr;             /* in small blocks of the code. My normal */  
 #endif                             /* style of coding would have declared    */  
 const uschar *callpat;             /* them within each of those blocks.      */  
 const uschar *data;                /* However, in order to accommodate the   */  
 const uschar *next;                /* version of this code that uses an      */  
 USPTR         pp;                  /* external "stack" implemented on the    */  
 const uschar *prev;                /* heap, it is easier to declare them all */  
 USPTR         saved_eptr;          /* here, so the declarations can be cut   */  
                                    /* out in a block. The only declarations  */  
 recursion_info new_recursive;      /* within blocks below are for variables  */  
                                    /* that do not have to be preserved over  */  
 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  
601  BOOL condition;  BOOL condition;
602  BOOL prev_is_word;  BOOL prev_is_word;
603    
 unsigned long int original_ims;  
   
604  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
605  int prop_type;  int prop_type;
606  int prop_value;  int prop_value;
607  int prop_fail_result;  int prop_fail_result;
 int prop_category;  
 int prop_chartype;  
 int prop_script;  
608  int oclength;  int oclength;
609  uschar occhars[8];  uschar occhars[8];
610  #endif  #endif
# Line 640  int stacksave[REC_STACK_SAVE_MAX]; Line 624  int stacksave[REC_STACK_SAVE_MAX];
624  eptrblock newptrb;  eptrblock newptrb;
625  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
626    
627    /* To save space on the stack and in the heap frame, I have doubled up on some
628    of the local variables that are used only in localised parts of the code, but
629    still need to be preserved over recursive calls of match(). These macros define
630    the alternative names that are used. */
631    
632    #define allow_zero    cur_is_word
633    #define cbegroup      condition
634    #define code_offset   codelink
635    #define condassert    condition
636    #define matched_once  prev_is_word
637    
638  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
639  variables. */  variables. */
640    
# Line 676  haven't exceeded the recursive call limi Line 671  haven't exceeded the recursive call limi
671  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
672  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
673    
 original_ims = ims;    /* Save for resetting on ')' */  
   
674  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
675  string, the match_cbegroup flag is set. When this is the case, add the current  string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
676  subject pointer to the chain of such remembered pointers, to be checked when we  done this way to save having to use another function argument, which would take
677  hit the closing ket, in order to break infinite loops that match no characters.  up space on the stack. See also MATCH_CONDASSERT below.
678  When match() is called in other circumstances, don't add to the chain. The  
679  match_cbegroup flag must NOT be used with tail recursion, because the memory  When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
680  block that is used is on the stack, so a new one may be required for each  such remembered pointers, to be checked when we hit the closing ket, in order
681  match(). */  to break infinite loops that match no characters. When match() is called in
682    other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
683    NOT be used with tail recursion, because the memory block that is used is on
684    the stack, so a new one may be required for each match(). */
685    
686  if ((flags & match_cbegroup) != 0)  if (md->match_function_type == MATCH_CBEGROUP)
687    {    {
688    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
689    newptrb.epb_prev = eptrb;    newptrb.epb_prev = eptrb;
690    eptrb = &newptrb;    eptrb = &newptrb;
691      md->match_function_type = 0;
692    }    }
693    
694  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 706  for (;;) Line 703  for (;;)
703      case OP_MARK:      case OP_MARK:
704      markptr = ecode + 2;      markptr = ecode + 2;
705      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
706        ims, eptrb, flags, RM55);        eptrb, RM55);
707    
708      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
709      argument, and we must check whether that argument matches this MARK's      argument, and we must check whether that argument matches this MARK's
# Line 732  for (;;) Line 729  for (;;)
729    
730      case OP_COMMIT:      case OP_COMMIT:
731      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
732        ims, eptrb, flags, RM52);        eptrb, RM52);
733      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
734          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
735          rrc != MATCH_THEN)          rrc != MATCH_THEN)
# Line 743  for (;;) Line 740  for (;;)
740    
741      case OP_PRUNE:      case OP_PRUNE:
742      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
743        ims, eptrb, flags, RM51);        eptrb, RM51);
744      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
745      MRRETURN(MATCH_PRUNE);      MRRETURN(MATCH_PRUNE);
746    
747      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
748      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
749        ims, eptrb, flags, RM56);        eptrb, RM56);
750      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
751      md->mark = ecode + 2;      md->mark = ecode + 2;
752      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
# Line 758  for (;;) Line 755  for (;;)
755    
756      case OP_SKIP:      case OP_SKIP:
757      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
758        ims, eptrb, flags, RM53);        eptrb, RM53);
759      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
760        RRETURN(rrc);        RRETURN(rrc);
761      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
# Line 766  for (;;) Line 763  for (;;)
763    
764      case OP_SKIP_ARG:      case OP_SKIP_ARG:
765      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
766        ims, eptrb, flags, RM57);        eptrb, RM57);
767      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
768        RRETURN(rrc);        RRETURN(rrc);
769    
# Line 778  for (;;) Line 775  for (;;)
775      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
776      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
777    
778      /* For THEN (and THEN_ARG) we pass back the address of the bracket or      /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
779      the alt that is at the start of the current branch. This makes it possible      the branch in which it occurs can be determined. Overload the start of
780      to skip back past alternatives that precede the THEN within the current      match pointer to do this. */
     branch. */  
781    
782      case OP_THEN:      case OP_THEN:
783      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
784        ims, eptrb, flags, RM54);        eptrb, RM54);
785      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
786      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
787      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
788    
789      case OP_THEN_ARG:      case OP_THEN_ARG:
790      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top,
791        offset_top, md, ims, eptrb, flags, RM58);        md, eptrb, RM58);
792      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
793      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
794      md->mark = ecode + LINK_SIZE + 2;      md->mark = ecode + 2;
795      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
796    
797      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle an atomic group that does not contain any capturing parentheses.
798      the current subject position in the working slot at the top of the vector.      This can be handled like an assertion. Prior to 8.13, all atomic groups
799      We mustn't change the current values of the data slot, because they may be      were handled this way. In 8.13, the code was changed as below for ONCE, so
800      set from a previous iteration of this group, and be referred to by a      that backups pass through the group and thereby reset captured values.
801      reference inside the group.      However, this uses a lot more stack, so in 8.20, atomic groups that do not
802        contain any captures generate OP_ONCE_NC, which can be handled in the old,
803      If the bracket fails to match, we need to restore this value and also the      less stack intensive way.
804      values of the final offsets, in case they were set by a previous iteration  
805      of the same bracket.      Check the alternative branches in turn - the matching won't pass the KET
806        for this kind of subpattern. If any one branch matches, we carry on as at
807        the end of a normal bracket, leaving the subject pointer, but resetting
808        the start-of-match value in case it was changed by \K. */
809    
810        case OP_ONCE_NC:
811        prev = ecode;
812        saved_eptr = eptr;
813        do
814          {
815          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
816          if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
817            {
818            mstart = md->start_match_ptr;
819            break;
820            }
821          if (rrc == MATCH_THEN)
822            {
823            next = ecode + GET(ecode,1);
824            if (md->start_match_ptr < next &&
825                (*ecode == OP_ALT || *next == OP_ALT))
826              rrc = MATCH_NOMATCH;
827            }
828    
829          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
830          ecode += GET(ecode,1);
831          }
832        while (*ecode == OP_ALT);
833    
834        /* If we hit the end of the group (which could be repeated), fail */
835    
836        if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
837    
838        /* Continue as from after the group, updating the offsets high water
839        mark, since extracts may have been taken. */
840    
841        do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
842    
843        offset_top = md->end_offset_top;
844        eptr = md->end_match_ptr;
845    
846        /* For a non-repeating ket, just continue at this level. This also
847        happens for a repeating ket if no characters were matched in the group.
848        This is the forcible breaking of infinite loops as implemented in Perl
849        5.005. */
850    
851        if (*ecode == OP_KET || eptr == saved_eptr)
852          {
853          ecode += 1+LINK_SIZE;
854          break;
855          }
856    
857        /* The repeating kets try the rest of the pattern or restart from the
858        preceding bracket, in the appropriate order. The second "call" of match()
859        uses tail recursion, to avoid using another stack frame. */
860    
861        if (*ecode == OP_KETRMIN)
862          {
863          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
864          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
865          ecode = prev;
866          goto TAIL_RECURSE;
867          }
868        else  /* OP_KETRMAX */
869          {
870          md->match_function_type = MATCH_CBEGROUP;
871          RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
872          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
873          ecode += 1 + LINK_SIZE;
874          goto TAIL_RECURSE;
875          }
876        /* Control never gets here */
877    
878        /* Handle a capturing bracket, other than those that are possessive with an
879        unlimited repeat. If there is space in the offset vector, save the current
880        subject position in the working slot at the top of the vector. We mustn't
881        change the current values of the data slot, because they may be set from a
882        previous iteration of this group, and be referred to by a reference inside
883        the group. A failure to match might occur after the group has succeeded,
884        if something later on doesn't match. For this reason, we need to restore
885        the working value and also the values of the final offsets, in case they
886        were set by a previous iteration of the same bracket.
887    
888      If there isn't enough space in the offset vector, treat this as if it were      If there isn't enough space in the offset vector, treat this as if it were
889      a non-capturing bracket. Don't worry about setting the flag for the error      a non-capturing bracket. Don't worry about setting the flag for the error
# Line 835  for (;;) Line 912  for (;;)
912        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
913          (int)(eptr - md->start_subject);          (int)(eptr - md->start_subject);
914    
915        flags = (op == OP_SCBRA)? match_cbegroup : 0;        for (;;)
       do  
916          {          {
917            if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
918          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
919            ims, eptrb, flags, RM1);            eptrb, RM1);
920          if (rrc != MATCH_NOMATCH &&          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
921              (rrc != MATCH_THEN || md->start_match_ptr != ecode))  
922            RRETURN(rrc);          /* If we backed up to a THEN, check whether it is within the current
923            branch by comparing the address of the THEN that is passed back with
924            the end of the branch. If it is within the current branch, and the
925            branch is one of two or more alternatives (it either starts or ends
926            with OP_ALT), we have reached the limit of THEN's action, so convert
927            the return code to NOMATCH, which will cause normal backtracking to
928            happen from now on. Otherwise, THEN is passed back to an outer
929            alternative. This implements Perl's treatment of parenthesized groups,
930            where a group not containing | does not affect the current alternative,
931            that is, (X) is NOT the same as (X|(*F)). */
932    
933            if (rrc == MATCH_THEN)
934              {
935              next = ecode + GET(ecode,1);
936              if (md->start_match_ptr < next &&
937                  (*ecode == OP_ALT || *next == OP_ALT))
938                rrc = MATCH_NOMATCH;
939              }
940    
941            /* Anything other than NOMATCH is passed back. */
942    
943            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
944          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
945          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
946            if (*ecode != OP_ALT) break;
947          }          }
       while (*ecode == OP_ALT);  
948    
949        DPRINTF(("bracket %d failed\n", number));        DPRINTF(("bracket %d failed\n", number));
   
950        md->offset_vector[offset] = save_offset1;        md->offset_vector[offset] = save_offset1;
951        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
952        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
953    
954        if (rrc != MATCH_THEN) md->mark = markptr;        /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
955        RRETURN(MATCH_NOMATCH);  
956          if (md->mark == NULL) md->mark = markptr;
957          RRETURN(rrc);
958        }        }
959    
960      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
# Line 869  for (;;) Line 968  for (;;)
968      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
969      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
970    
971      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing or atomic group, except for possessive with unlimited
972      final alternative within the brackets, we would return the result of a      repeat and ONCE group with no captures. Loop for all the alternatives.
973      recursive call to match() whatever happened. We can reduce stack usage by  
974      turning this into a tail recursion, except in the case when match_cbegroup      When we get to the final alternative within the brackets, we used to return
975      is set.*/      the result of a recursive call to match() whatever happened so it was
976        possible to reduce stack usage by turning this into a tail recursion,
977        except in the case of a possibly empty group. However, now that there is
978        the possiblity of (*THEN) occurring in the final alternative, this
979        optimization is no longer always possible.
980    
981        We can optimize if we know there are no (*THEN)s in the pattern; at present
982        this is the best that can be done.
983    
984        MATCH_ONCE is returned when the end of an atomic group is successfully
985        reached, but subsequent matching fails. It passes back up the tree (causing
986        captured values to be reset) until the original atomic group level is
987        reached. This is tested by comparing md->once_target with the start of the
988        group. At this point, the return is converted into MATCH_NOMATCH so that
989        previous backup points can be taken. */
990    
991        case OP_ONCE:
992      case OP_BRA:      case OP_BRA:
993      case OP_SBRA:      case OP_SBRA:
994      DPRINTF(("start non-capturing bracket\n"));      DPRINTF(("start non-capturing bracket\n"));
995      flags = (op >= OP_SBRA)? match_cbegroup : 0;  
996      for (;;)      for (;;)
997        {        {
998        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */        if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
999    
1000          /* If this is not a possibly empty group, and there are no (*THEN)s in
1001          the pattern, and this is the final alternative, optimize as described
1002          above. */
1003    
1004          else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1005            {
1006            ecode += _pcre_OP_lengths[*ecode];
1007            goto TAIL_RECURSE;
1008            }
1009    
1010          /* In all other cases, we have to make another call to match(). */
1011    
1012          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
1013            RM2);
1014    
1015          /* See comment in the code for capturing groups above about handling
1016          THEN. */
1017    
1018          if (rrc == MATCH_THEN)
1019          {          {
1020          if (flags == 0)    /* Not a possibly empty group */          next = ecode + GET(ecode,1);
1021            if (md->start_match_ptr < next &&
1022                (*ecode == OP_ALT || *next == OP_ALT))
1023              rrc = MATCH_NOMATCH;
1024            }
1025    
1026          if (rrc != MATCH_NOMATCH)
1027            {
1028            if (rrc == MATCH_ONCE)
1029            {            {
1030            ecode += _pcre_OP_lengths[*ecode];            const uschar *scode = ecode;
1031            DPRINTF(("bracket 0 tail recursion\n"));            if (*scode != OP_ONCE)           /* If not at start, find it */
1032            goto TAIL_RECURSE;              {
1033                while (*scode == OP_ALT) scode += GET(scode, 1);
1034                scode -= GET(scode, 1);
1035                }
1036              if (md->once_target == scode) rrc = MATCH_NOMATCH;
1037            }            }
1038            RRETURN(rrc);
1039            }
1040          ecode += GET(ecode, 1);
1041          if (*ecode != OP_ALT) break;
1042          }
1043    
1044          /* Possibly empty group; can't use tail recursion. */      if (md->mark == NULL) md->mark = markptr;
1045        RRETURN(MATCH_NOMATCH);
1046    
1047          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,      /* Handle possessive capturing brackets with an unlimited repeat. We come
1048            eptrb, flags, RM48);      here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1049          if (rrc == MATCH_NOMATCH) md->mark = markptr;      handled similarly to the normal case above. However, the matching is
1050          RRETURN(rrc);      different. The end of these brackets will always be OP_KETRPOS, which
1051        returns MATCH_KETRPOS without going further in the pattern. By this means
1052        we can handle the group by iteration rather than recursion, thereby
1053        reducing the amount of stack needed. */
1054    
1055        case OP_CBRAPOS:
1056        case OP_SCBRAPOS:
1057        allow_zero = FALSE;
1058    
1059        POSSESSIVE_CAPTURE:
1060        number = GET2(ecode, 1+LINK_SIZE);
1061        offset = number << 1;
1062    
1063    #ifdef PCRE_DEBUG
1064        printf("start possessive bracket %d\n", number);
1065        printf("subject=");
1066        pchars(eptr, 16, TRUE, md);
1067        printf("\n");
1068    #endif
1069    
1070        if (offset < md->offset_max)
1071          {
1072          matched_once = FALSE;
1073          code_offset = ecode - md->start_code;
1074    
1075          save_offset1 = md->offset_vector[offset];
1076          save_offset2 = md->offset_vector[offset+1];
1077          save_offset3 = md->offset_vector[md->offset_end - number];
1078          save_capture_last = md->capture_last;
1079    
1080          DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1081    
1082          /* Each time round the loop, save the current subject position for use
1083          when the group matches. For MATCH_MATCH, the group has matched, so we
1084          restart it with a new subject starting position, remembering that we had
1085          at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1086          usual. If we haven't matched any alternatives in any iteration, check to
1087          see if a previous iteration matched. If so, the group has matched;
1088          continue from afterwards. Otherwise it has failed; restore the previous
1089          capture values before returning NOMATCH. */
1090    
1091          for (;;)
1092            {
1093            md->offset_vector[md->offset_end - number] =
1094              (int)(eptr - md->start_subject);
1095            if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1096            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
1097              eptrb, RM63);
1098            if (rrc == MATCH_KETRPOS)
1099              {
1100              offset_top = md->end_offset_top;
1101              eptr = md->end_match_ptr;
1102              ecode = md->start_code + code_offset;
1103              save_capture_last = md->capture_last;
1104              matched_once = TRUE;
1105              continue;
1106              }
1107    
1108            /* See comment in the code for capturing groups above about handling
1109            THEN. */
1110    
1111            if (rrc == MATCH_THEN)
1112              {
1113              next = ecode + GET(ecode,1);
1114              if (md->start_match_ptr < next &&
1115                  (*ecode == OP_ALT || *next == OP_ALT))
1116                rrc = MATCH_NOMATCH;
1117              }
1118    
1119            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1120            md->capture_last = save_capture_last;
1121            ecode += GET(ecode, 1);
1122            if (*ecode != OP_ALT) break;
1123          }          }
1124    
1125        /* For non-final alternatives, continue the loop for a NOMATCH result;        if (!matched_once)
1126        otherwise return. */          {
1127            md->offset_vector[offset] = save_offset1;
1128            md->offset_vector[offset+1] = save_offset2;
1129            md->offset_vector[md->offset_end - number] = save_offset3;
1130            }
1131    
1132        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        if (md->mark == NULL) md->mark = markptr;
1133          eptrb, flags, RM2);        if (allow_zero || matched_once)
1134        if (rrc != MATCH_NOMATCH &&          {
1135            (rrc != MATCH_THEN || md->start_match_ptr != ecode))          ecode += 1 + LINK_SIZE;
1136          RRETURN(rrc);          break;
1137            }
1138    
1139          RRETURN(MATCH_NOMATCH);
1140          }
1141    
1142        /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1143        as a non-capturing bracket. */
1144    
1145        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1146        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1147    
1148        DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1149    
1150        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1151        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1152    
1153        /* Non-capturing possessive bracket with unlimited repeat. We come here
1154        from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1155        without the capturing complication. It is written out separately for speed
1156        and cleanliness. */
1157    
1158        case OP_BRAPOS:
1159        case OP_SBRAPOS:
1160        allow_zero = FALSE;
1161    
1162        POSSESSIVE_NON_CAPTURE:
1163        matched_once = FALSE;
1164        code_offset = ecode - md->start_code;
1165    
1166        for (;;)
1167          {
1168          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1169          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
1170            eptrb, RM48);
1171          if (rrc == MATCH_KETRPOS)
1172            {
1173            offset_top = md->end_offset_top;
1174            eptr = md->end_match_ptr;
1175            ecode = md->start_code + code_offset;
1176            matched_once = TRUE;
1177            continue;
1178            }
1179    
1180          /* See comment in the code for capturing groups above about handling
1181          THEN. */
1182    
1183          if (rrc == MATCH_THEN)
1184            {
1185            next = ecode + GET(ecode,1);
1186            if (md->start_match_ptr < next &&
1187                (*ecode == OP_ALT || *next == OP_ALT))
1188              rrc = MATCH_NOMATCH;
1189            }
1190    
1191          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1192        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1193          if (*ecode != OP_ALT) break;
1194        }        }
1195    
1196        if (matched_once || allow_zero)
1197          {
1198          ecode += 1 + LINK_SIZE;
1199          break;
1200          }
1201        RRETURN(MATCH_NOMATCH);
1202    
1203      /* Control never reaches here. */      /* Control never reaches here. */
1204    
1205      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
1206      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
1207      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
1208      exactly what going to the ket would do. As there is only one branch to be      exactly what going to the ket would do. */
     obeyed, we can use tail recursion to avoid using another stack frame. */  
1209    
1210      case OP_COND:      case OP_COND:
1211      case OP_SCOND:      case OP_SCOND:
1212      codelink= GET(ecode, 1);      codelink = GET(ecode, 1);
1213    
1214      /* Because of the way auto-callout works during compile, a callout item is      /* Because of the way auto-callout works during compile, a callout item is
1215      inserted between OP_COND and an assertion condition. */      inserted between OP_COND and an assertion condition. */
# Line 928  for (;;) Line 1219  for (;;)
1219        if (pcre_callout != NULL)        if (pcre_callout != NULL)
1220          {          {
1221          pcre_callout_block cb;          pcre_callout_block cb;
1222          cb.version          = 2;   /* Version 2 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
1223          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1224          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1225          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
# Line 940  for (;;) Line 1231  for (;;)
1231          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1232          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1233          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1234            cb.mark             = markptr;
1235          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1236          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1237          }          }
# Line 1093  for (;;) Line 1385  for (;;)
1385        }        }
1386    
1387      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
1388      the final argument match_condassert causes it to stop at the end of an      md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
1389      assertion. */      an assertion. */
1390    
1391      else      else
1392        {        {
1393        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        md->match_function_type = MATCH_CONDASSERT;
1394            match_condassert, RM3);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1395        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1396          {          {
1397            if (md->end_offset_top > offset_top)
1398              offset_top = md->end_offset_top;  /* Captures may have happened */
1399          condition = TRUE;          condition = TRUE;
1400          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1401          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1402          }          }
1403        else if (rrc != MATCH_NOMATCH &&  
1404                (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1405          assertion; it is therefore treated as NOMATCH. */
1406    
1407          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1408          {          {
1409          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1410          }          }
# Line 1118  for (;;) Line 1415  for (;;)
1415          }          }
1416        }        }
1417    
1418      /* We are now at the branch that is to be obeyed. As there is only one, we      /* We are now at the branch that is to be obeyed. As there is only one,
1419      can use tail recursion to avoid using another stack frame, except when there      we can use tail recursion to avoid using another stack frame, except when
1420      is an unlimited repeat of a possibly empty group. In the latter case, a recursive      match_cbegroup is required for an unlimited repeat of a possibly empty
1421      group. If the second alternative doesn't exist, we can just plough on. */      call to match() is always required, unless the second alternative doesn't
1422        exist, in which case we can just plough on. Note that, for compatibility
1423        with Perl, the | in a conditional group is NOT treated as creating two
1424        alternatives. If a THEN is encountered in the branch, it propagates out to
1425        the enclosing alternative (unless nested in a deeper set of alternatives,
1426        of course). */
1427    
1428      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1429        {        {
1430        ecode += 1 + LINK_SIZE;        if (op != OP_SCOND)
       if (op == OP_SCOND)        /* Possibly empty group */  
         {  
         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);  
         RRETURN(rrc);  
         }  
       else                       /* Group must match something */  
1431          {          {
1432          flags = 0;          ecode += 1 + LINK_SIZE;
1433          goto TAIL_RECURSE;          goto TAIL_RECURSE;
1434          }          }
1435    
1436          md->match_function_type = MATCH_CBEGROUP;
1437          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1438          RRETURN(rrc);
1439        }        }
1440      else                         /* Condition false & no alternative */  
1441         /* Condition false & no alternative; continue after the group. */
1442    
1443        else
1444        {        {
1445        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1446        }        }
# Line 1168  for (;;) Line 1471  for (;;)
1471      break;      break;
1472    
1473    
1474      /* End of the pattern, either real or forced. If we are in a top-level      /* End of the pattern, either real or forced. */
     recursion, we should restore the offsets appropriately and continue from  
     after the call. */  
1475    
     case OP_ACCEPT:  
1476      case OP_END:      case OP_END:
1477      if (md->recursive != NULL && md->recursive->group_num == 0)      case OP_ACCEPT:
1478        {      case OP_ASSERT_ACCEPT:
       recursion_info *rec = md->recursive;  
       DPRINTF(("End of pattern in a (?0) recursion\n"));  
       md->recursive = rec->prevrec;  
       memmove(md->offset_vector, rec->offset_save,  
         rec->saved_max * sizeof(int));  
       offset_top = rec->save_offset_top;  
       ims = original_ims;  
       ecode = rec->after_call;  
       break;  
       }  
1479    
1480      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is      /* If we have matched an empty string, fail if not in an assertion and not
1481      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of      in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1482      the subject. In both cases, backtracking will then try other alternatives,      is set and we have matched at the start of the subject. In both cases,
1483      if any. */      backtracking will then try other alternatives, if any. */
1484    
1485      if (eptr == mstart &&      if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1486          (md->notempty ||           md->recursive == NULL &&
1487            (md->notempty_atstart &&           (md->notempty ||
1488              mstart == md->start_subject + md->start_offset)))             (md->notempty_atstart &&
1489                 mstart == md->start_subject + md->start_offset)))
1490        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1491    
1492      /* Otherwise, we have a match. */      /* Otherwise, we have a match. */
# Line 1210  for (;;) Line 1501  for (;;)
1501      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1502      MRRETURN(rrc);      MRRETURN(rrc);
1503    
     /* Change option settings */  
   
     case OP_OPT:  
     ims = ecode[1];  
     ecode += 2;  
     DPRINTF(("ims set to %02lx\n", ims));  
     break;  
   
1504      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
1505      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
1506      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1507      start of each branch to move the current point backwards, so the code at      start of each branch to move the current point backwards, so the code at
1508      this level is identical to the lookahead case. */      this level is identical to the lookahead case. When the assertion is part
1509        of a condition, we want to return immediately afterwards. The caller of
1510        this incarnation of the match() function will have set MATCH_CONDASSERT in
1511        md->match_function_type, and one of these opcodes will be the first opcode
1512        that is processed. We use a local variable that is preserved over calls to
1513        match() to remember this case. */
1514    
1515      case OP_ASSERT:      case OP_ASSERT:
1516      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1517        if (md->match_function_type == MATCH_CONDASSERT)
1518          {
1519          condassert = TRUE;
1520          md->match_function_type = 0;
1521          }
1522        else condassert = FALSE;
1523    
1524      do      do
1525        {        {
1526        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
         RM4);  
1527        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1528          {          {
1529          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1530            markptr = md->mark;
1531          break;          break;
1532          }          }
1533        if (rrc != MATCH_NOMATCH &&  
1534            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1535          RRETURN(rrc);        as NOMATCH. */
1536    
1537          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1538        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1539        }        }
1540      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1541    
1542      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1543    
1544      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1545    
1546      if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);      if (condassert) RRETURN(MATCH_MATCH);
1547    
1548      /* Continue from after the assertion, updating the offsets high water      /* Continue from after the assertion, updating the offsets high water
1549      mark, since extracts may have been taken during the assertion. */      mark, since extracts may have been taken during the assertion. */
# Line 1261  for (;;) Line 1559  for (;;)
1559    
1560      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1561      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1562        if (md->match_function_type == MATCH_CONDASSERT)
1563          {
1564          condassert = TRUE;
1565          md->match_function_type = 0;
1566          }
1567        else condassert = FALSE;
1568    
1569      do      do
1570        {        {
1571        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
         RM5);  
1572        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1573        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1574          {          {
1575          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1576          break;          break;
1577          }          }
1578        if (rrc != MATCH_NOMATCH &&  
1579            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1580          RRETURN(rrc);        as NOMATCH. */
1581    
1582          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1583        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1584        }        }
1585      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1586    
1587      if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1588    
1589      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1590      continue;      continue;
# Line 1324  for (;;) Line 1630  for (;;)
1630      if (pcre_callout != NULL)      if (pcre_callout != NULL)
1631        {        {
1632        pcre_callout_block cb;        pcre_callout_block cb;
1633        cb.version          = 2;   /* Version 2 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
1634        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1635        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1636        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
# Line 1336  for (;;) Line 1642  for (;;)
1642        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1643        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1644        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1645          cb.mark             = markptr;
1646        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1647        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1648        }        }
# Line 1346  for (;;) Line 1653  for (;;)
1653      offset data is the offset to the starting bracket from the start of the      offset data is the offset to the starting bracket from the start of the
1654      whole pattern. (This is so that it works from duplicated subpatterns.)      whole pattern. (This is so that it works from duplicated subpatterns.)
1655    
1656      If there are any capturing brackets started but not finished, we have to      The state of the capturing groups is preserved over recursion, and
1657      save their starting points and reinstate them after the recursion. However,      re-instated afterwards. We don't know how many are started and not yet
1658      we don't know how many such there are (offset_top records the completed      finished (offset_top records the completed total) so we just have to save
1659      total) so we just have to save all the potential data. There may be up to      all the potential data. There may be up to 65535 such values, which is too
1660      65535 such values, which is too large to put on the stack, but using malloc      large to put on the stack, but using malloc for small numbers seems
1661      for small numbers seems expensive. As a compromise, the stack is used when      expensive. As a compromise, the stack is used when there are no more than
1662      there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc      REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
     is used. A problem is what to do if the malloc fails ... there is no way of  
     returning to the top level with an error. Save the top REC_STACK_SAVE_MAX  
     values on the stack, and accept that the rest may be wrong.  
1663    
1664      There are also other values that have to be saved. We use a chained      There are also other values that have to be saved. We use a chained
1665      sequence of blocks that actually live on the stack. Thanks to Robin Houston      sequence of blocks that actually live on the stack. Thanks to Robin Houston
1666      for the original version of this logic. */      for the original version of this logic. It has, however, been hacked around
1667        a lot, so he is not to blame for the current way it works. */
1668    
1669      case OP_RECURSE:      case OP_RECURSE:
1670        {        {
1671          recursion_info *ri;
1672          int recno;
1673    
1674        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1675        new_recursive.group_num = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
1676          GET2(callpat, 1 + LINK_SIZE);          GET2(callpat, 1 + LINK_SIZE);
1677    
1678          /* Check for repeating a recursion without advancing the subject pointer.
1679          This should catch convoluted mutual recursions. (Some simple cases are
1680          caught at compile time.) */
1681    
1682          for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1683            if (recno == ri->group_num && eptr == ri->subject_position)
1684              RRETURN(PCRE_ERROR_RECURSELOOP);
1685    
1686        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1687    
1688          new_recursive.group_num = recno;
1689          new_recursive.subject_position = eptr;
1690        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1691        md->recursive = &new_recursive;        md->recursive = &new_recursive;
1692    
1693        /* Find where to continue from afterwards */        /* Where to continue from afterwards */
1694    
1695        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
       new_recursive.after_call = ecode;  
1696    
1697        /* Now save the offset data. */        /* Now save the offset data */
1698    
1699        new_recursive.saved_max = md->offset_end;        new_recursive.saved_max = md->offset_end;
1700        if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)        if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
# Line 1388  for (;;) Line 1705  for (;;)
1705            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1706          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1707          }          }
   
1708        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1709              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
       new_recursive.save_offset_top = offset_top;  
1710    
1711        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. After processing each alternative,
1712        restore the offset and recursion data. */        restore the offset data. If there were nested recursions, md->recursive
1713          might be changed, so reset it before looping. */
1714    
1715        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1716        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;        cbegroup = (*callpat >= OP_SBRA);
1717        do        do
1718          {          {
1719            if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1720          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1721            md, ims, eptrb, flags, RM6);            md, eptrb, RM6);
1722            memcpy(md->offset_vector, new_recursive.offset_save,
1723                new_recursive.saved_max * sizeof(int));
1724            md->recursive = new_recursive.prevrec;
1725          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1726            {            {
1727            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
           md->recursive = new_recursive.prevrec;  
1728            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1729              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1730            MRRETURN(MATCH_MATCH);  
1731              /* Set where we got to in the subject, and reset the start in case
1732              it was changed by \K. This *is* propagated back out of a recursion,
1733              for Perl compatibility. */
1734    
1735              eptr = md->end_match_ptr;
1736              mstart = md->start_match_ptr;
1737              goto RECURSION_MATCHED;        /* Exit loop; end processing */
1738            }            }
1739          else if (rrc != MATCH_NOMATCH &&  
1740                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* PCRE does not allow THEN to escape beyond a recursion; it is treated
1741            as NOMATCH. */
1742    
1743            else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1744            {            {
1745            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1746            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1420  for (;;) Line 1749  for (;;)
1749            }            }
1750    
1751          md->recursive = &new_recursive;          md->recursive = &new_recursive;
         memcpy(md->offset_vector, new_recursive.offset_save,  
             new_recursive.saved_max * sizeof(int));  
1752          callpat += GET(callpat, 1);          callpat += GET(callpat, 1);
1753          }          }
1754        while (*callpat == OP_ALT);        while (*callpat == OP_ALT);
# Line 1432  for (;;) Line 1759  for (;;)
1759          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1760        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1761        }        }
     /* Control never reaches here */  
   
     /* "Once" brackets are like assertion brackets except that after a match,  
     the point in the subject string is not moved back. Thus there can never be  
     a move back into the brackets. Friedl calls these "atomic" subpatterns.  
     Check the alternative branches in turn - the matching won't pass the KET  
     for this kind of subpattern. If any one branch matches, we carry on as at  
     the end of a normal bracket, leaving the subject pointer, but resetting  
     the start-of-match value in case it was changed by \K. */  
   
     case OP_ONCE:  
     prev = ecode;  
     saved_eptr = eptr;  
   
     do  
       {  
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);  
       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */  
         {  
         mstart = md->start_match_ptr;  
         break;  
         }  
       if (rrc != MATCH_NOMATCH &&  
           (rrc != MATCH_THEN || md->start_match_ptr != ecode))  
         RRETURN(rrc);  
       ecode += GET(ecode,1);  
       }  
     while (*ecode == OP_ALT);  
   
     /* If hit the end of the group (which could be repeated), fail */  
1762    
1763      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      RECURSION_MATCHED:
1764        break;
     /* Continue as from after the assertion, updating the offsets high water  
     mark, since extracts may have been taken. */  
   
     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);  
   
     offset_top = md->end_offset_top;  
     eptr = md->end_match_ptr;  
   
     /* For a non-repeating ket, just continue at this level. This also  
     happens for a repeating ket if no characters were matched in the group.  
     This is the forcible breaking of infinite loops as implemented in Perl  
     5.005. If there is an options reset, it will get obeyed in the normal  
     course of events. */  
   
     if (*ecode == OP_KET || eptr == saved_eptr)  
       {  
       ecode += 1+LINK_SIZE;  
       break;  
       }  
   
     /* The repeating kets try the rest of the pattern or restart from the  
     preceding bracket, in the appropriate order. The second "call" of match()  
     uses tail recursion, to avoid using another stack frame. We need to reset  
     any options that changed within the bracket before re-running it, so  
     check the next opcode. */  
   
     if (ecode[1+LINK_SIZE] == OP_OPT)  
       {  
       ims = (ims & ~PCRE_IMS) | ecode[4];  
       DPRINTF(("ims set to %02lx at group repeat\n", ims));  
       }  
   
     if (*ecode == OP_KETRMIN)  
       {  
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);  
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
       ecode = prev;  
       flags = 0;  
       goto TAIL_RECURSE;  
       }  
     else  /* OP_KETRMAX */  
       {  
       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);  
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
       ecode += 1 + LINK_SIZE;  
       flags = 0;  
       goto TAIL_RECURSE;  
       }  
     /* Control never gets here */  
1765    
1766      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1767      bracketed group and go to there. */      bracketed group and go to there. */
# Line 1529  for (;;) Line 1777  for (;;)
1777      optional ones preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1778    
1779      case OP_BRAZERO:      case OP_BRAZERO:
1780        {      next = ecode + 1;
1781        next = ecode+1;      RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1782        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1783        if (rrc != MATCH_NOMATCH) RRETURN(rrc);      do next += GET(next, 1); while (*next == OP_ALT);
1784        do next += GET(next,1); while (*next == OP_ALT);      ecode = next + 1 + LINK_SIZE;
       ecode = next + 1 + LINK_SIZE;  
       }  
1785      break;      break;
1786    
1787      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1788        {      next = ecode + 1;
1789        next = ecode+1;      do next += GET(next, 1); while (*next == OP_ALT);
1790        do next += GET(next, 1); while (*next == OP_ALT);      RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1791        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1792        if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode++;
       ecode++;  
       }  
1793      break;      break;
1794    
1795      case OP_SKIPZERO:      case OP_SKIPZERO:
1796        {      next = ecode+1;
1797        next = ecode+1;      do next += GET(next,1); while (*next == OP_ALT);
1798        do next += GET(next,1); while (*next == OP_ALT);      ecode = next + 1 + LINK_SIZE;
       ecode = next + 1 + LINK_SIZE;  
       }  
1799      break;      break;
1800    
1801        /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1802        here; just jump to the group, with allow_zero set TRUE. */
1803    
1804        case OP_BRAPOSZERO:
1805        op = *(++ecode);
1806        allow_zero = TRUE;
1807        if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1808          goto POSSESSIVE_NON_CAPTURE;
1809    
1810      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1811    
1812      case OP_KET:      case OP_KET:
1813      case OP_KETRMIN:      case OP_KETRMIN:
1814      case OP_KETRMAX:      case OP_KETRMAX:
1815        case OP_KETRPOS:
1816      prev = ecode - GET(ecode, 1);      prev = ecode - GET(ecode, 1);
1817    
1818      /* If this was a group that remembered the subject start, in order to break      /* If this was a group that remembered the subject start, in order to break
1819      infinite repeats of empty string matches, retrieve the subject start from      infinite repeats of empty string matches, retrieve the subject start from
1820      the chain. Otherwise, set it NULL. */      the chain. Otherwise, set it NULL. */
1821    
1822      if (*prev >= OP_SBRA)      if (*prev >= OP_SBRA || *prev == OP_ONCE)
1823        {        {
1824        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1825        eptrb = eptrb->epb_prev;              /* Backup to previous group */        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1826        }        }
1827      else saved_eptr = NULL;      else saved_eptr = NULL;
1828    
1829      /* If we are at the end of an assertion group or an atomic group, stop      /* If we are at the end of an assertion group or a non-capturing atomic
1830      matching and return MATCH_MATCH, but record the current high water mark for      group, stop matching and return MATCH_MATCH, but record the current high
1831      use by positive assertions. We also need to record the match start in case      water mark for use by positive assertions. We also need to record the match
1832      it was changed by \K. */      start in case it was changed by \K. */
1833    
1834      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1835          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||           *prev == OP_ONCE_NC)
         *prev == OP_ONCE)  
1836        {        {
1837        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE_NC */
1838        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1839        md->start_match_ptr = mstart;        md->start_match_ptr = mstart;
1840        MRRETURN(MATCH_MATCH);        MRRETURN(MATCH_MATCH);         /* Sets md->mark */
1841        }        }
1842    
1843      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
1844      and if necessary complete handling an extraction by setting the offsets and      and if necessary complete handling an extraction by setting the offsets and
1845      bumping the high water mark. Note that whole-pattern recursion is coded as      bumping the high water mark. Whole-pattern recursion is coded as a recurse
1846      a recurse into group 0, so it won't be picked up here. Instead, we catch it      into group 0, so it won't be picked up here. Instead, we catch it when the
1847      when the OP_END is reached. Other recursion is handled here. */      OP_END is reached. Other recursion is handled here. We just have to record
1848        the current subject position and start match pointer and give a MATCH
1849        return. */
1850    
1851      if (*prev == OP_CBRA || *prev == OP_SCBRA)      if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1852            *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1853        {        {
1854        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1855        offset = number << 1;        offset = number << 1;
# Line 1605  for (;;) Line 1859  for (;;)
1859        printf("\n");        printf("\n");
1860  #endif  #endif
1861    
1862          /* Handle a recursively called group. */
1863    
1864          if (md->recursive != NULL && md->recursive->group_num == number)
1865            {
1866            md->end_match_ptr = eptr;
1867            md->start_match_ptr = mstart;
1868            RRETURN(MATCH_MATCH);
1869            }
1870    
1871          /* Deal with capturing */
1872    
1873        md->capture_last = number;        md->capture_last = number;
1874        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1875          {          {
1876            /* If offset is greater than offset_top, it means that we are
1877            "skipping" a capturing group, and that group's offsets must be marked
1878            unset. In earlier versions of PCRE, all the offsets were unset at the
1879            start of matching, but this doesn't work because atomic groups and
1880            assertions can cause a value to be set that should later be unset.
1881            Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1882            part of the atomic group, but this is not on the final matching path,
1883            so must be unset when 2 is set. (If there is no group 2, there is no
1884            problem, because offset_top will then be 2, indicating no capture.) */
1885    
1886            if (offset > offset_top)
1887              {
1888              register int *iptr = md->offset_vector + offset_top;
1889              register int *iend = md->offset_vector + offset;
1890              while (iptr < iend) *iptr++ = -1;
1891              }
1892    
1893            /* Now make the extraction */
1894    
1895          md->offset_vector[offset] =          md->offset_vector[offset] =
1896            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1897          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1898          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1899          }          }
1900          }
1901    
1902        /* Handle a recursively called group. Restore the offsets      /* For an ordinary non-repeating ket, just continue at this level. This
1903        appropriately and continue from after the call. */      also happens for a repeating ket if no characters were matched in the
1904        group. This is the forcible breaking of infinite loops as implemented in
1905        Perl 5.005. For a non-repeating atomic group that includes captures,
1906        establish a backup point by processing the rest of the pattern at a lower
1907        level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1908        original OP_ONCE level, thereby bypassing intermediate backup points, but
1909        resetting any captures that happened along the way. */
1910    
1911        if (md->recursive != NULL && md->recursive->group_num == number)      if (*ecode == OP_KET || eptr == saved_eptr)
1912          {
1913          if (*prev == OP_ONCE)
1914          {          {
1915          recursion_info *rec = md->recursive;          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1916          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1917          md->recursive = rec->prevrec;          md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1918          memcpy(md->offset_vector, rec->offset_save,          RRETURN(MATCH_ONCE);
           rec->saved_max * sizeof(int));  
         offset_top = rec->save_offset_top;  
         ecode = rec->after_call;  
         ims = original_ims;  
         break;  
1919          }          }
1920          ecode += 1 + LINK_SIZE;    /* Carry on at this level */
1921          break;
1922        }        }
1923    
1924      /* For both capturing and non-capturing groups, reset the value of the ims      /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1925      flags, in case they got changed during the group. */      and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1926        at a time from the outer level, thus saving stack. */
1927    
1928      ims = original_ims;      if (*ecode == OP_KETRPOS)
     DPRINTF(("ims reset to %02lx\n", ims));  
   
     /* For a non-repeating ket, just continue at this level. This also  
     happens for a repeating ket if no characters were matched in the group.  
     This is the forcible breaking of infinite loops as implemented in Perl  
     5.005. If there is an options reset, it will get obeyed in the normal  
     course of events. */  
   
     if (*ecode == OP_KET || eptr == saved_eptr)  
1929        {        {
1930        ecode += 1 + LINK_SIZE;        md->end_match_ptr = eptr;
1931        break;        md->end_offset_top = offset_top;
1932          RRETURN(MATCH_KETRPOS);
1933        }        }
1934    
1935      /* The repeating kets try the rest of the pattern or restart from the      /* The normal repeating kets try the rest of the pattern or restart from
1936      preceding bracket, in the appropriate order. In the second case, we can use      the preceding bracket, in the appropriate order. In the second case, we can
1937      tail recursion to avoid using another stack frame, unless we have an      use tail recursion to avoid using another stack frame, unless we have an
1938      unlimited repeat of a group that can match an empty string. */      atomic group or an unlimited repeat of a group that can match an empty
1939        string. */
     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;  
1940    
1941      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1942        {        {
1943        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1944        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1945        if (flags != 0)    /* Could match an empty string */        if (*prev == OP_ONCE)
1946            {
1947            RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
1948            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1949            md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1950            RRETURN(MATCH_ONCE);
1951            }
1952          if (*prev >= OP_SBRA)    /* Could match an empty string */
1953          {          {
1954          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);          md->match_function_type = MATCH_CBEGROUP;
1955            RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
1956          RRETURN(rrc);          RRETURN(rrc);
1957          }          }
1958        ecode = prev;        ecode = prev;
# Line 1670  for (;;) Line 1960  for (;;)
1960        }        }
1961      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1962        {        {
1963        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);        if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1964          RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
1965          if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
1966        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1967          if (*prev == OP_ONCE)
1968            {
1969            RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
1970            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1971            md->once_target = prev;
1972            RRETURN(MATCH_ONCE);
1973            }
1974        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
       flags = 0;  
1975        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1976        }        }
1977      /* Control never gets here */      /* Control never gets here */
1978    
1979      /* Start of subject unless notbol, or after internal newline if multiline */      /* Not multiline mode: start of subject assertion, unless notbol. */
1980    
1981      case OP_CIRC:      case OP_CIRC:
1982      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
     if ((ims & PCRE_MULTILINE) != 0)  
       {  
       if (eptr != md->start_subject &&  
           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))  
         MRRETURN(MATCH_NOMATCH);  
       ecode++;  
       break;  
       }  
     /* ... else fall through */  
1983    
1984      /* Start of subject assertion */      /* Start of subject assertion */
1985    
# Line 1699  for (;;) Line 1988  for (;;)
1988      ecode++;      ecode++;
1989      break;      break;
1990    
1991        /* Multiline mode: start of subject unless notbol, or after any newline. */
1992    
1993        case OP_CIRCM:
1994        if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1995        if (eptr != md->start_subject &&
1996            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1997          MRRETURN(MATCH_NOMATCH);
1998        ecode++;
1999        break;
2000    
2001      /* Start of match assertion */      /* Start of match assertion */
2002    
2003      case OP_SOM:      case OP_SOM:
# Line 1713  for (;;) Line 2012  for (;;)
2012      ecode++;      ecode++;
2013      break;      break;
2014    
2015      /* Assert before internal newline if multiline, or before a terminating      /* Multiline mode: assert before any newline, or before end of subject
2016      newline unless endonly is set, else end of subject unless noteol is set. */      unless noteol is set. */
2017    
2018      case OP_DOLL:      case OP_DOLLM:
2019      if ((ims & PCRE_MULTILINE) != 0)      if (eptr < md->end_subject)
2020        {        { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
2021        if (eptr < md->end_subject)      else
         { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }  
       else  
         {  
         if (md->noteol) MRRETURN(MATCH_NOMATCH);  
         SCHECK_PARTIAL();  
         }  
       ecode++;  
       break;  
       }  
     else  /* Not multiline */  
2022        {        {
2023        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
2024        if (!md->endonly) goto ASSERT_NL_OR_EOS;        SCHECK_PARTIAL();
2025        }        }
2026        ecode++;
2027        break;
2028    
2029        /* Not multiline mode: assert before a terminating newline or before end of
2030        subject unless noteol is set. */
2031    
2032        case OP_DOLL:
2033        if (md->noteol) MRRETURN(MATCH_NOMATCH);
2034        if (!md->endonly) goto ASSERT_NL_OR_EOS;
2035    
2036      /* ... else fall through for endonly */      /* ... else fall through for endonly */
2037    
# Line 1884  for (;;) Line 2182  for (;;)
2182      /* Fall through */      /* Fall through */
2183    
2184      case OP_ALLANY:      case OP_ALLANY:
2185      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2186        {        {                            /* not be updated before SCHECK_PARTIAL. */
2187        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2188        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2189        }        }
2190        eptr++;
2191      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2192      ecode++;      ecode++;
2193      break;      break;
# Line 1897  for (;;) Line 2196  for (;;)
2196      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
2197    
2198      case OP_ANYBYTE:      case OP_ANYBYTE:
2199      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2200        {        {                            /* not be updated before SCHECK_PARTIAL. */
2201        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2202        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2203        }        }
2204        eptr++;
2205      ecode++;      ecode++;
2206      break;      break;
2207    
# Line 2017  for (;;) Line 2317  for (;;)
2317      switch(c)      switch(c)
2318        {        {
2319        default: MRRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
2320    
2321        case 0x000d:        case 0x000d:
2322        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2323        break;        break;
# Line 2240  for (;;) Line 2541  for (;;)
2541        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2542        }        }
2543      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2544        if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);
2545        while (eptr < md->end_subject)
2546        {        {
2547        int category = UCD_CATEGORY(c);        int len = 1;
2548        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);        if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2549        while (eptr < md->end_subject)        if (UCD_CATEGORY(c) != ucp_M) break;
2550          {        eptr += len;
         int len = 1;  
         if (!utf8) c = *eptr; else  
           {  
           GETCHARLEN(c, eptr, len);  
           }  
         category = UCD_CATEGORY(c);  
         if (category != ucp_M) break;  
         eptr += len;  
         }  
2551        }        }
2552      ecode++;      ecode++;
2553      break;      break;
# Line 2269  for (;;) Line 2563  for (;;)
2563      loops). */      loops). */
2564    
2565      case OP_REF:      case OP_REF:
2566        case OP_REFI:
2567        caseless = op == OP_REFI;
2568      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2569      ecode += 3;      ecode += 3;
2570    
# Line 2316  for (;;) Line 2612  for (;;)
2612        break;        break;
2613    
2614        default:               /* No repeat follows */        default:               /* No repeat follows */
2615        if ((length = match_ref(offset, eptr, length, md, ims)) < 0)        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2616          {          {
2617          CHECK_PARTIAL();          CHECK_PARTIAL();
2618          MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
# Line 2336  for (;;) Line 2632  for (;;)
2632    
2633      for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2634        {        {
2635        int slength;        int slength;
2636        if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2637          {          {
2638          CHECK_PARTIAL();          CHECK_PARTIAL();
2639          MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
# Line 2356  for (;;) Line 2652  for (;;)
2652        {        {
2653        for (fi = min;; fi++)        for (fi = min;; fi++)
2654          {          {
2655          int slength;          int slength;
2656          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2657          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2658          if (fi >= max) MRRETURN(MATCH_NOMATCH);          if (fi >= max) MRRETURN(MATCH_NOMATCH);
2659          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2660            {            {
2661            CHECK_PARTIAL();            CHECK_PARTIAL();
2662            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
# Line 2377  for (;;) Line 2673  for (;;)
2673        pp = eptr;        pp = eptr;
2674        for (i = min; i < max; i++)        for (i = min; i < max; i++)
2675          {          {
2676          int slength;          int slength;
2677          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2678            {            {
2679            CHECK_PARTIAL();            CHECK_PARTIAL();
2680            break;            break;
# Line 2387  for (;;) Line 2683  for (;;)
2683          }          }
2684        while (eptr >= pp)        while (eptr >= pp)
2685          {          {
2686          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2687          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2688          eptr -= length;          eptr -= length;
2689          }          }
# Line 2497  for (;;) Line 2793  for (;;)
2793            {            {
2794            for (fi = min;; fi++)            for (fi = min;; fi++)
2795              {              {
2796              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2797              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2798              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2799              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 2522  for (;;) Line 2818  for (;;)
2818            {            {
2819            for (fi = min;; fi++)            for (fi = min;; fi++)
2820              {              {
2821              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2822              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2823              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2824              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 2568  for (;;) Line 2864  for (;;)
2864              }              }
2865            for (;;)            for (;;)
2866              {              {
2867              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
2868              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2869              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2870              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2591  for (;;) Line 2887  for (;;)
2887              }              }
2888            while (eptr >= pp)            while (eptr >= pp)
2889              {              {
2890              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
2891              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2892              eptr--;              eptr--;
2893              }              }
# Line 2667  for (;;) Line 2963  for (;;)
2963          {          {
2964          for (fi = min;; fi++)          for (fi = min;; fi++)
2965            {            {
2966            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
2967            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2968            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2969            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 2700  for (;;) Line 2996  for (;;)
2996            }            }
2997          for(;;)          for(;;)
2998            {            {
2999            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3000            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3001            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3002            if (utf8) BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
# Line 2745  for (;;) Line 3041  for (;;)
3041    
3042      /* Match a single character, caselessly */      /* Match a single character, caselessly */
3043    
3044      case OP_CHARNC:      case OP_CHARI:
3045  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3046      if (utf8)      if (utf8)
3047        {        {
# Line 2805  for (;;) Line 3101  for (;;)
3101      /* Match a single character repeatedly. */      /* Match a single character repeatedly. */
3102    
3103      case OP_EXACT:      case OP_EXACT:
3104        case OP_EXACTI:
3105      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3106      ecode += 3;      ecode += 3;
3107      goto REPEATCHAR;      goto REPEATCHAR;
3108    
3109      case OP_POSUPTO:      case OP_POSUPTO:
3110        case OP_POSUPTOI:
3111      possessive = TRUE;      possessive = TRUE;
3112      /* Fall through */      /* Fall through */
3113    
3114      case OP_UPTO:      case OP_UPTO:
3115        case OP_UPTOI:
3116      case OP_MINUPTO:      case OP_MINUPTO:
3117        case OP_MINUPTOI:
3118      min = 0;      min = 0;
3119      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3120      minimize = *ecode == OP_MINUPTO;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3121      ecode += 3;      ecode += 3;
3122      goto REPEATCHAR;      goto REPEATCHAR;
3123    
3124      case OP_POSSTAR:      case OP_POSSTAR:
3125        case OP_POSSTARI:
3126      possessive = TRUE;      possessive = TRUE;
3127      min = 0;      min = 0;
3128      max = INT_MAX;      max = INT_MAX;
# Line 2829  for (;;) Line 3130  for (;;)
3130      goto REPEATCHAR;      goto REPEATCHAR;
3131    
3132      case OP_POSPLUS:      case OP_POSPLUS:
3133        case OP_POSPLUSI:
3134      possessive = TRUE;      possessive = TRUE;
3135      min = 1;      min = 1;
3136      max = INT_MAX;      max = INT_MAX;
# Line 2836  for (;;) Line 3138  for (;;)
3138      goto REPEATCHAR;      goto REPEATCHAR;
3139    
3140      case OP_POSQUERY:      case OP_POSQUERY:
3141        case OP_POSQUERYI:
3142      possessive = TRUE;      possessive = TRUE;
3143      min = 0;      min = 0;
3144      max = 1;      max = 1;
# Line 2843  for (;;) Line 3146  for (;;)
3146      goto REPEATCHAR;      goto REPEATCHAR;
3147    
3148      case OP_STAR:      case OP_STAR:
3149        case OP_STARI:
3150      case OP_MINSTAR:      case OP_MINSTAR:
3151        case OP_MINSTARI:
3152      case OP_PLUS:      case OP_PLUS:
3153        case OP_PLUSI:
3154      case OP_MINPLUS:      case OP_MINPLUS:
3155        case OP_MINPLUSI:
3156      case OP_QUERY:      case OP_QUERY:
3157        case OP_QUERYI:
3158      case OP_MINQUERY:      case OP_MINQUERY:
3159      c = *ecode++ - OP_STAR;      case OP_MINQUERYI:
3160        c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3161      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
   
3162      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
3163      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3164      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
# Line 2873  for (;;) Line 3181  for (;;)
3181          {          {
3182  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3183          unsigned int othercase;          unsigned int othercase;
3184          if ((ims & PCRE_CASELESS) != 0 &&          if (op >= OP_STARI &&     /* Caseless */
3185              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3186            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
3187          else oclength = 0;          else oclength = 0;
# Line 2901  for (;;) Line 3209  for (;;)
3209            {            {
3210            for (fi = min;; fi++)            for (fi = min;; fi++)
3211              {              {
3212              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3213              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3214              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3215              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
# Line 2943  for (;;) Line 3251  for (;;)
3251    
3252            for(;;)            for(;;)
3253              {              {
3254              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3255              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3256              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
3257  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2980  for (;;) Line 3288  for (;;)
3288      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3289        max, eptr));        max, eptr));
3290    
3291      if ((ims & PCRE_CASELESS) != 0)      if (op >= OP_STARI)  /* Caseless */
3292        {        {
3293        fc = md->lcc[fc];        fc = md->lcc[fc];
3294        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
# Line 2997  for (;;) Line 3305  for (;;)
3305          {          {
3306          for (fi = min;; fi++)          for (fi = min;; fi++)
3307            {            {
3308            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3309            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3310            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3311            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 3027  for (;;) Line 3335  for (;;)
3335    
3336          while (eptr >= pp)          while (eptr >= pp)
3337            {            {
3338            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3339            eptr--;            eptr--;
3340            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3341            }            }
# Line 3056  for (;;) Line 3364  for (;;)
3364          {          {
3365          for (fi = min;; fi++)          for (fi = min;; fi++)
3366            {            {
3367            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3368            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3369            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3370            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 3085  for (;;) Line 3393  for (;;)
3393    
3394          while (eptr >= pp)          while (eptr >= pp)
3395            {            {
3396            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3397            eptr--;            eptr--;
3398            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3399            }            }
# Line 3098  for (;;) Line 3406  for (;;)
3406      checking can be multibyte. */      checking can be multibyte. */
3407    
3408      case OP_NOT:      case OP_NOT:
3409        case OP_NOTI:
3410      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3411        {        {
3412        SCHECK_PARTIAL();        SCHECK_PARTIAL();
# Line 3105  for (;;) Line 3414  for (;;)
3414        }        }
3415      ecode++;      ecode++;
3416      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3417      if ((ims & PCRE_CASELESS) != 0)      if (op == OP_NOTI)         /* The caseless case */
3418        {        {
3419  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3420        if (c < 256)        if (c < 256)
# Line 3113  for (;;) Line 3422  for (;;)
3422        c = md->lcc[c];        c = md->lcc[c];
3423        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3424        }        }
3425      else      else    /* Caseful */
3426        {        {
3427        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3428        }        }
# Line 3127  for (;;) Line 3436  for (;;)
3436      about... */      about... */
3437    
3438      case OP_NOTEXACT:      case OP_NOTEXACT:
3439        case OP_NOTEXACTI:
3440      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3441      ecode += 3;      ecode += 3;
3442      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3443    
3444      case OP_NOTUPTO:      case OP_NOTUPTO:
3445        case OP_NOTUPTOI:
3446      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
3447        case OP_NOTMINUPTOI:
3448      min = 0;      min = 0;
3449      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3450      minimize = *ecode == OP_NOTMINUPTO;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3451      ecode += 3;      ecode += 3;
3452      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3453    
3454      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
3455        case OP_NOTPOSSTARI:
3456      possessive = TRUE;      possessive = TRUE;
3457      min = 0;      min = 0;
3458      max = INT_MAX;      max = INT_MAX;
# Line 3147  for (;;) Line 3460  for (;;)
3460      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3461    
3462      case OP_NOTPOSPLUS:      case OP_NOTPOSPLUS:
3463        case OP_NOTPOSPLUSI:
3464      possessive = TRUE;      possessive = TRUE;
3465      min = 1;      min = 1;
3466      max = INT_MAX;      max = INT_MAX;
# Line 3154  for (;;) Line 3468  for (;;)
3468      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3469    
3470      case OP_NOTPOSQUERY:      case OP_NOTPOSQUERY:
3471        case OP_NOTPOSQUERYI:
3472      possessive = TRUE;      possessive = TRUE;
3473      min = 0;      min = 0;
3474      max = 1;      max = 1;
# Line 3161  for (;;) Line 3476  for (;;)
3476      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3477    
3478      case OP_NOTPOSUPTO:      case OP_NOTPOSUPTO:
3479        case OP_NOTPOSUPTOI:
3480      possessive = TRUE;      possessive = TRUE;
3481      min = 0;      min = 0;
3482      max = GET2(ecode, 1);      max = GET2(ecode, 1);
# Line 3168  for (;;) Line 3484  for (;;)
3484      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3485    
3486      case OP_NOTSTAR:      case OP_NOTSTAR:
3487        case OP_NOTSTARI:
3488      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3489        case OP_NOTMINSTARI:
3490      case OP_NOTPLUS:      case OP_NOTPLUS:
3491        case OP_NOTPLUSI:
3492      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
3493        case OP_NOTMINPLUSI:
3494      case OP_NOTQUERY:      case OP_NOTQUERY:
3495        case OP_NOTQUERYI:
3496      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
3497      c = *ecode++ - OP_NOTSTAR;      case OP_NOTMINQUERYI:
3498        c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3499      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
3500      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
3501      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
# Line 3195  for (;;) Line 3517  for (;;)
3517      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3518        max, eptr));        max, eptr));
3519    
3520      if ((ims & PCRE_CASELESS) != 0)      if (op >= OP_NOTSTARI)     /* Caseless */
3521        {        {
3522        fc = md->lcc[fc];        fc = md->lcc[fc];
3523    
# Line 3243  for (;;) Line 3565  for (;;)
3565            register unsigned int d;            register unsigned int d;
3566            for (fi = min;; fi++)            for (fi = min;; fi++)
3567              {              {
3568              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3569              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3570              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3571              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3262  for (;;) Line 3584  for (;;)
3584            {            {
3585            for (fi = min;; fi++)            for (fi = min;; fi++)
3586              {              {
3587              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3588              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3589              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3590              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3303  for (;;) Line 3625  for (;;)
3625          if (possessive) continue;          if (possessive) continue;
3626          for(;;)          for(;;)
3627              {              {
3628              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3629              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3630              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3631              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3326  for (;;) Line 3648  for (;;)
3648            if (possessive) continue;            if (possessive) continue;
3649            while (eptr >= pp)            while (eptr >= pp)
3650              {              {
3651              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3652              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3653              eptr--;              eptr--;
3654              }              }
# Line 3383  for (;;) Line 3705  for (;;)
3705            register unsigned int d;            register unsigned int d;
3706            for (fi = min;; fi++)            for (fi = min;; fi++)
3707              {              {
3708              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3709              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3710              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3711              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3401  for (;;) Line 3723  for (;;)
3723            {            {
3724            for (fi = min;; fi++)            for (fi = min;; fi++)
3725              {              {
3726              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3727              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3728              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3729              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3441  for (;;) Line 3763  for (;;)
3763            if (possessive) continue;            if (possessive) continue;
3764            for(;;)            for(;;)
3765              {              {
3766              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
3767              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3768              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3769              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3464  for (;;) Line 3786  for (;;)
3786            if (possessive) continue;            if (possessive) continue;
3787            while (eptr >= pp)            while (eptr >= pp)
3788              {              {
3789              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
3790              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3791              eptr--;              eptr--;
3792              }              }
# Line 3579  for (;;) Line 3901  for (;;)
3901            case PT_LAMP:            case PT_LAMP:
3902            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3903              {              {
3904                int chartype;
3905              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3906                {                {
3907                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3908                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3909                }                }
3910              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3911              prop_chartype = UCD_CHARTYPE(c);              chartype = UCD_CHARTYPE(c);
3912              if ((prop_chartype == ucp_Lu ||              if ((chartype == ucp_Lu ||
3913                   prop_chartype == ucp_Ll ||                   chartype == ucp_Ll ||
3914                   prop_chartype == ucp_Lt) == prop_fail_result)                   chartype == ucp_Lt) == prop_fail_result)
3915                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3916              }              }
3917            break;            break;
# Line 3602  for (;;) Line 3925  for (;;)
3925                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3926                }                }
3927              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3928              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
             if ((prop_category == prop_value) == prop_fail_result)  
3929                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3930              }              }
3931            break;            break;
# Line 3617  for (;;) Line 3939  for (;;)
3939                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3940                }                }
3941              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3942              prop_chartype = UCD_CHARTYPE(c);              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
             if ((prop_chartype == prop_value) == prop_fail_result)  
3943                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3944              }              }
3945            break;            break;
# Line 3632  for (;;) Line 3953  for (;;)
3953                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3954                }                }
3955              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3956              prop_script = UCD_SCRIPT(c);              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
             if ((prop_script == prop_value) == prop_fail_result)  
3957                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3958              }              }
3959            break;            break;
# Line 3641  for (;;) Line 3961  for (;;)
3961            case PT_ALNUM:            case PT_ALNUM:
3962            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3963              {              {
3964                int category;
3965              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3966                {                {
3967                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3968                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3969                }                }
3970              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3971              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
3972              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                    == prop_fail_result)  
3973                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3974              }              }
3975            break;            break;
# Line 3663  for (;;) Line 3983  for (;;)
3983                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3984                }                }
3985              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3986              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
3987                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
3988                     == prop_fail_result)                     == prop_fail_result)
3989                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 3680  for (;;) Line 3999  for (;;)
3999                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4000                }                }
4001              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4002              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
4003                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4004                     == prop_fail_result)                     == prop_fail_result)
4005                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 3691  for (;;) Line 4009  for (;;)
4009            case PT_WORD:            case PT_WORD:
4010            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4011              {              {
4012                int category;
4013              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4014                {                {
4015                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4016                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4017                }                }
4018              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4019              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4020              if ((prop_category == ucp_L || prop_category == ucp_N ||              if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
                  c == CHAR_UNDERSCORE)  
4021                     == prop_fail_result)                     == prop_fail_result)
4022                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4023              }              }
# Line 3725  for (;;) Line 4043  for (;;)
4043              MRRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4044              }              }
4045            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4046            prop_category = UCD_CATEGORY(c);            if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);
           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);  
4047            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4048              {              {
4049              int len = 1;              int len = 1;
4050              if (!utf8) c = *eptr;              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4051                else { GETCHARLEN(c, eptr, len); }              if (UCD_CATEGORY(c) != ucp_M) break;
             prop_category = UCD_CATEGORY(c);  
             if (prop_category != ucp_M) break;  
4052              eptr += len;              eptr += len;
4053              }              }
4054            }            }
# Line 3791  for (;;) Line 4106  for (;;)
4106            switch(c)            switch(c)
4107              {              {
4108              default: MRRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4109    
4110              case 0x000d:              case 0x000d:
4111              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4112              break;              break;
# Line 4067  for (;;) Line 4383  for (;;)
4383            switch(*eptr++)            switch(*eptr++)
4384              {              {
4385              default: MRRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4386    
4387              case 0x000d:              case 0x000d:
4388              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4389              break;              break;
4390    
4391              case 0x000a:              case 0x000a:
4392              break;              break;
4393    
# Line 4259  for (;;) Line 4577  for (;;)
4577            case PT_ANY:            case PT_ANY:
4578            for (fi = min;; fi++)            for (fi = min;; fi++)
4579              {              {
4580              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4581              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4582              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4583              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4275  for (;;) Line 4593  for (;;)
4593            case PT_LAMP:            case PT_LAMP:
4594            for (fi = min;; fi++)            for (fi = min;; fi++)
4595              {              {
4596              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);              int chartype;
4597                RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4598              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4599              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4600              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4284  for (;;) Line 4603  for (;;)
4603                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4604                }                }
4605              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4606              prop_chartype = UCD_CHARTYPE(c);              chartype = UCD_CHARTYPE(c);
4607              if ((prop_chartype == ucp_Lu ||              if ((chartype == ucp_Lu ||
4608                   prop_chartype == ucp_Ll ||                   chartype == ucp_Ll ||
4609                   prop_chartype == ucp_Lt) == prop_fail_result)                   chartype == ucp_Lt) == prop_fail_result)
4610                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4611              }              }
4612            /* Control never gets here */            /* Control never gets here */
# Line 4295  for (;;) Line 4614  for (;;)
4614            case PT_GC:            case PT_GC:
4615            for (fi = min;; fi++)            for (fi = min;; fi++)
4616              {              {
4617              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4618              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4619              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4620              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4304  for (;;) Line 4623  for (;;)
4623                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4624                }                }
4625              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4626              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
             if ((prop_category == prop_value) == prop_fail_result)  
4627                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4628              }              }
4629            /* Control never gets here */            /* Control never gets here */
# Line 4313  for (;;) Line 4631  for (;;)
4631            case PT_PC:            case PT_PC:
4632            for (fi = min;; fi++)            for (fi = min;; fi++)
4633              {              {
4634              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4635              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4636              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4637              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4322  for (;;) Line 4640  for (;;)
4640                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4641                }                }
4642              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4643              prop_chartype = UCD_CHARTYPE(c);              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
             if ((prop_chartype == prop_value) == prop_fail_result)  
4644                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4645              }              }
4646            /* Control never gets here */            /* Control never gets here */
# Line 4331  for (;;) Line 4648  for (;;)
4648            case PT_SC:            case PT_SC:
4649            for (fi = min;; fi++)            for (fi = min;; fi++)
4650              {              {
4651              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4652              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4653              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4654              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4340  for (;;) Line 4657  for (;;)
4657                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4658                }                }
4659              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4660              prop_script = UCD_SCRIPT(c);              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
             if ((prop_script == prop_value) == prop_fail_result)  
4661                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4662              }              }
4663            /* Control never gets here */            /* Control never gets here */
# Line 4349  for (;;) Line 4665  for (;;)
4665            case PT_ALNUM:            case PT_ALNUM:
4666            for (fi = min;; fi++)            for (fi = min;; fi++)
4667              {              {
4668              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);              int category;
4669                RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4670              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4671              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4672              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4358  for (;;) Line 4675  for (;;)
4675                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4676                }                }
4677              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4678              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4679              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                    == prop_fail_result)  
4680                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4681              }              }
4682            /* Control never gets here */            /* Control never gets here */
# Line 4368  for (;;) Line 4684  for (;;)
4684            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
4685            for (fi = min;; fi++)            for (fi = min;; fi++)
4686              {              {
4687              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
4688              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4689              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4690              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4377  for (;;) Line 4693  for (;;)
4693                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4694                }                }
4695              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4696              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
4697                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4698                     == prop_fail_result)                     == prop_fail_result)
4699                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4388  for (;;) Line 4703  for (;;)
4703            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
4704            for (fi = min;; fi++)            for (fi = min;; fi++)
4705              {              {
4706              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
4707              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4708              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4709              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4397  for (;;) Line 4712  for (;;)
4712                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4713                }                }
4714              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4715              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
4716                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4717                     == prop_fail_result)                     == prop_fail_result)
4718                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4408  for (;;) Line 4722  for (;;)
4722            case PT_WORD:            case PT_WORD:
4723            for (fi = min;; fi++)            for (fi = min;; fi++)
4724              {              {
4725              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);              int category;
4726                RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4727              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4728              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4729              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4417  for (;;) Line 4732  for (;;)
4732                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4733                }                }
4734              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4735              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4736              if ((prop_category == ucp_L ||              if ((category == ucp_L ||
4737                   prop_category == ucp_N ||                   category == ucp_N ||
4738                   c == CHAR_UNDERSCORE)                   c == CHAR_UNDERSCORE)
4739                     == prop_fail_result)                     == prop_fail_result)
4740                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4440  for (;;) Line 4755  for (;;)
4755          {          {
4756          for (fi = min;; fi++)          for (fi = min;; fi++)
4757            {            {
4758            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
4759            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4760            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4761            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 4449  for (;;) Line 4764  for (;;)
4764              MRRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4765              }              }
4766            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4767            prop_category = UCD_CATEGORY(c);            if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);
           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);  
4768            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4769              {              {
4770              int len = 1;              int len = 1;
4771              if (!utf8) c = *eptr;              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4772                else { GETCHARLEN(c, eptr, len); }              if (UCD_CATEGORY(c) != ucp_M) break;
             prop_category = UCD_CATEGORY(c);  
             if (prop_category != ucp_M) break;  
4773              eptr += len;              eptr += len;
4774              }              }
4775            }            }
4776          }          }
   
4777        else        else
4778  #endif     /* SUPPORT_UCP */  #endif     /* SUPPORT_UCP */
4779    
# Line 4472  for (;;) Line 4783  for (;;)
4783          {          {
4784          for (fi = min;; fi++)          for (fi = min;; fi++)
4785            {            {
4786            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
4787            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4788            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4789            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 4635  for (;;) Line 4946  for (;;)
4946          {          {
4947          for (fi = min;; fi++)          for (fi = min;; fi++)
4948            {            {
4949            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
4950            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4951            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4952            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 4783  for (;;) Line 5094  for (;;)
5094            case PT_LAMP:            case PT_LAMP:
5095            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5096              {              {
5097                int chartype;
5098              int len = 1;              int len = 1;
5099              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
5100                {                {
# Line 4790  for (;;) Line 5102  for (;;)
5102                break;                break;
5103                }                }
5104              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5105              prop_chartype = UCD_CHARTYPE(c);              chartype = UCD_CHARTYPE(c);
5106              if ((prop_chartype == ucp_Lu ||              if ((chartype == ucp_Lu ||
5107                   prop_chartype == ucp_Ll ||                   chartype == ucp_Ll ||
5108                   prop_chartype == ucp_Lt) == prop_fail_result)                   chartype == ucp_Lt) == prop_fail_result)
5109                break;                break;
5110              eptr+= len;              eptr+= len;
5111              }              }
# Line 4809  for (;;) Line 5121  for (;;)
5121                break;                break;
5122                }                }
5123              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5124              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
             if ((prop_category == prop_value) == prop_fail_result)  
               break;  
5125              eptr+= len;              eptr+= len;
5126              }              }
5127            break;            break;
# Line 4826  for (;;) Line 5136  for (;;)
5136                break;                break;
5137                }                }
5138              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5139              prop_chartype = UCD_CHARTYPE(c);              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
             if ((prop_chartype == prop_value) == prop_fail_result)  
               break;  
5140              eptr+= len;              eptr+= len;
5141              }              }
5142            break;            break;
# Line 4843  for (;;) Line 5151  for (;;)
5151                break;                break;
5152                }                }
5153              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5154              prop_script = UCD_SCRIPT(c);              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
             if ((prop_script == prop_value) == prop_fail_result)  
               break;  
5155              eptr+= len;              eptr+= len;
5156              }              }
5157            break;            break;
# Line 4853  for (;;) Line 5159  for (;;)
5159            case PT_ALNUM:            case PT_ALNUM:
5160            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5161              {              {
5162                int category;
5163              int len = 1;              int len = 1;
5164              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
5165                {                {
# Line 4860  for (;;) Line 5167  for (;;)
5167                break;                break;
5168                }                }
5169              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5170              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
5171              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                  == prop_fail_result)  
5172                break;                break;
5173              eptr+= len;              eptr+= len;
5174              }              }
# Line 4878  for (;;) Line 5184  for (;;)
5184                break;                break;
5185                }                }
5186              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5187              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
5188                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
5189                   == prop_fail_result)                   == prop_fail_result)
5190                break;                break;
# Line 4897  for (;;) Line 5202  for (;;)
5202                break;                break;
5203                }                }
5204              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5205              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
5206                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
5207                   == prop_fail_result)                   == prop_fail_result)
5208                break;                break;
# Line 4909  for (;;) Line 5213  for (;;)
5213            case PT_WORD:            case PT_WORD:
5214            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5215              {              {
5216                int category;
5217              int len = 1;              int len = 1;
5218              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
5219                {                {
# Line 4916  for (;;) Line 5221  for (;;)
5221                break;                break;
5222                }                }
5223              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5224              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
5225              if ((prop_category == ucp_L || prop_category == ucp_N ||              if ((category == ucp_L || category == ucp_N ||
5226                   c == CHAR_UNDERSCORE) == prop_fail_result)                   c == CHAR_UNDERSCORE) == prop_fail_result)
5227                break;                break;
5228              eptr+= len;              eptr+= len;
# Line 4933  for (;;) Line 5238  for (;;)
5238          if (possessive) continue;          if (possessive) continue;
5239          for(;;)          for(;;)
5240            {            {
5241            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5242            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5243            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5244            if (utf8) BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
# Line 4947  for (;;) Line 5252  for (;;)
5252          {          {
5253          for (i = min; i < max; i++)          for (i = min; i < max; i++)
5254            {            {
5255              int len = 1;
5256            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
5257              {              {
5258              SCHECK_PARTIAL();              SCHECK_PARTIAL();
5259              break;              break;
5260              }              }
5261            GETCHARINCTEST(c, eptr);            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5262            prop_category = UCD_CATEGORY(c);            if (UCD_CATEGORY(c) == ucp_M) break;
5263            if (prop_category == ucp_M) break;            eptr += len;
5264            while (eptr < md->end_subject)            while (eptr < md->end_subject)
5265              {              {
5266              int len = 1;              len = 1;
5267              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5268                {              if (UCD_CATEGORY(c) != ucp_M) break;
               GETCHARLEN(c, eptr, len);  
               }  
             prop_category = UCD_CATEGORY(c);  
             if (prop_category != ucp_M) break;  
5269              eptr += len;              eptr += len;
5270              }              }
5271            }            }
# Line 4974  for (;;) Line 5276  for (;;)
5276    
5277          for(;;)          for(;;)
5278            {            {
5279            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5280            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5281            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5282            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
5283              {              {
             int len = 1;  
5284              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
5285                {                {
5286                BACKCHAR(eptr);                BACKCHAR(eptr);
5287                GETCHARLEN(c, eptr, len);                GETCHAR(c, eptr);
5288                }                }
5289              prop_category = UCD_CATEGORY(c);              if (UCD_CATEGORY(c) != ucp_M) break;
             if (prop_category != ucp_M) break;  
5290              eptr--;              eptr--;
5291              }              }
5292            }            }
# Line 5050  for (;;) Line 5350  for (;;)
5350                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5351                }                }
5352              }              }
5353            else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */            else
5354                {
5355                eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5356                SCHECK_PARTIAL();
5357                }
5358            break;            break;
5359    
5360            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 5258  for (;;) Line 5562  for (;;)
5562            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5563            }            }
5564    
5565          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run. If possessive, we are
5566            done (no backing up). Otherwise, match at this position; anything other
5567            than no match is immediately returned. For nomatch, back up one
5568            character, unless we are matching \R and the last thing matched was
5569            \r\n, in which case, back up two bytes. */
5570    
5571          if (possessive) continue;          if (possessive) continue;
5572          for(;;)          for(;;)
5573            {            {
5574            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5575            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5576            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5577            BACKCHAR(eptr);            BACKCHAR(eptr);
5578              if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5579                  eptr[-1] == '\r') eptr--;
5580            }            }
5581          }          }
5582        else        else
# Line 5465  for (;;) Line 5775  for (;;)
5775            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5776            }            }
5777    
5778          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run. If possessive, we are
5779            done (no backing up). Otherwise, match at this position; anything other
5780            than no match is immediately returned. For nomatch, back up one
5781            character (byte), unless we are matching \R and the last thing matched
5782            was \r\n, in which case, back up two bytes. */
5783    
5784          if (possessive) continue;          if (possessive) continue;
5785          while (eptr >= pp)          while (eptr >= pp)
5786            {            {
5787            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
           eptr--;  
5788            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5789              eptr--;
5790              if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5791                  eptr[-1] == '\r') eptr--;
5792            }            }
5793          }          }
5794    
# Line 5511  switch (frame->Xwhere) Line 5827  switch (frame->Xwhere)
5827    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5828    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5829    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5830    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
5831      LBL(65) LBL(66)
5832  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5833    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5834    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
# Line 5540  Undefine all the macros that were define Line 5857  Undefine all the macros that were define
5857  #undef ecode  #undef ecode
5858  #undef mstart  #undef mstart
5859  #undef offset_top  #undef offset_top
 #undef ims  
5860  #undef eptrb  #undef eptrb
5861  #undef flags  #undef flags
5862    
# Line 5558  Undefine all the macros that were define Line 5874  Undefine all the macros that were define
5874  #undef condition  #undef condition
5875  #undef prev_is_word  #undef prev_is_word
5876    
 #undef original_ims  
   
5877  #undef ctype  #undef ctype
5878  #undef length  #undef length
5879  #undef max  #undef max
# Line 5616  pcre_exec(const pcre *argument_re, const Line 5930  pcre_exec(const pcre *argument_re, const
5930    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5931    int offsetcount)    int offsetcount)
5932  {  {
5933  int rc, resetcount, ocount;  int rc, ocount, arg_offset_max;
5934  int first_byte = -1;  int first_byte = -1;
5935  int req_byte = -1;  int req_byte = -1;
5936  int req_byte2 = -1;  int req_byte2 = -1;
5937  int newline;  int newline;
 unsigned long int ims;  
5938  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
5939  BOOL anchored;  BOOL anchored;
5940  BOOL startline;  BOOL startline;
# Line 5653  if (re == NULL || subject == NULL || Line 5966  if (re == NULL || subject == NULL ||
5966  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5967  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5968    
5969  /* This information is for finding all the numbers associated with a given  /* These two settings are used in the code for checking a UTF-8 string that
5970  name, for condition testing. */  follows immediately afterwards. Other values in the md block are used only
5971    during "normal" pcre_exec() processing, not when the JIT support is in use,
5972    so they are set up later. */
5973    
5974    utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5975    md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5976                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5977    
5978    /* Check a UTF-8 string if required. Pass back the character offset and error
5979    code for an invalid string if a results vector is available. */
5980    
5981    #ifdef SUPPORT_UTF8
5982    if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5983      {
5984      int erroroffset;
5985      int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
5986      if (errorcode != 0)
5987        {
5988        if (offsetcount >= 2)
5989          {
5990          offsets[0] = erroroffset;
5991          offsets[1] = errorcode;
5992          }
5993        return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5994          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5995        }
5996    
5997      /* Check that a start_offset points to the start of a UTF-8 character. */
5998      if (start_offset > 0 && start_offset < length &&
5999          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
6000        return PCRE_ERROR_BADUTF8_OFFSET;
6001      }
6002    #endif
6003    
6004    /* If the pattern was successfully studied with JIT support, run the JIT
6005    executable instead of the rest of this function. Most options must be set at
6006    compile time for the JIT code to be usable. Fall back to the normal code path if
6007    an unsupported flag is set. In particular, JIT does not support partial
6008    matching. */
6009    
6010    #ifdef SUPPORT_JIT
6011    if (extra_data != NULL
6012        && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
6013        && extra_data->executable_jit != NULL
6014        && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6015                        PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
6016      return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,
6017        start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
6018        ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
6019    #endif
6020    
6021    /* Carry on with non-JIT matching. This information is for finding all the
6022    numbers associated with a given name, for condition testing. */
6023    
6024  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (uschar *)re + re->name_table_offset;
6025  md->name_count = re->name_count;  md->name_count = re->name_count;
# Line 5721  md->end_subject = md->start_subject + le Line 6086  md->end_subject = md->start_subject + le
6086  end_subject = md->end_subject;  end_subject = md->end_subject;
6087    
6088  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  
6089  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
6090  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6091    
6092    /* Some options are unpacked into BOOL variables in the hope that testing
6093    them will be faster than testing individual option bits. */
6094    
6095  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
6096  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
6097  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
6098  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6099  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  
               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;  
6100  md->hitend = FALSE;  md->hitend = FALSE;
6101  md->mark = NULL;                        /* In case never set */  md->mark = NULL;                        /* In case never set */
6102    
6103  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
6104    md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6105    
6106  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
6107  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
# Line 5812  defined (though never set). So there's n Line 6179  defined (though never set). So there's n
6179  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6180    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
6181    
 /* Check a UTF-8 string if required. Pass back the character offset and error  
 code if a results vector is available. */  
   
 #ifdef SUPPORT_UTF8  
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  
   {  
   int errorcode;  
   int tb = _pcre_valid_utf8((USPTR)subject, length, &errorcode);  
   if (tb >= 0)  
     {  
     if (offsetcount >= 2)  
       {  
       offsets[0] = tb;  
       offsets[1] = errorcode;  
       }  
     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?  
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;  
     }  
   if (start_offset > 0 && start_offset < length)  
     {  
     tb = ((USPTR)subject)[start_offset] & 0xc0;  
     if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;  
     }  
   }  
 #endif  
   
 /* The ims options can vary during the matching as a result of the presence  
 of (?ims) items in the pattern. They are kept in a local variable so that  
 restoring at the exit of a group is easy. */  
   
 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);  
   
6182  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
6183  hold, we get a temporary chunk of working store to use during the matching.  hold, we get a temporary chunk of working store to use during the matching.
6184  Otherwise, we can use the vector supplied, rounding down its size to a multiple  Otherwise, we can use the vector supplied, rounding down its size to a multiple
6185  of 3. */  of 3. */
6186    
6187  ocount = offsetcount - (offsetcount % 3);  ocount = offsetcount - (offsetcount % 3);
6188    arg_offset_max = (2*ocount)/3;
6189    
6190  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
6191    {    {
# Line 5866  md->offset_max = (2*ocount)/3; Line 6202  md->offset_max = (2*ocount)/3;
6202  md->offset_overflow = FALSE;  md->offset_overflow = FALSE;
6203  md->capture_last = -1;  md->capture_last = -1;
6204    
 /* Compute the minimum number of offsets that we need to reset each time. Doing  
 this makes a huge difference to execution time when there aren't many brackets  
 in the pattern. */  
   
 resetcount = 2 + re->top_bracket * 2;  
 if (resetcount > offsetcount) resetcount = ocount;  
   
6205  /* Reset the working variable associated with each extraction. These should  /* Reset the working variable associated with each extraction. These should
6206  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
6207  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. Also, unset the
6208    offsets for the matched string. This is really just for tidiness with callouts,
6209    in case they inspect these fields. */
6210    
6211  if (md->offset_vector != NULL)  if (md->offset_vector != NULL)
6212    {    {
6213    register int *iptr = md->offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
6214    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - re->top_bracket;
6215      if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6216    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
6217      md->offset_vector[0] = md->offset_vector[1] = -1;
6218    }    }
6219    
6220  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 5915  if ((re->flags & PCRE_REQCHSET) != 0) Line 6248  if ((re->flags & PCRE_REQCHSET) != 0)
6248    }    }
6249    
6250    
6251    
6252    
6253  /* ==========================================================================*/  /* ==========================================================================*/
6254    
6255  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
# Line 5925  for(;;) Line 6260  for(;;)
6260    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
6261    USPTR new_start_match;    USPTR new_start_match;
6262    
   /* Reset the maximum number of extractions we might see. */  
   
   if (md->offset_vector != NULL)  
     {  
     register int *iptr = md->offset_vector;  
     register int *iend = iptr + resetcount;  
     while (iptr < iend) *iptr++ = -1;  
     }  
   
6263    /* If firstline is TRUE, the start of the match is constrained to the first    /* If firstline is TRUE, the start of the match is constrained to the first
6264    line of a multiline string. That is, the match must be before or at the first    line of a multiline string. That is, the match must be before or at the first
6265    newline. Implement this by temporarily adjusting end_subject so that we stop    newline. Implement this by temporarily adjusting end_subject so that we stop
# Line 6039  for(;;) Line 6365  for(;;)
6365    /* The following two optimizations are disabled for partial matching or if    /* The following two optimizations are disabled for partial matching or if
6366    disabling is explicitly requested. */    disabling is explicitly requested. */
6367    
6368    if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6369      {      {
6370      /* If the pattern was studied, a minimum subject length may be set. This is      /* If the pattern was studied, a minimum subject length may be set. This is
6371      a lower bound; no actual string of that length may actually match the      a lower bound; no actual string of that length may actually match the
# Line 6122  for(;;) Line 6448  for(;;)
6448    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
6449    md->start_used_ptr = start_match;    md->start_used_ptr = start_match;
6450    md->match_call_count = 0;    md->match_call_count = 0;
6451    rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,    md->match_function_type = 0;
6452      0, 0);    md->end_offset_top = 0;
6453      rc = match(start_match, md->start_code, start_match, NULL, 2, md, NULL, 0);
6454    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6455    
6456    switch(rc)    switch(rc)
# Line 6233  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6560  if (rc == MATCH_MATCH || rc == MATCH_ACC
6560    {    {
6561    if (using_temporary_offsets)    if (using_temporary_offsets)
6562      {      {
6563      if (offsetcount >= 4)      if (arg_offset_max >= 4)
6564        {        {
6565        memcpy(offsets + 2, md->offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
6566          (offsetcount - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
6567        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
6568        }        }
6569      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6570      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
6571      (pcre_free)(md->offset_vector);      (pcre_free)(md->offset_vector);
6572      }      }
6573    
6574    /* Set the return code to the number of captured strings, or 0 if there are    /* Set the return code to the number of captured strings, or 0 if there were
6575    too many to fit into the vector. */    too many to fit into the vector. */
6576    
6577    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
6578        0 : md->end_offset_top/2;
6579    
6580      /* If there is space in the offset vector, set any unused pairs at the end of
6581      the pattern to -1 for backwards compatibility. It is documented that this
6582      happens. In earlier versions, the whole set of potential capturing offsets
6583      was set to -1 each time round the loop, but this is handled differently now.
6584      "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
6585      those at the end that need unsetting here. We can't just unset them all at
6586      the start of the whole thing because they may get set in one branch that is
6587      not the final matching branch. */
6588    
6589      if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
6590        {
6591        register int *iptr, *iend;
6592        int resetcount = 2 + re->top_bracket * 2;
6593        if (resetcount > offsetcount) resetcount = ocount;
6594        iptr = offsets + md->end_offset_top;
6595        iend = offsets + resetcount;
6596        while (iptr < iend) *iptr++ = -1;
6597        }
6598    
6599    /* If there is space, set up the whole thing as substring 0. The value of    /* If there is space, set up the whole thing as substring 0. The value of
6600    md->start_match_ptr might be modified if \K was encountered on the success    md->start_match_ptr might be modified if \K was encountered on the success

Legend:
Removed from v.598  
changed lines
  Added in v.733

  ViewVC Help
Powered by ViewVC 1.1.5