/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 595 by ph10, Mon May 2 10:33:29 2011 UTC revision 614 by ph10, Sat Jul 9 10:48:16 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2010 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 57  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
60  /* Flag bits for the match() function */  /* Values for setting in md->match_function_type to indicate two special types
61    of call to match(). We do it this way to save on using another stack variable,
62    as stack usage is to be discouraged. */
63    
64  #define match_condassert     0x01  /* Called to check a condition assertion */  #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
65  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
66    
67  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
68  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 73  negative to avoid the external error cod Line 75  negative to avoid the external error cod
75    
76  #define MATCH_ACCEPT       (-999)  #define MATCH_ACCEPT       (-999)
77  #define MATCH_COMMIT       (-998)  #define MATCH_COMMIT       (-998)
78  #define MATCH_PRUNE        (-997)  #define MATCH_KETRPOS      (-997)
79  #define MATCH_SKIP         (-996)  #define MATCH_PRUNE        (-996)
80  #define MATCH_SKIP_ARG     (-995)  #define MATCH_SKIP         (-995)
81  #define MATCH_THEN         (-994)  #define MATCH_SKIP_ARG     (-994)
82    #define MATCH_THEN         (-993)
83    
84  /* This is a convenience macro for code that occurs many times. */  /* This is a convenience macro for code that occurs many times. */
85    
# Line 142  Arguments: Line 145  Arguments:
145    eptr        pointer into the subject    eptr        pointer into the subject
146    length      length of reference to be matched (number of bytes)    length      length of reference to be matched (number of bytes)
147    md          points to match data block    md          points to match data block
148    ims         the ims flags    caseless    TRUE if caseless
149    
150  Returns:      < 0 if not matched, otherwise the number of subject bytes matched  Returns:      < 0 if not matched, otherwise the number of subject bytes matched
151  */  */
152    
153  static int  static int
154  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
155    unsigned long int ims)    BOOL caseless)
156  {  {
157  USPTR eptr_start = eptr;  USPTR eptr_start = eptr;
158  register USPTR p = md->start_subject + md->offset_vector[offset];  register USPTR p = md->start_subject + md->offset_vector[offset];
# Line 175  if (length < 0) return -1; Line 178  if (length < 0) return -1;
178  properly if Unicode properties are supported. Otherwise, we can check only  properly if Unicode properties are supported. Otherwise, we can check only
179  ASCII characters. */  ASCII characters. */
180    
181  if ((ims & PCRE_CASELESS) != 0)  if (caseless)
182    {    {
183  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
184  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 193  if ((ims & PCRE_CASELESS) != 0) Line 196  if ((ims & PCRE_CASELESS) != 0)
196      while (p < endptr)      while (p < endptr)
197        {        {
198        int c, d;        int c, d;
199          if (eptr >= md->end_subject) return -1;
200        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
201        GETCHARINC(d, p);        GETCHARINC(d, p);
202        if (c != d && c != UCD_OTHERCASE(d)) return -1;        if (c != d && c != UCD_OTHERCASE(d)) return -1;
# Line 204  if ((ims & PCRE_CASELESS) != 0) Line 208  if ((ims & PCRE_CASELESS) != 0)
208    
209    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
210    is no UCP support. */    is no UCP support. */
211        {
212    while (length-- > 0)      if (eptr + length > md->end_subject) return -1;
213      { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }      while (length-- > 0)
214          { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
215        }
216    }    }
217    
218  /* In the caseful case, we can just compare the bytes, whether or not we  /* In the caseful case, we can just compare the bytes, whether or not we
219  are in UTF-8 mode. */  are in UTF-8 mode. */
220    
221  else  else
222    { while (length-- > 0) if (*p++ != *eptr++) return -1; }    {
223      if (eptr + length > md->end_subject) return -1;
224      while (length-- > 0) if (*p++ != *eptr++) return -1;
225      }
226    
227  return eptr - eptr_start;  return eptr - eptr_start;
228  }  }
# Line 267  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 276  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
276         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
277         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
278         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
279         RM61,  RM62 };         RM61,  RM62, RM63};
280    
281  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
282  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 277  actually used in this definition. */ Line 286  actually used in this definition. */
286  #define REGISTER register  #define REGISTER register
287    
288  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
289  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
290    { \    { \
291    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
292    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1); \
293    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
294    }    }
295  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 289  actually used in this definition. */ Line 298  actually used in this definition. */
298    return ra; \    return ra; \
299    }    }
300  #else  #else
301  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rw) \
302    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rdepth+1)
303  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
304  #endif  #endif
305    
# Line 303  argument of match(), which never changes Line 312  argument of match(), which never changes
312    
313  #define REGISTER  #define REGISTER
314    
315  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rw)\
316    {\    {\
317    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
318    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
# Line 313  argument of match(), which never changes Line 322  argument of match(), which never changes
322    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
323    newframe->Xmarkptr = markptr;\    newframe->Xmarkptr = markptr;\
324    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
325    newframe->Xims = re;\    newframe->Xeptrb = re;\
   newframe->Xeptrb = rf;\  
   newframe->Xflags = rg;\  
326    newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
327    newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
328    frame = newframe;\    frame = newframe;\
# Line 351  typedef struct heapframe { Line 358  typedef struct heapframe {
358    USPTR Xmstart;    USPTR Xmstart;
359    USPTR Xmarkptr;    USPTR Xmarkptr;
360    int Xoffset_top;    int Xoffset_top;
   long int Xims;  
361    eptrblock *Xeptrb;    eptrblock *Xeptrb;
   int Xflags;  
362    unsigned int Xrdepth;    unsigned int Xrdepth;
363    
364    /* Function local variables */    /* Function local variables */
# Line 374  typedef struct heapframe { Line 379  typedef struct heapframe {
379    BOOL Xcondition;    BOOL Xcondition;
380    BOOL Xprev_is_word;    BOOL Xprev_is_word;
381    
   unsigned long int Xoriginal_ims;  
   
382  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
383    int Xprop_type;    int Xprop_type;
384    int Xprop_value;    int Xprop_value;
# Line 461  Arguments: Line 464  Arguments:
464     markptr     pointer to the most recent MARK name, or NULL     markptr     pointer to the most recent MARK name, or NULL
465     offset_top  current top pointer     offset_top  current top pointer
466     md          pointer to "static" info for the match     md          pointer to "static" info for the match
    ims         current /i, /m, and /s options  
467     eptrb       pointer to chain of blocks containing eptr at start of     eptrb       pointer to chain of blocks containing eptr at start of
468                   brackets - for testing for empty matches                   brackets - for testing for empty matches
    flags       can contain  
                  match_condassert - this is an assertion condition  
                  match_cbegroup - this is the start of an unlimited repeat  
                    group that can match an empty string  
469     rdepth      the recursion depth     rdepth      the recursion depth
470    
471  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 479  Returns:       MATCH_MATCH if matched Line 477  Returns:       MATCH_MATCH if matched
477    
478  static int  static int
479  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
480    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,
481    eptrblock *eptrb, int flags, unsigned int rdepth)    unsigned int rdepth)
482  {  {
483  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
484  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 492  register unsigned int c;   /* Character Line 490  register unsigned int c;   /* Character
490  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
491    
492  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
493    BOOL caseless;
494  int condcode;  int condcode;
495    
496  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
# Line 511  frame->Xecode = ecode; Line 510  frame->Xecode = ecode;
510  frame->Xmstart = mstart;  frame->Xmstart = mstart;
511  frame->Xmarkptr = markptr;  frame->Xmarkptr = markptr;
512  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
 frame->Xims = ims;  
513  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
 frame->Xflags = flags;  
514  frame->Xrdepth = rdepth;  frame->Xrdepth = rdepth;
515    
516  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
# Line 527  HEAP_RECURSE: Line 524  HEAP_RECURSE:
524  #define mstart             frame->Xmstart  #define mstart             frame->Xmstart
525  #define markptr            frame->Xmarkptr  #define markptr            frame->Xmarkptr
526  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
 #define ims                frame->Xims  
527  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
 #define flags              frame->Xflags  
528  #define rdepth             frame->Xrdepth  #define rdepth             frame->Xrdepth
529    
530  /* Ditto for the local variables */  /* Ditto for the local variables */
# Line 551  HEAP_RECURSE: Line 546  HEAP_RECURSE:
546  #define condition          frame->Xcondition  #define condition          frame->Xcondition
547  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
548    
 #define original_ims       frame->Xoriginal_ims  
   
549  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
550  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
551  #define prop_value         frame->Xprop_value  #define prop_value         frame->Xprop_value
# Line 589  i, and fc and c, can be the same variabl Line 582  i, and fc and c, can be the same variabl
582  #define fi i  #define fi i
583  #define fc c  #define fc c
584    
585    /* Many of the following variables are used only in small blocks of the code.
586  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */  My normal style of coding would have declared them within each of those blocks.
587  const uschar *charptr;             /* in small blocks of the code. My normal */  However, in order to accommodate the version of this code that uses an external
588  #endif                             /* style of coding would have declared    */  "stack" implemented on the heap, it is easier to declare them all here, so the
589  const uschar *callpat;             /* them within each of those blocks.      */  declarations can be cut out in a block. The only declarations within blocks
590  const uschar *data;                /* However, in order to accommodate the   */  below are for variables that do not have to be preserved over a recursive call
591  const uschar *next;                /* version of this code that uses an      */  to RMATCH(). */
592  USPTR         pp;                  /* external "stack" implemented on the    */  
593  const uschar *prev;                /* heap, it is easier to declare them all */  #ifdef SUPPORT_UTF8
594  USPTR         saved_eptr;          /* here, so the declarations can be cut   */  const uschar *charptr;
595                                     /* out in a block. The only declarations  */  #endif
596  recursion_info new_recursive;      /* within blocks below are for variables  */  const uschar *callpat;
597                                     /* that do not have to be preserved over  */  const uschar *data;
598  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  const uschar *next;
599    USPTR         pp;
600    const uschar *prev;
601    USPTR         saved_eptr;
602    
603    recursion_info new_recursive;
604    
605    BOOL cur_is_word;
606  BOOL condition;  BOOL condition;
607  BOOL prev_is_word;  BOOL prev_is_word;
608    
 unsigned long int original_ims;  
   
609  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
610  int prop_type;  int prop_type;
611  int prop_value;  int prop_value;
# Line 634  int stacksave[REC_STACK_SAVE_MAX]; Line 632  int stacksave[REC_STACK_SAVE_MAX];
632  eptrblock newptrb;  eptrblock newptrb;
633  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
634    
635    /* To save space on the stack and in the heap frame, I have doubled up on some
636    of the local variables that are used only in localised parts of the code, but
637    still need to be preserved over recursive calls of match(). These macros define
638    the alternative names that are used. */
639    
640    #define allow_zero    cur_is_word
641    #define cbegroup      condition
642    #define code_offset   codelink
643    #define condassert    condition
644    #define matched_once  prev_is_word
645    
646  /* These statements are here to stop the compiler complaining about unitialized  /* These statements are here to stop the compiler complaining about unitialized
647  variables. */  variables. */
648    
# Line 670  haven't exceeded the recursive call limi Line 679  haven't exceeded the recursive call limi
679  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
680  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
681    
 original_ims = ims;    /* Save for resetting on ')' */  
   
682  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
683  string, the match_cbegroup flag is set. When this is the case, add the current  string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
684  subject pointer to the chain of such remembered pointers, to be checked when we  done this way to save having to use another function argument, which would take
685  hit the closing ket, in order to break infinite loops that match no characters.  up space on the stack. See also MATCH_CONDASSERT below.
686  When match() is called in other circumstances, don't add to the chain. The  
687  match_cbegroup flag must NOT be used with tail recursion, because the memory  When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
688  block that is used is on the stack, so a new one may be required for each  such remembered pointers, to be checked when we hit the closing ket, in order
689  match(). */  to break infinite loops that match no characters. When match() is called in
690    other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
691    NOT be used with tail recursion, because the memory block that is used is on
692    the stack, so a new one may be required for each match(). */
693    
694  if ((flags & match_cbegroup) != 0)  if (md->match_function_type == MATCH_CBEGROUP)
695    {    {
696    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
697    newptrb.epb_prev = eptrb;    newptrb.epb_prev = eptrb;
698    eptrb = &newptrb;    eptrb = &newptrb;
699      md->match_function_type = 0;
700    }    }
701    
702  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 694  for (;;) Line 705  for (;;)
705    {    {
706    minimize = possessive = FALSE;    minimize = possessive = FALSE;
707    op = *ecode;    op = *ecode;
708    
709    switch(op)    switch(op)
710      {      {
711      case OP_MARK:      case OP_MARK:
712      markptr = ecode + 2;      markptr = ecode + 2;
713      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
714        ims, eptrb, flags, RM55);        eptrb, RM55);
715    
716      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
717      argument, and we must check whether that argument matches this MARK's      argument, and we must check whether that argument matches this MARK's
# Line 726  for (;;) Line 737  for (;;)
737    
738      case OP_COMMIT:      case OP_COMMIT:
739      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
740        ims, eptrb, flags, RM52);        eptrb, RM52);
741      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
742          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
743          rrc != MATCH_THEN)          rrc != MATCH_THEN)
# Line 737  for (;;) Line 748  for (;;)
748    
749      case OP_PRUNE:      case OP_PRUNE:
750      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
751        ims, eptrb, flags, RM51);        eptrb, RM51);
752      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
753      MRRETURN(MATCH_PRUNE);      MRRETURN(MATCH_PRUNE);
754    
755      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
756      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
757        ims, eptrb, flags, RM56);        eptrb, RM56);
758      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
759      md->mark = ecode + 2;      md->mark = ecode + 2;
760      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
# Line 752  for (;;) Line 763  for (;;)
763    
764      case OP_SKIP:      case OP_SKIP:
765      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
766        ims, eptrb, flags, RM53);        eptrb, RM53);
767      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
768        RRETURN(rrc);        RRETURN(rrc);
769      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
# Line 760  for (;;) Line 771  for (;;)
771    
772      case OP_SKIP_ARG:      case OP_SKIP_ARG:
773      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
774        ims, eptrb, flags, RM57);        eptrb, RM57);
775      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
776        RRETURN(rrc);        RRETURN(rrc);
777    
# Line 779  for (;;) Line 790  for (;;)
790    
791      case OP_THEN:      case OP_THEN:
792      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
793        ims, eptrb, flags, RM54);        eptrb, RM54);
794      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
795      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode - GET(ecode, 1);
796      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
797    
798      case OP_THEN_ARG:      case OP_THEN_ARG:
799      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
800        offset_top, md, ims, eptrb, flags, RM58);        offset_top, md, eptrb, RM58);
801      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
802      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode - GET(ecode, 1);
803      md->mark = ecode + LINK_SIZE + 2;      md->mark = ecode + LINK_SIZE + 2;
804      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
805    
806      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket, other than those that are possessive with an
807      the current subject position in the working slot at the top of the vector.      unlimited repeat. If there is space in the offset vector, save the current
808      We mustn't change the current values of the data slot, because they may be      subject position in the working slot at the top of the vector. We mustn't
809      set from a previous iteration of this group, and be referred to by a      change the current values of the data slot, because they may be set from a
810      reference inside the group.      previous iteration of this group, and be referred to by a reference inside
811        the group. If we fail to match, we need to restore this value and also the
     If the bracket fails to match, we need to restore this value and also the  
812      values of the final offsets, in case they were set by a previous iteration      values of the final offsets, in case they were set by a previous iteration
813      of the same bracket.      of the same bracket.
814    
# Line 810  for (;;) Line 820  for (;;)
820      case OP_SCBRA:      case OP_SCBRA:
821      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
822      offset = number << 1;      offset = number << 1;
823    
824  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
825      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
826      printf("subject=");      printf("subject=");
# Line 829  for (;;) Line 839  for (;;)
839        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
840          (int)(eptr - md->start_subject);          (int)(eptr - md->start_subject);
841    
842        flags = (op == OP_SCBRA)? match_cbegroup : 0;        for (;;)
       do  
843          {          {
844          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
845            ims, eptrb, flags, RM1);          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
846              eptrb, RM1);
847          if (rrc != MATCH_NOMATCH &&          if (rrc != MATCH_NOMATCH &&
848              (rrc != MATCH_THEN || md->start_match_ptr != ecode))              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
849            RRETURN(rrc);            RRETURN(rrc);
850    
851            /* If md->end_offset_top is greater than offset_top, it means that the
852            branch we have just failed to match did manage to match some capturing
853            parentheses within an atomic group or an assertion. Although offset_top
854            reverts to its original value at this level, we must unset the captured
855            values in case a later match sets a higher capturing number. Example:
856            matching /((?>(a))b|(a)c)/ against "ac". This captures 3, but we need
857            to ensure that 2 - which was captured in the atomic matching - is
858            unset. */
859    
860            if (md->end_offset_top > offset_top)
861              {
862              register int *iptr = md->offset_vector + offset_top;
863              register int *iend = md->offset_vector + md->end_offset_top;
864              while (iptr < iend) *iptr++ = -1;
865              }
866    
867          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
868          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
869            if (*ecode != OP_ALT) break;
870          }          }
       while (*ecode == OP_ALT);  
871    
872        DPRINTF(("bracket %d failed\n", number));        DPRINTF(("bracket %d failed\n", number));
873    
# Line 848  for (;;) Line 875  for (;;)
875        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
876        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
877    
878        if (rrc != MATCH_THEN) md->mark = markptr;        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
879        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
880        }        }
881    
# Line 863  for (;;) Line 890  for (;;)
890      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
891      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
892    
893      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket, except for possessive with unlimited repeat. Loop
894      final alternative within the brackets, we would return the result of a      for all the alternatives. When we get to the final alternative within the
895      recursive call to match() whatever happened. We can reduce stack usage by      brackets, we used to return the result of a recursive call to match()
896      turning this into a tail recursion, except in the case when match_cbegroup      whatever happened so it was possible to reduce stack usage by turning this
897      is set.*/      into a tail recursion, except in the case of a possibly empty group.
898        However, now that there is the possiblity of (*THEN) occurring in the final
899        alternative, this optimization is no longer possible. */
900    
901      case OP_BRA:      case OP_BRA:
902      case OP_SBRA:      case OP_SBRA:
903      DPRINTF(("start non-capturing bracket\n"));      DPRINTF(("start non-capturing bracket\n"));
     flags = (op >= OP_SBRA)? match_cbegroup : 0;  
904      for (;;)      for (;;)
905        {        {
906        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
907          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
908            RM2);
909          if (rrc != MATCH_NOMATCH &&
910              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
911            RRETURN(rrc);
912    
913          /* See explanatory comment above under OP_CBRA. */
914    
915          if (md->end_offset_top > offset_top)
916            {
917            register int *iptr = md->offset_vector + offset_top;
918            register int *iend = md->offset_vector + md->end_offset_top;
919            while (iptr < iend) *iptr++ = -1;
920            }
921    
922          ecode += GET(ecode, 1);
923          if (*ecode != OP_ALT) break;
924          }
925    
926        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
927        RRETURN(MATCH_NOMATCH);
928    
929        /* Handle possessive capturing brackets with an unlimited repeat. We come
930        here from BRAZERO with allow_zero set TRUE. The offset_vector values are
931        handled similarly to the normal case above. However, the matching is
932        different. The end of these brackets will always be OP_KETRPOS, which
933        returns MATCH_KETRPOS without going further in the pattern. By this means
934        we can handle the group by iteration rather than recursion, thereby
935        reducing the amount of stack needed. */
936    
937        case OP_CBRAPOS:
938        case OP_SCBRAPOS:
939        allow_zero = FALSE;
940    
941        POSSESSIVE_CAPTURE:
942        number = GET2(ecode, 1+LINK_SIZE);
943        offset = number << 1;
944    
945    #ifdef PCRE_DEBUG
946        printf("start possessive bracket %d\n", number);
947        printf("subject=");
948        pchars(eptr, 16, TRUE, md);
949        printf("\n");
950    #endif
951    
952        if (offset < md->offset_max)
953          {
954          matched_once = FALSE;
955          code_offset = ecode - md->start_code;
956    
957          save_offset1 = md->offset_vector[offset];
958          save_offset2 = md->offset_vector[offset+1];
959          save_offset3 = md->offset_vector[md->offset_end - number];
960          save_capture_last = md->capture_last;
961    
962          DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
963    
964          /* Each time round the loop, save the current subject position for use
965          when the group matches. For MATCH_MATCH, the group has matched, so we
966          restart it with a new subject starting position, remembering that we had
967          at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
968          usual. If we haven't matched any alternatives in any iteration, check to
969          see if a previous iteration matched. If so, the group has matched;
970          continue from afterwards. Otherwise it has failed; restore the previous
971          capture values before returning NOMATCH. */
972    
973          for (;;)
974          {          {
975          if (flags == 0)    /* Not a possibly empty group */          md->offset_vector[md->offset_end - number] =
976              (int)(eptr - md->start_subject);
977            if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
978            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
979              eptrb, RM63);
980            if (rrc == MATCH_KETRPOS)
981            {            {
982            ecode += _pcre_OP_lengths[*ecode];            offset_top = md->end_offset_top;
983            DPRINTF(("bracket 0 tail recursion\n"));            eptr = md->end_match_ptr;
984            goto TAIL_RECURSE;            ecode = md->start_code + code_offset;
985            }            save_capture_last = md->capture_last;
986              matched_once = TRUE;
987              continue;
988              }
989            if (rrc != MATCH_NOMATCH &&
990                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
991              RRETURN(rrc);
992    
993          /* Possibly empty group; can't use tail recursion. */          /* See explanatory comment above under OP_CBRA. */
994    
995            if (md->end_offset_top > offset_top)
996              {
997              register int *iptr = md->offset_vector + offset_top;
998              register int *iend = md->offset_vector + md->end_offset_top;
999              while (iptr < iend) *iptr++ = -1;
1000              }
1001    
1002          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,          md->capture_last = save_capture_last;
1003            eptrb, flags, RM48);          ecode += GET(ecode, 1);
1004          if (rrc == MATCH_NOMATCH) md->mark = markptr;          if (*ecode != OP_ALT) break;
         RRETURN(rrc);  
1005          }          }
1006    
1007        /* For non-final alternatives, continue the loop for a NOMATCH result;        if (!matched_once)
1008        otherwise return. */          {
1009            md->offset_vector[offset] = save_offset1;
1010            md->offset_vector[offset+1] = save_offset2;
1011            md->offset_vector[md->offset_end - number] = save_offset3;
1012            }
1013    
1014          if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
1015          if (allow_zero || matched_once)
1016            {
1017            ecode += 1 + LINK_SIZE;
1018            break;
1019            }
1020    
1021          RRETURN(MATCH_NOMATCH);
1022          }
1023    
1024        /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1025        as a non-capturing bracket. */
1026    
1027        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1028        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1029    
1030        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1031          eptrb, flags, RM2);  
1032        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1033        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1034    
1035        /* Non-capturing possessive bracket with unlimited repeat. We come here
1036        from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1037        without the capturing complication. It is written out separately for speed
1038        and cleanliness. */
1039    
1040        case OP_BRAPOS:
1041        case OP_SBRAPOS:
1042        allow_zero = FALSE;
1043    
1044        POSSESSIVE_NON_CAPTURE:
1045        matched_once = FALSE;
1046        code_offset = ecode - md->start_code;
1047    
1048        for (;;)
1049          {
1050          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1051          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
1052            eptrb, RM48);
1053          if (rrc == MATCH_KETRPOS)
1054            {
1055            offset_top = md->end_offset_top;
1056            eptr = md->end_match_ptr;
1057            ecode = md->start_code + code_offset;
1058            matched_once = TRUE;
1059            continue;
1060            }
1061        if (rrc != MATCH_NOMATCH &&        if (rrc != MATCH_NOMATCH &&
1062            (rrc != MATCH_THEN || md->start_match_ptr != ecode))            (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1063          RRETURN(rrc);          RRETURN(rrc);
1064    
1065          /* See explanatory comment above under OP_CBRA. */
1066    
1067          if (md->end_offset_top > offset_top)
1068            {
1069            register int *iptr = md->offset_vector + offset_top;
1070            register int *iend = md->offset_vector + md->end_offset_top;
1071            while (iptr < iend) *iptr++ = -1;
1072            }
1073    
1074        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1075          if (*ecode != OP_ALT) break;
1076        }        }
1077    
1078        if (matched_once || allow_zero)
1079          {
1080          ecode += 1 + LINK_SIZE;
1081          break;
1082          }
1083        RRETURN(MATCH_NOMATCH);
1084    
1085      /* Control never reaches here. */      /* Control never reaches here. */
1086    
1087      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
1088      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
1089      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
1090      exactly what going to the ket would do. As there is only one branch to be      exactly what going to the ket would do. */
     obeyed, we can use tail recursion to avoid using another stack frame. */  
1091    
1092      case OP_COND:      case OP_COND:
1093      case OP_SCOND:      case OP_SCOND:
1094      codelink= GET(ecode, 1);      codelink = GET(ecode, 1);
1095    
1096      /* Because of the way auto-callout works during compile, a callout item is      /* Because of the way auto-callout works during compile, a callout item is
1097      inserted between OP_COND and an assertion condition. */      inserted between OP_COND and an assertion condition. */
# Line 1087  for (;;) Line 1266  for (;;)
1266        }        }
1267    
1268      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
1269      the final argument match_condassert causes it to stop at the end of an      md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
1270      assertion. */      an assertion. */
1271    
1272      else      else
1273        {        {
1274        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        md->match_function_type = MATCH_CONDASSERT;
1275            match_condassert, RM3);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1276        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1277          {          {
1278          condition = TRUE;          condition = TRUE;
# Line 1113  for (;;) Line 1292  for (;;)
1292        }        }
1293    
1294      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1295      we can use tail recursion to avoid using another stack frame, except when      we used to use tail recursion to avoid using another stack frame, except
1296      match_cbegroup is required for an unlimited repeat of a possibly empty      when there was unlimited repeat of a possibly empty group. However, that
1297      group. If the second alternative doesn't exist, we can just plough on. */      strategy no longer works because of the possibility of (*THEN) being
1298        encountered in the branch. A recursive call to match() is always required,
1299        unless the second alternative doesn't exist, in which case we can just
1300        plough on. */
1301    
1302      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1303        {        {
1304        ecode += 1 + LINK_SIZE;        if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;
1305        if (op == OP_SCOND)        /* Possibly empty group */        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1306          {        if (rrc == MATCH_THEN && md->start_match_ptr == ecode)
1307          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);          rrc = MATCH_NOMATCH;
1308          RRETURN(rrc);        RRETURN(rrc);
         }  
       else                       /* Group must match something */  
         {  
         flags = 0;  
         goto TAIL_RECURSE;  
         }  
1309        }        }
1310      else                         /* Condition false & no alternative */      else                         /* Condition false & no alternative */
1311        {        {
# Line 1162  for (;;) Line 1338  for (;;)
1338      break;      break;
1339    
1340    
1341      /* End of the pattern, either real or forced. If we are in a top-level      /* End of the pattern, either real or forced. If we are in a recursion, we
1342      recursion, we should restore the offsets appropriately and continue from      should restore the offsets appropriately, and if it's a top-level
1343      after the call. */      recursion, continue from after the call. */
1344    
1345      case OP_ACCEPT:      case OP_ACCEPT:
1346        case OP_ASSERT_ACCEPT:
1347      case OP_END:      case OP_END:
1348      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL)
1349        {        {
1350        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
       DPRINTF(("End of pattern in a (?0) recursion\n"));  
1351        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1352        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1353          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1354        offset_top = rec->save_offset_top;        offset_top = rec->save_offset_top;
1355        ims = original_ims;        if (rec->group_num == 0)
1356        ecode = rec->after_call;          {
1357        break;          ecode = rec->after_call;
1358            break;
1359            }
1360        }        }
1361    
1362      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is      /* Otherwise, if we have matched an empty string, fail if not in an
1363      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of      assertion and if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1364      the subject. In both cases, backtracking will then try other alternatives,      is set and we have matched at the start of the subject. In both cases,
1365      if any. */      backtracking will then try other alternatives, if any. */
1366    
1367      if (eptr == mstart &&      else if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1368          (md->notempty ||          (md->notempty ||
1369            (md->notempty_atstart &&            (md->notempty_atstart &&
1370              mstart == md->start_subject + md->start_offset)))              mstart == md->start_subject + md->start_offset)))
1371        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1372    
1373      /* Otherwise, we have a match. */      /* Otherwise, we have a match. */
1374    
1375      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1376      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1377      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
# Line 1204  for (;;) Line 1382  for (;;)
1382      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1383      MRRETURN(rrc);      MRRETURN(rrc);
1384    
     /* Change option settings */  
   
     case OP_OPT:  
     ims = ecode[1];  
     ecode += 2;  
     DPRINTF(("ims set to %02lx\n", ims));  
     break;  
   
1385      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
1386      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
1387      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1388      start of each branch to move the current point backwards, so the code at      start of each branch to move the current point backwards, so the code at
1389      this level is identical to the lookahead case. */      this level is identical to the lookahead case. When the assertion is part
1390        of a condition, we want to return immediately afterwards. The caller of
1391        this incarnation of the match() function will have set MATCH_CONDASSERT in
1392      md->match_function_type, and one of these opcodes will be the first opcode
1393        that is processed. We use a local variable that is preserved over calls to
1394        match() to remember this case. */
1395    
1396      case OP_ASSERT:      case OP_ASSERT:
1397      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1398        if (md->match_function_type == MATCH_CONDASSERT)
1399          {
1400          condassert = TRUE;
1401          md->match_function_type = 0;
1402          }
1403        else condassert = FALSE;
1404    
1405      do      do
1406        {        {
1407        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
         RM4);  
1408        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1409          {          {
1410          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
# Line 1232  for (;;) Line 1413  for (;;)
1413        if (rrc != MATCH_NOMATCH &&        if (rrc != MATCH_NOMATCH &&
1414            (rrc != MATCH_THEN || md->start_match_ptr != ecode))            (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1415          RRETURN(rrc);          RRETURN(rrc);
1416    
1417          /* See explanatory comment above under OP_CBRA. */
1418    
1419          if (md->end_offset_top > offset_top)
1420            {
1421            register int *iptr = md->offset_vector + offset_top;
1422            register int *iend = md->offset_vector + md->end_offset_top;
1423            while (iptr < iend) *iptr++ = -1;
1424            }
1425    
1426        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1427        }        }
1428      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1429    
1430      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1431    
1432      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1433    
1434      if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);      if (condassert) RRETURN(MATCH_MATCH);
1435    
1436      /* Continue from after the assertion, updating the offsets high water      /* Continue from after the assertion, updating the offsets high water
1437      mark, since extracts may have been taken during the assertion. */      mark, since extracts may have been taken during the assertion. */
# Line 1255  for (;;) Line 1447  for (;;)
1447    
1448      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1449      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1450        if (md->match_function_type == MATCH_CONDASSERT)
1451          {
1452          condassert = TRUE;
1453          md->match_function_type = 0;
1454          }
1455        else condassert = FALSE;
1456    
1457      do      do
1458        {        {
1459        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
         RM5);  
1460        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1461        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1462          {          {
# Line 1272  for (;;) Line 1470  for (;;)
1470        }        }
1471      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1472    
1473      if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1474    
1475      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1476      continue;      continue;
1477    
# Line 1386  for (;;) Line 1584  for (;;)
1584        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1585              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1586        new_recursive.save_offset_top = offset_top;        new_recursive.save_offset_top = offset_top;
1587    
1588        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1589        restore the offset and recursion data. */        restore the offset and recursion data. */
1590    
1591        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1592        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;        cbegroup = (*callpat >= OP_SBRA);
1593        do        do
1594          {          {
1595            if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1596          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1597            md, ims, eptrb, flags, RM6);            md, eptrb, RM6);
1598          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1599            {            {
1600            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 1442  for (;;) Line 1641  for (;;)
1641    
1642      do      do
1643        {        {
1644        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1645        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1646          {          {
1647          mstart = md->start_match_ptr;          mstart = md->start_match_ptr;
# Line 1451  for (;;) Line 1650  for (;;)
1650        if (rrc != MATCH_NOMATCH &&        if (rrc != MATCH_NOMATCH &&
1651            (rrc != MATCH_THEN || md->start_match_ptr != ecode))            (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1652          RRETURN(rrc);          RRETURN(rrc);
1653    
1654          /* See explanatory comment above under OP_CBRA. */
1655    
1656          if (md->end_offset_top > offset_top)
1657            {
1658            register int *iptr = md->offset_vector + offset_top;
1659            register int *iend = md->offset_vector + md->end_offset_top;
1660            while (iptr < iend) *iptr++ = -1;
1661            }
1662    
1663        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1664        }        }
1665      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1459  for (;;) Line 1668  for (;;)
1668    
1669      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1670    
1671      /* Continue as from after the assertion, updating the offsets high water      /* Continue after the group, updating the offsets high water mark, since
1672      mark, since extracts may have been taken. */      extracts may have been taken. */
1673    
1674      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1675    
# Line 1481  for (;;) Line 1690  for (;;)
1690    
1691      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1692      preceding bracket, in the appropriate order. The second "call" of match()      preceding bracket, in the appropriate order. The second "call" of match()
1693      uses tail recursion, to avoid using another stack frame. We need to reset      uses tail recursion, to avoid using another stack frame. */
     any options that changed within the bracket before re-running it, so  
     check the next opcode. */  
   
     if (ecode[1+LINK_SIZE] == OP_OPT)  
       {  
       ims = (ims & ~PCRE_IMS) | ecode[4];  
       DPRINTF(("ims set to %02lx at group repeat\n", ims));  
       }  
1694    
1695      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1696        {        {
1697        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM8);
1698        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1699        ecode = prev;        ecode = prev;
       flags = 0;  
1700        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1701        }        }
1702      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1703        {        {
1704        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);        md->match_function_type = MATCH_CBEGROUP;
1705          RMATCH(eptr, prev, offset_top, md, eptrb, RM9);
1706        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1707        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
       flags = 0;  
1708        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1709        }        }
1710      /* Control never gets here */      /* Control never gets here */
# Line 1521  for (;;) Line 1721  for (;;)
1721      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1722      with fixed upper repeat limits are compiled as a number of copies, with the      with fixed upper repeat limits are compiled as a number of copies, with the
1723      optional ones preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1724    
1725      case OP_BRAZERO:      case OP_BRAZERO:
1726        {      next = ecode + 1;
1727        next = ecode+1;      RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1728        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1729        if (rrc != MATCH_NOMATCH) RRETURN(rrc);      do next += GET(next, 1); while (*next == OP_ALT);
1730        do next += GET(next,1); while (*next == OP_ALT);      ecode = next + 1 + LINK_SIZE;
       ecode = next + 1 + LINK_SIZE;  
       }  
1731      break;      break;
1732    
1733      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1734        {      next = ecode + 1;
1735        next = ecode+1;      do next += GET(next, 1); while (*next == OP_ALT);
1736        do next += GET(next, 1); while (*next == OP_ALT);      RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1737        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1738        if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode++;
       ecode++;  
       }  
1739      break;      break;
1740    
1741      case OP_SKIPZERO:      case OP_SKIPZERO:
1742        {      next = ecode+1;
1743        next = ecode+1;      do next += GET(next,1); while (*next == OP_ALT);
1744        do next += GET(next,1); while (*next == OP_ALT);      ecode = next + 1 + LINK_SIZE;
       ecode = next + 1 + LINK_SIZE;  
       }  
1745      break;      break;
1746    
1747        /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1748        here; just jump to the group, with allow_zero set TRUE. */
1749    
1750        case OP_BRAPOSZERO:
1751        op = *(++ecode);
1752        allow_zero = TRUE;
1753        if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1754          goto POSSESSIVE_NON_CAPTURE;
1755    
1756      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1757    
1758      case OP_KET:      case OP_KET:
1759      case OP_KETRMIN:      case OP_KETRMIN:
1760      case OP_KETRMAX:      case OP_KETRMAX:
1761        case OP_KETRPOS:
1762      prev = ecode - GET(ecode, 1);      prev = ecode - GET(ecode, 1);
1763    
1764      /* If this was a group that remembered the subject start, in order to break      /* If this was a group that remembered the subject start, in order to break
# Line 1589  for (;;) Line 1793  for (;;)
1793      a recurse into group 0, so it won't be picked up here. Instead, we catch it      a recurse into group 0, so it won't be picked up here. Instead, we catch it
1794      when the OP_END is reached. Other recursion is handled here. */      when the OP_END is reached. Other recursion is handled here. */
1795    
1796      if (*prev == OP_CBRA || *prev == OP_SCBRA)      if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1797            *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1798        {        {
1799        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1800        offset = number << 1;        offset = number << 1;
# Line 1620  for (;;) Line 1825  for (;;)
1825            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1826          offset_top = rec->save_offset_top;          offset_top = rec->save_offset_top;
1827          ecode = rec->after_call;          ecode = rec->after_call;
         ims = original_ims;  
1828          break;          break;
1829          }          }
1830        }        }
1831    
     /* For both capturing and non-capturing groups, reset the value of the ims  
     flags, in case they got changed during the group. */  
   
     ims = original_ims;  
     DPRINTF(("ims reset to %02lx\n", ims));  
   
1832      /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1833      happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1834      This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
# Line 1642  for (;;) Line 1840  for (;;)
1840        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1841        break;        break;
1842        }        }
1843    
1844      /* The repeating kets try the rest of the pattern or restart from the      /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1845      preceding bracket, in the appropriate order. In the second case, we can use      and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1846      tail recursion to avoid using another stack frame, unless we have an      at a time from the outer level, thus saving stack. */
1847    
1848        if (*ecode == OP_KETRPOS)
1849          {
1850          md->end_match_ptr = eptr;
1851          md->end_offset_top = offset_top;
1852          RRETURN(MATCH_KETRPOS);
1853          }
1854    
1855        /* The normal repeating kets try the rest of the pattern or restart from
1856        the preceding bracket, in the appropriate order. In the second case, we can
1857        use tail recursion to avoid using another stack frame, unless we have an
1858      unlimited repeat of a group that can match an empty string. */      unlimited repeat of a group that can match an empty string. */
1859    
     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;  
   
1860      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1861        {        {
1862        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1863        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1864        if (flags != 0)    /* Could match an empty string */        if (*prev >= OP_SBRA)    /* Could match an empty string */
1865          {          {
1866          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);          md->match_function_type = MATCH_CBEGROUP;
1867            RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
1868          RRETURN(rrc);          RRETURN(rrc);
1869          }          }
1870        ecode = prev;        ecode = prev;
# Line 1664  for (;;) Line 1872  for (;;)
1872        }        }
1873      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1874        {        {
1875        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);        if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1876          RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
1877        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1878        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
       flags = 0;  
1879        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1880        }        }
1881      /* Control never gets here */      /* Control never gets here */
1882    
1883      /* Start of subject unless notbol, or after internal newline if multiline */      /* Not multiline mode: start of subject assertion, unless notbol. */
1884    
1885      case OP_CIRC:      case OP_CIRC:
1886      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1887      if ((ims & PCRE_MULTILINE) != 0)  
       {  
       if (eptr != md->start_subject &&  
           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))  
         MRRETURN(MATCH_NOMATCH);  
       ecode++;  
       break;  
       }  
     /* ... else fall through */  
   
1888      /* Start of subject assertion */      /* Start of subject assertion */
1889    
1890      case OP_SOD:      case OP_SOD:
1891      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1892      ecode++;      ecode++;
1893      break;      break;
1894    
1895        /* Multiline mode: start of subject unless notbol, or after any newline. */
1896    
1897        case OP_CIRCM:
1898        if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1899        if (eptr != md->start_subject &&
1900            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1901          MRRETURN(MATCH_NOMATCH);
1902        ecode++;
1903        break;
1904    
1905      /* Start of match assertion */      /* Start of match assertion */
1906    
# Line 1707  for (;;) Line 1916  for (;;)
1916      ecode++;      ecode++;
1917      break;      break;
1918    
1919      /* Assert before internal newline if multiline, or before a terminating      /* Multiline mode: assert before any newline, or before end of subject
1920      newline unless endonly is set, else end of subject unless noteol is set. */      unless noteol is set. */
1921    
1922      case OP_DOLL:      case OP_DOLLM:
1923      if ((ims & PCRE_MULTILINE) != 0)      if (eptr < md->end_subject)
1924        {        { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1925        if (eptr < md->end_subject)      else
         { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }  
       else  
         {  
         if (md->noteol) MRRETURN(MATCH_NOMATCH);  
         SCHECK_PARTIAL();  
         }  
       ecode++;  
       break;  
       }  
     else  /* Not multiline */  
1926        {        {
1927        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1928        if (!md->endonly) goto ASSERT_NL_OR_EOS;        SCHECK_PARTIAL();
1929        }        }
1930        ecode++;
1931        break;
1932    
1933        /* Not multiline mode: assert before a terminating newline or before end of
1934        subject unless noteol is set. */
1935    
1936        case OP_DOLL:
1937        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1938        if (!md->endonly) goto ASSERT_NL_OR_EOS;
1939    
1940      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1941    
# Line 2011  for (;;) Line 2219  for (;;)
2219      switch(c)      switch(c)
2220        {        {
2221        default: MRRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
2222    
2223        case 0x000d:        case 0x000d:
2224        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2225        break;        break;
# Line 2263  for (;;) Line 2472  for (;;)
2472      loops). */      loops). */
2473    
2474      case OP_REF:      case OP_REF:
2475        case OP_REFI:
2476        caseless = op == OP_REFI;
2477      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2478      ecode += 3;      ecode += 3;
2479    
# Line 2310  for (;;) Line 2521  for (;;)
2521        break;        break;
2522    
2523        default:               /* No repeat follows */        default:               /* No repeat follows */
2524        if ((length = match_ref(offset, eptr, length, md, ims)) < 0)        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2525          {          {
2526          CHECK_PARTIAL();          CHECK_PARTIAL();
2527          MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
# Line 2331  for (;;) Line 2542  for (;;)
2542      for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2543        {        {
2544        int slength;        int slength;
2545        if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2546          {          {
2547          CHECK_PARTIAL();          CHECK_PARTIAL();
2548          MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
# Line 2351  for (;;) Line 2562  for (;;)
2562        for (fi = min;; fi++)        for (fi = min;; fi++)
2563          {          {
2564          int slength;          int slength;
2565          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2566          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2567          if (fi >= max) MRRETURN(MATCH_NOMATCH);          if (fi >= max) MRRETURN(MATCH_NOMATCH);
2568          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2569            {            {
2570            CHECK_PARTIAL();            CHECK_PARTIAL();
2571            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
# Line 2372  for (;;) Line 2583  for (;;)
2583        for (i = min; i < max; i++)        for (i = min; i < max; i++)
2584          {          {
2585          int slength;          int slength;
2586          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2587            {            {
2588            CHECK_PARTIAL();            CHECK_PARTIAL();
2589            break;            break;
# Line 2381  for (;;) Line 2592  for (;;)
2592          }          }
2593        while (eptr >= pp)        while (eptr >= pp)
2594          {          {
2595          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2596          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2597          eptr -= length;          eptr -= length;
2598          }          }
# Line 2491  for (;;) Line 2702  for (;;)
2702            {            {
2703            for (fi = min;; fi++)            for (fi = min;; fi++)
2704              {              {
2705              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2706              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2707              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2708              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 2516  for (;;) Line 2727  for (;;)
2727            {            {
2728            for (fi = min;; fi++)            for (fi = min;; fi++)
2729              {              {
2730              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2731              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2732              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2733              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 2562  for (;;) Line 2773  for (;;)
2773              }              }
2774            for (;;)            for (;;)
2775              {              {
2776              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
2777              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2778              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2779              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2585  for (;;) Line 2796  for (;;)
2796              }              }
2797            while (eptr >= pp)            while (eptr >= pp)
2798              {              {
2799              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
2800              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2801              eptr--;              eptr--;
2802              }              }
# Line 2661  for (;;) Line 2872  for (;;)
2872          {          {
2873          for (fi = min;; fi++)          for (fi = min;; fi++)
2874            {            {
2875            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
2876            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2877            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2878            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 2694  for (;;) Line 2905  for (;;)
2905            }            }
2906          for(;;)          for(;;)
2907            {            {
2908            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
2909            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2910            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2911            if (utf8) BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
# Line 2739  for (;;) Line 2950  for (;;)
2950    
2951      /* Match a single character, caselessly */      /* Match a single character, caselessly */
2952    
2953      case OP_CHARNC:      case OP_CHARI:
2954  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2955      if (utf8)      if (utf8)
2956        {        {
# Line 2799  for (;;) Line 3010  for (;;)
3010      /* Match a single character repeatedly. */      /* Match a single character repeatedly. */
3011    
3012      case OP_EXACT:      case OP_EXACT:
3013        case OP_EXACTI:
3014      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3015      ecode += 3;      ecode += 3;
3016      goto REPEATCHAR;      goto REPEATCHAR;
3017    
3018      case OP_POSUPTO:      case OP_POSUPTO:
3019        case OP_POSUPTOI:
3020      possessive = TRUE;      possessive = TRUE;
3021      /* Fall through */      /* Fall through */
3022    
3023      case OP_UPTO:      case OP_UPTO:
3024        case OP_UPTOI:
3025      case OP_MINUPTO:      case OP_MINUPTO:
3026        case OP_MINUPTOI:
3027      min = 0;      min = 0;
3028      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3029      minimize = *ecode == OP_MINUPTO;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3030      ecode += 3;      ecode += 3;
3031      goto REPEATCHAR;      goto REPEATCHAR;
3032    
3033      case OP_POSSTAR:      case OP_POSSTAR:
3034        case OP_POSSTARI:
3035      possessive = TRUE;      possessive = TRUE;
3036      min = 0;      min = 0;
3037      max = INT_MAX;      max = INT_MAX;
# Line 2823  for (;;) Line 3039  for (;;)
3039      goto REPEATCHAR;      goto REPEATCHAR;
3040    
3041      case OP_POSPLUS:      case OP_POSPLUS:
3042        case OP_POSPLUSI:
3043      possessive = TRUE;      possessive = TRUE;
3044      min = 1;      min = 1;
3045      max = INT_MAX;      max = INT_MAX;
# Line 2830  for (;;) Line 3047  for (;;)
3047      goto REPEATCHAR;      goto REPEATCHAR;
3048    
3049      case OP_POSQUERY:      case OP_POSQUERY:
3050        case OP_POSQUERYI:
3051      possessive = TRUE;      possessive = TRUE;
3052      min = 0;      min = 0;
3053      max = 1;      max = 1;
# Line 2837  for (;;) Line 3055  for (;;)
3055      goto REPEATCHAR;      goto REPEATCHAR;
3056    
3057      case OP_STAR:      case OP_STAR:
3058        case OP_STARI:
3059      case OP_MINSTAR:      case OP_MINSTAR:
3060        case OP_MINSTARI:
3061      case OP_PLUS:      case OP_PLUS:
3062        case OP_PLUSI:
3063      case OP_MINPLUS:      case OP_MINPLUS:
3064        case OP_MINPLUSI:
3065      case OP_QUERY:      case OP_QUERY:
3066        case OP_QUERYI:
3067      case OP_MINQUERY:      case OP_MINQUERY:
3068      c = *ecode++ - OP_STAR;      case OP_MINQUERYI:
3069        c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3070      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
   
3071      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
3072      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3073      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
# Line 2867  for (;;) Line 3090  for (;;)
3090          {          {
3091  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3092          unsigned int othercase;          unsigned int othercase;
3093          if ((ims & PCRE_CASELESS) != 0 &&          if (op >= OP_STARI &&     /* Caseless */
3094              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3095            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
3096          else oclength = 0;          else oclength = 0;
# Line 2895  for (;;) Line 3118  for (;;)
3118            {            {
3119            for (fi = min;; fi++)            for (fi = min;; fi++)
3120              {              {
3121              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3122              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3123              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3124              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
# Line 2937  for (;;) Line 3160  for (;;)
3160    
3161            for(;;)            for(;;)
3162              {              {
3163              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3164              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3165              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
3166  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2974  for (;;) Line 3197  for (;;)
3197      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3198        max, eptr));        max, eptr));
3199    
3200      if ((ims & PCRE_CASELESS) != 0)      if (op >= OP_STARI)  /* Caseless */
3201        {        {
3202        fc = md->lcc[fc];        fc = md->lcc[fc];
3203        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
# Line 2991  for (;;) Line 3214  for (;;)
3214          {          {
3215          for (fi = min;; fi++)          for (fi = min;; fi++)
3216            {            {
3217            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3218            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3219            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3220            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 3021  for (;;) Line 3244  for (;;)
3244    
3245          while (eptr >= pp)          while (eptr >= pp)
3246            {            {
3247            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3248            eptr--;            eptr--;
3249            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3250            }            }
# Line 3050  for (;;) Line 3273  for (;;)
3273          {          {
3274          for (fi = min;; fi++)          for (fi = min;; fi++)
3275            {            {
3276            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3277            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3278            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3279            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 3079  for (;;) Line 3302  for (;;)
3302    
3303          while (eptr >= pp)          while (eptr >= pp)
3304            {            {
3305            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3306            eptr--;            eptr--;
3307            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3308            }            }
# Line 3092  for (;;) Line 3315  for (;;)
3315      checking can be multibyte. */      checking can be multibyte. */
3316    
3317      case OP_NOT:      case OP_NOT:
3318        case OP_NOTI:
3319      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3320        {        {
3321        SCHECK_PARTIAL();        SCHECK_PARTIAL();
# Line 3099  for (;;) Line 3323  for (;;)
3323        }        }
3324      ecode++;      ecode++;
3325      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3326      if ((ims & PCRE_CASELESS) != 0)      if (op == OP_NOTI)         /* The caseless case */
3327        {        {
3328  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3329        if (c < 256)        if (c < 256)
# Line 3107  for (;;) Line 3331  for (;;)
3331        c = md->lcc[c];        c = md->lcc[c];
3332        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3333        }        }
3334      else      else    /* Caseful */
3335        {        {
3336        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3337        }        }
# Line 3121  for (;;) Line 3345  for (;;)
3345      about... */      about... */
3346    
3347      case OP_NOTEXACT:      case OP_NOTEXACT:
3348        case OP_NOTEXACTI:
3349      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3350      ecode += 3;      ecode += 3;
3351      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3352    
3353      case OP_NOTUPTO:      case OP_NOTUPTO:
3354        case OP_NOTUPTOI:
3355      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
3356        case OP_NOTMINUPTOI:
3357      min = 0;      min = 0;
3358      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3359      minimize = *ecode == OP_NOTMINUPTO;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3360      ecode += 3;      ecode += 3;
3361      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3362    
3363      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
3364        case OP_NOTPOSSTARI:
3365      possessive = TRUE;      possessive = TRUE;
3366      min = 0;      min = 0;
3367      max = INT_MAX;      max = INT_MAX;
# Line 3141  for (;;) Line 3369  for (;;)
3369      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3370    
3371      case OP_NOTPOSPLUS:      case OP_NOTPOSPLUS:
3372        case OP_NOTPOSPLUSI:
3373      possessive = TRUE;      possessive = TRUE;
3374      min = 1;      min = 1;
3375      max = INT_MAX;      max = INT_MAX;
# Line 3148  for (;;) Line 3377  for (;;)
3377      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3378    
3379      case OP_NOTPOSQUERY:      case OP_NOTPOSQUERY:
3380        case OP_NOTPOSQUERYI:
3381      possessive = TRUE;      possessive = TRUE;
3382      min = 0;      min = 0;
3383      max = 1;      max = 1;
# Line 3155  for (;;) Line 3385  for (;;)
3385      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3386    
3387      case OP_NOTPOSUPTO:      case OP_NOTPOSUPTO:
3388        case OP_NOTPOSUPTOI:
3389      possessive = TRUE;      possessive = TRUE;
3390      min = 0;      min = 0;
3391      max = GET2(ecode, 1);      max = GET2(ecode, 1);
# Line 3162  for (;;) Line 3393  for (;;)
3393      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3394    
3395      case OP_NOTSTAR:      case OP_NOTSTAR:
3396        case OP_NOTSTARI:
3397      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3398        case OP_NOTMINSTARI:
3399      case OP_NOTPLUS:      case OP_NOTPLUS:
3400        case OP_NOTPLUSI:
3401      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
3402        case OP_NOTMINPLUSI:
3403      case OP_NOTQUERY:      case OP_NOTQUERY:
3404        case OP_NOTQUERYI:
3405      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
3406      c = *ecode++ - OP_NOTSTAR;      case OP_NOTMINQUERYI:
3407        c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3408      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
3409      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
3410      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
# Line 3189  for (;;) Line 3426  for (;;)
3426      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3427        max, eptr));        max, eptr));
3428    
3429      if ((ims & PCRE_CASELESS) != 0)      if (op >= OP_NOTSTARI)     /* Caseless */
3430        {        {
3431        fc = md->lcc[fc];        fc = md->lcc[fc];
3432    
# Line 3237  for (;;) Line 3474  for (;;)
3474            register unsigned int d;            register unsigned int d;
3475            for (fi = min;; fi++)            for (fi = min;; fi++)
3476              {              {
3477              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3478              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3479              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3480              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3256  for (;;) Line 3493  for (;;)
3493            {            {
3494            for (fi = min;; fi++)            for (fi = min;; fi++)
3495              {              {
3496              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3497              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3498              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3499              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3297  for (;;) Line 3534  for (;;)
3534          if (possessive) continue;          if (possessive) continue;
3535          for(;;)          for(;;)
3536              {              {
3537              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3538              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3539              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3540              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3320  for (;;) Line 3557  for (;;)
3557            if (possessive) continue;            if (possessive) continue;
3558            while (eptr >= pp)            while (eptr >= pp)
3559              {              {
3560              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3561              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3562              eptr--;              eptr--;
3563              }              }
# Line 3377  for (;;) Line 3614  for (;;)
3614            register unsigned int d;            register unsigned int d;
3615            for (fi = min;; fi++)            for (fi = min;; fi++)
3616              {              {
3617              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3618              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3619              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3620              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3395  for (;;) Line 3632  for (;;)
3632            {            {
3633            for (fi = min;; fi++)            for (fi = min;; fi++)
3634              {              {
3635              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3636              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3637              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3638              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3435  for (;;) Line 3672  for (;;)
3672            if (possessive) continue;            if (possessive) continue;
3673            for(;;)            for(;;)
3674              {              {
3675              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
3676              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3677              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3678              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3458  for (;;) Line 3695  for (;;)
3695            if (possessive) continue;            if (possessive) continue;
3696            while (eptr >= pp)            while (eptr >= pp)
3697              {              {
3698              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
3699              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3700              eptr--;              eptr--;
3701              }              }
# Line 3785  for (;;) Line 4022  for (;;)
4022            switch(c)            switch(c)
4023              {              {
4024              default: MRRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4025    
4026              case 0x000d:              case 0x000d:
4027              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4028              break;              break;
# Line 4061  for (;;) Line 4299  for (;;)
4299            switch(*eptr++)            switch(*eptr++)
4300              {              {
4301              default: MRRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4302    
4303              case 0x000d:              case 0x000d:
4304              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4305              break;              break;
4306    
4307              case 0x000a:              case 0x000a:
4308              break;              break;
4309    
# Line 4253  for (;;) Line 4493  for (;;)
4493            case PT_ANY:            case PT_ANY:
4494            for (fi = min;; fi++)            for (fi = min;; fi++)
4495              {              {
4496              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4497              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4498              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4499              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4269  for (;;) Line 4509  for (;;)
4509            case PT_LAMP:            case PT_LAMP:
4510            for (fi = min;; fi++)            for (fi = min;; fi++)
4511              {              {
4512              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4513              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4514              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4515              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4289  for (;;) Line 4529  for (;;)
4529            case PT_GC:            case PT_GC:
4530            for (fi = min;; fi++)            for (fi = min;; fi++)
4531              {              {
4532              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4533              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4534              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4535              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4307  for (;;) Line 4547  for (;;)
4547            case PT_PC:            case PT_PC:
4548            for (fi = min;; fi++)            for (fi = min;; fi++)
4549              {              {
4550              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4551              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4552              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4553              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4325  for (;;) Line 4565  for (;;)
4565            case PT_SC:            case PT_SC:
4566            for (fi = min;; fi++)            for (fi = min;; fi++)
4567              {              {
4568              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4569              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4570              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4571              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4343  for (;;) Line 4583  for (;;)
4583            case PT_ALNUM:            case PT_ALNUM:
4584            for (fi = min;; fi++)            for (fi = min;; fi++)
4585              {              {
4586              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4587              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4588              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4589              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4362  for (;;) Line 4602  for (;;)
4602            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
4603            for (fi = min;; fi++)            for (fi = min;; fi++)
4604              {              {
4605              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
4606              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4607              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4608              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4382  for (;;) Line 4622  for (;;)
4622            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
4623            for (fi = min;; fi++)            for (fi = min;; fi++)
4624              {              {
4625              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
4626              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4627              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4628              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4402  for (;;) Line 4642  for (;;)
4642            case PT_WORD:            case PT_WORD:
4643            for (fi = min;; fi++)            for (fi = min;; fi++)
4644              {              {
4645              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4646              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4647              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4648              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4434  for (;;) Line 4674  for (;;)
4674          {          {
4675          for (fi = min;; fi++)          for (fi = min;; fi++)
4676            {            {
4677            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
4678            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4679            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4680            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 4466  for (;;) Line 4706  for (;;)
4706          {          {
4707          for (fi = min;; fi++)          for (fi = min;; fi++)
4708            {            {
4709            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
4710            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4711            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4712            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 4629  for (;;) Line 4869  for (;;)
4869          {          {
4870          for (fi = min;; fi++)          for (fi = min;; fi++)
4871            {            {
4872            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
4873            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4874            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4875            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 4927  for (;;) Line 5167  for (;;)
5167          if (possessive) continue;          if (possessive) continue;
5168          for(;;)          for(;;)
5169            {            {
5170            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5171            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5172            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5173            if (utf8) BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
# Line 4968  for (;;) Line 5208  for (;;)
5208    
5209          for(;;)          for(;;)
5210            {            {
5211            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5212            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5213            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5214            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
# Line 5252  for (;;) Line 5492  for (;;)
5492            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5493            }            }
5494    
5495          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run. If possessive, we are
5496            done (no backing up). Otherwise, match at this position; anything other
5497            than no match is immediately returned. For nomatch, back up one
5498            character, unless we are matching \R and the last thing matched was
5499            \r\n, in which case, back up two bytes. */
5500    
5501          if (possessive) continue;          if (possessive) continue;
5502          for(;;)          for(;;)
5503            {            {
5504            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5505            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5506            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5507            BACKCHAR(eptr);            BACKCHAR(eptr);
5508              if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5509                  eptr[-1] == '\r') eptr--;
5510            }            }
5511          }          }
5512        else        else
# Line 5459  for (;;) Line 5705  for (;;)
5705            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5706            }            }
5707    
5708          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run. If possessive, we are
5709            done (no backing up). Otherwise, match at this position; anything other
5710            than no match is immediately returned. For nomatch, back up one
5711            character (byte), unless we are matching \R and the last thing matched
5712            was \r\n, in which case, back up two bytes. */
5713    
5714          if (possessive) continue;          if (possessive) continue;
5715          while (eptr >= pp)          while (eptr >= pp)
5716            {            {
5717            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
           eptr--;  
5718            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5719              eptr--;
5720              if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5721                  eptr[-1] == '\r') eptr--;
5722            }            }
5723          }          }
5724    
# Line 5505  switch (frame->Xwhere) Line 5757  switch (frame->Xwhere)
5757    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5758    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5759    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5760    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63)
5761  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5762    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5763    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
# Line 5534  Undefine all the macros that were define Line 5786  Undefine all the macros that were define
5786  #undef ecode  #undef ecode
5787  #undef mstart  #undef mstart
5788  #undef offset_top  #undef offset_top
 #undef ims  
5789  #undef eptrb  #undef eptrb
5790  #undef flags  #undef flags
5791    
# Line 5552  Undefine all the macros that were define Line 5803  Undefine all the macros that were define
5803  #undef condition  #undef condition
5804  #undef prev_is_word  #undef prev_is_word
5805    
 #undef original_ims  
   
5806  #undef ctype  #undef ctype
5807  #undef length  #undef length
5808  #undef max  #undef max
# Line 5615  int first_byte = -1; Line 5864  int first_byte = -1;
5864  int req_byte = -1;  int req_byte = -1;
5865  int req_byte2 = -1;  int req_byte2 = -1;
5866  int newline;  int newline;
 unsigned long int ims;  
5867  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
5868  BOOL anchored;  BOOL anchored;
5869  BOOL startline;  BOOL startline;
# Line 5719  utf8 = md->utf8 = (re->options & PCRE_UT Line 5967  utf8 = md->utf8 = (re->options & PCRE_UT
5967  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
5968  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5969    
5970    /* Some options are unpacked into BOOL variables in the hope that testing
5971    them will be faster than individual option bits. */
5972    
5973  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
5974  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
5975  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
5976  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5977  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5978                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5979    
5980    
5981  md->hitend = FALSE;  md->hitend = FALSE;
5982  md->mark = NULL;                        /* In case never set */  md->mark = NULL;                        /* In case never set */
5983    
# Line 5806  defined (though never set). So there's n Line 6059  defined (though never set). So there's n
6059  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6060    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
6061    
6062  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Pass back the character offset and error
6063  back the character offset. */  code for an invalid string if a results vector is available. */
6064    
6065  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6066  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
6067    {    {
6068    int tb;    int erroroffset;
6069    if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)    int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
6070      return (tb == length && md->partial > 1)?    if (errorcode != 0)
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;  
   if (start_offset > 0 && start_offset < length)  
6071      {      {
6072      tb = ((USPTR)subject)[start_offset] & 0xc0;      if (offsetcount >= 2)
6073      if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;        {
6074      }        offsets[0] = erroroffset;
6075          offsets[1] = errorcode;
6076          }
6077        return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6078          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6079        }
6080    
6081      /* Check that a start_offset points to the start of a UTF-8 character. */
6082    
6083      if (start_offset > 0 && start_offset < length &&
6084          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
6085        return PCRE_ERROR_BADUTF8_OFFSET;
6086    }    }
6087  #endif  #endif
6088    
 /* The ims options can vary during the matching as a result of the presence  
 of (?ims) items in the pattern. They are kept in a local variable so that  
 restoring at the exit of a group is easy. */  
   
 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);  
   
6089  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
6090  hold, we get a temporary chunk of working store to use during the matching.  hold, we get a temporary chunk of working store to use during the matching.
6091  Otherwise, we can use the vector supplied, rounding down its size to a multiple  Otherwise, we can use the vector supplied, rounding down its size to a multiple
# Line 6108  for(;;) Line 6364  for(;;)
6364    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
6365    md->start_used_ptr = start_match;    md->start_used_ptr = start_match;
6366    md->match_call_count = 0;    md->match_call_count = 0;
6367    rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,    md->match_function_type = 0;
6368      0, 0);    md->end_offset_top = 0;
6369      rc = match(start_match, md->start_code, start_match, NULL, 2, md, NULL, 0);
6370    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6371    
6372    switch(rc)    switch(rc)
# Line 6232  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6489  if (rc == MATCH_MATCH || rc == MATCH_ACC
6489    
6490    /* Set the return code to the number of captured strings, or 0 if there are    /* Set the return code to the number of captured strings, or 0 if there are
6491    too many to fit into the vector. */    too many to fit into the vector. */
6492    
6493    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
6494    
6495    /* If there is space, set up the whole thing as substring 0. The value of    /* If there is space, set up the whole thing as substring 0. The value of

Legend:
Removed from v.595  
changed lines
  Added in v.614

  ViewVC Help
Powered by ViewVC 1.1.5