/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 511 by ph10, Mon Mar 29 09:25:38 2010 UTC | revision 602 by ph10, Wed May 25 08:29:03 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2010 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 132  while (length-- > 0) Line 132  while (length-- > 0)
132  *          Match a back-reference                *  *          Match a back-reference                *
133  *************************************************/  *************************************************/
134    
135  /* If a back reference hasn't been set, the length that is passed is greater  /* Normally, if a back reference hasn't been set, the length that is passed is
136  than the number of characters left in the string, so the match fails.  negative, so the match always fails. However, in JavaScript compatibility mode,
137    the length passed is zero. Note that in caseless UTF-8 mode, the number of
138    subject bytes matched may be different to the number of reference bytes.
139    
140  Arguments:  Arguments:
141    offset      index into the offset vector    offset      index into the offset vector
142    eptr        points into the subject    eptr        pointer into the subject
143    length      length to be matched    length      length of reference to be matched (number of bytes)
144    md          points to match data block    md          points to match data block
145    ims         the ims flags    caseless    TRUE if caseless
146    
147  Returns:      TRUE if matched  Returns:      < 0 if not matched, otherwise the number of subject bytes matched
148  */  */
149    
150  static BOOL  static int
151  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
152    unsigned long int ims)    BOOL caseless)
153  {  {
154  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR eptr_start = eptr;
155    register USPTR p = md->start_subject + md->offset_vector[offset];
156    
157  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
158  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 164  pchars(p, length, FALSE, md); Line 167  pchars(p, length, FALSE, md);
167  printf("\n");  printf("\n");
168  #endif  #endif
169    
170  /* Always fail if not enough characters left */  /* Always fail if reference not set (and not JavaScript compatible). */
171    
172  if (length > md->end_subject - eptr) return FALSE;  if (length < 0) return -1;
173    
174  /* Separate the caseless case for speed. In UTF-8 mode we can only do this  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175  properly if Unicode properties are supported. Otherwise, we can check only  properly if Unicode properties are supported. Otherwise, we can check only
176  ASCII characters. */  ASCII characters. */
177    
178  if ((ims & PCRE_CASELESS) != 0)  if (caseless)
179    {    {
180  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
181  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
182    if (md->utf8)    if (md->utf8)
183      {      {
184      USPTR endptr = eptr + length;      /* Match characters up to the end of the reference. NOTE: the number of
185      while (eptr < endptr)      bytes matched may differ, because there are some characters whose upper and
186        lower case versions code as different numbers of bytes. For example, U+023A
187        (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188        a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189        the latter. It is important, therefore, to check the length along the
190        reference, not along the subject (earlier code did this wrong). */
191    
192        USPTR endptr = p + length;
193        while (p < endptr)
194        {        {
195        int c, d;        int c, d;
196          if (eptr >= md->end_subject) return -1;
197        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
198        GETCHARINC(d, p);        GETCHARINC(d, p);
199        if (c != d && c != UCD_OTHERCASE(d)) return FALSE;        if (c != d && c != UCD_OTHERCASE(d)) return -1;
200        }        }
201      }      }
202    else    else
# Line 193  if ((ims & PCRE_CASELESS) != 0) Line 205  if ((ims & PCRE_CASELESS) != 0)
205    
206    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
207    is no UCP support. */    is no UCP support. */
208        {
209    while (length-- > 0)      if (eptr + length > md->end_subject) return -1;
210      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }      while (length-- > 0)
211          { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
212        }
213    }    }
214    
215  /* In the caseful case, we can just compare the bytes, whether or not we  /* In the caseful case, we can just compare the bytes, whether or not we
216  are in UTF-8 mode. */  are in UTF-8 mode. */
217    
218  else  else
219    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    {
220      if (eptr + length > md->end_subject) return -1;
221      while (length-- > 0) if (*p++ != *eptr++) return -1;
222      }
223    
224  return TRUE;  return eptr - eptr_start;
225  }  }
226    
227    
# Line 255  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 272  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
272         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
273         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
274         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
275         RM51,  RM52, RM53, RM54 };         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
276           RM61,  RM62 };
277    
278  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
279  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 265  actually used in this definition. */ Line 283  actually used in this definition. */
283  #define REGISTER register  #define REGISTER register
284    
285  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
286  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rw) \
287    { \    { \
288    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
289    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rdepth+1); \
290    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
291    }    }
292  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 277  actually used in this definition. */ Line 295  actually used in this definition. */
295    return ra; \    return ra; \
296    }    }
297  #else  #else
298  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rw) \
299    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rdepth+1)
300  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
301  #endif  #endif
302    
# Line 291  argument of match(), which never changes Line 309  argument of match(), which never changes
309    
310  #define REGISTER  #define REGISTER
311    
312  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rf,rw)\
313    {\    {\
314    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
315      if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316    frame->Xwhere = rw; \    frame->Xwhere = rw; \
317    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
318    newframe->Xecode = rb;\    newframe->Xecode = rb;\
319    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
320    newframe->Xmarkptr = markptr;\    newframe->Xmarkptr = markptr;\
321    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
322    newframe->Xims = re;\    newframe->Xeptrb = re;\
323    newframe->Xeptrb = rf;\    newframe->Xflags = rf;\
   newframe->Xflags = rg;\  
324    newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
325    newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
326    frame = newframe;\    frame = newframe;\
# Line 314  argument of match(), which never changes Line 332  argument of match(), which never changes
332    
333  #define RRETURN(ra)\  #define RRETURN(ra)\
334    {\    {\
335    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
336    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
337    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
338    if (frame != NULL)\    if (frame != NULL)\
339      {\      {\
340      rrc = ra;\      rrc = ra;\
# Line 338  typedef struct heapframe { Line 356  typedef struct heapframe {
356    USPTR Xmstart;    USPTR Xmstart;
357    USPTR Xmarkptr;    USPTR Xmarkptr;
358    int Xoffset_top;    int Xoffset_top;
   long int Xims;  
359    eptrblock *Xeptrb;    eptrblock *Xeptrb;
360    int Xflags;    int Xflags;
361    unsigned int Xrdepth;    unsigned int Xrdepth;
# Line 361  typedef struct heapframe { Line 378  typedef struct heapframe {
378    BOOL Xcondition;    BOOL Xcondition;
379    BOOL Xprev_is_word;    BOOL Xprev_is_word;
380    
   unsigned long int Xoriginal_ims;  
   
381  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
382    int Xprop_type;    int Xprop_type;
383    int Xprop_value;    int Xprop_value;
# Line 420  immediately. The second one is used when Line 435  immediately. The second one is used when
435  the subject. */  the subject. */
436    
437  #define CHECK_PARTIAL()\  #define CHECK_PARTIAL()\
438    if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\    if (md->partial != 0 && eptr >= md->end_subject && \
439      {\        eptr > md->start_used_ptr) \
440      md->hitend = TRUE;\      { \
441      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      md->hitend = TRUE; \
442        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
443      }      }
444    
445  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
446    if (md->partial != 0 && eptr > mstart)\    if (md->partial != 0 && eptr > md->start_used_ptr) \
447      {\      { \
448      md->hitend = TRUE;\      md->hitend = TRUE; \
449      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
450      }      }
451    
452    
# Line 447  Arguments: Line 463  Arguments:
463     markptr     pointer to the most recent MARK name, or NULL     markptr     pointer to the most recent MARK name, or NULL
464     offset_top  current top pointer     offset_top  current top pointer
465     md          pointer to "static" info for the match     md          pointer to "static" info for the match
    ims         current /i, /m, and /s options  
466     eptrb       pointer to chain of blocks containing eptr at start of     eptrb       pointer to chain of blocks containing eptr at start of
467                   brackets - for testing for empty matches                   brackets - for testing for empty matches
468     flags       can contain     flags       can contain
# Line 465  Returns:       MATCH_MATCH if matched Line 480  Returns:       MATCH_MATCH if matched
480    
481  static int  static int
482  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
483    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,
484    eptrblock *eptrb, int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
485  {  {
486  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
487  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 478  register unsigned int c;   /* Character Line 493  register unsigned int c;   /* Character
493  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
494    
495  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
496    BOOL caseless;
497  int condcode;  int condcode;
498    
499  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
# Line 486  heap storage. Set up the top-level frame Line 502  heap storage. Set up the top-level frame
502  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
503    
504  #ifdef NO_RECURSE  #ifdef NO_RECURSE
505  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
506    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
507  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
508    
509  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 496  frame->Xecode = ecode; Line 513  frame->Xecode = ecode;
513  frame->Xmstart = mstart;  frame->Xmstart = mstart;
514  frame->Xmarkptr = markptr;  frame->Xmarkptr = markptr;
515  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
 frame->Xims = ims;  
516  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
517  frame->Xflags = flags;  frame->Xflags = flags;
518  frame->Xrdepth = rdepth;  frame->Xrdepth = rdepth;
# Line 512  HEAP_RECURSE: Line 528  HEAP_RECURSE:
528  #define mstart             frame->Xmstart  #define mstart             frame->Xmstart
529  #define markptr            frame->Xmarkptr  #define markptr            frame->Xmarkptr
530  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
 #define ims                frame->Xims  
531  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
532  #define flags              frame->Xflags  #define flags              frame->Xflags
533  #define rdepth             frame->Xrdepth  #define rdepth             frame->Xrdepth
# Line 536  HEAP_RECURSE: Line 551  HEAP_RECURSE:
551  #define condition          frame->Xcondition  #define condition          frame->Xcondition
552  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
553    
 #define original_ims       frame->Xoriginal_ims  
   
554  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
555  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
556  #define prop_value         frame->Xprop_value  #define prop_value         frame->Xprop_value
# Line 591  BOOL cur_is_word;                  /* a Line 604  BOOL cur_is_word;                  /* a
604  BOOL condition;  BOOL condition;
605  BOOL prev_is_word;  BOOL prev_is_word;
606    
 unsigned long int original_ims;  
   
607  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
608  int prop_type;  int prop_type;
609  int prop_value;  int prop_value;
# Line 655  haven't exceeded the recursive call limi Line 666  haven't exceeded the recursive call limi
666  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
667  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
668    
 original_ims = ims;    /* Save for resetting on ')' */  
   
669  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
670  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
671  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
# Line 685  for (;;) Line 694  for (;;)
694      case OP_MARK:      case OP_MARK:
695      markptr = ecode + 2;      markptr = ecode + 2;
696      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
697        ims, eptrb, flags, RM51);        eptrb, flags, RM55);
698    
699      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
700      argument, and we must check whether that argument matches this MARK's      argument, and we must check whether that argument matches this MARK's
701      argument. It is passed back in md->start_match_ptr (an overloading of that      argument. It is passed back in md->start_match_ptr (an overloading of that
702      variable). If it does match, we reset that variable to the current subject      variable). If it does match, we reset that variable to the current subject
703      position and return MATCH_SKIP. Otherwise, pass back the return code      position and return MATCH_SKIP. Otherwise, pass back the return code
704      unaltered. */      unaltered. */
705    
706      if (rrc == MATCH_SKIP_ARG &&      if (rrc == MATCH_SKIP_ARG &&
707          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
708        {        {
709        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
710        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
711        }        }
712    
713      if (md->mark == NULL) md->mark = markptr;      if (md->mark == NULL) md->mark = markptr;
714      RRETURN(rrc);      RRETURN(rrc);
715    
716      case OP_FAIL:      case OP_FAIL:
717      MRRETURN(MATCH_NOMATCH);      MRRETURN(MATCH_NOMATCH);
718    
719        /* COMMIT overrides PRUNE, SKIP, and THEN */
720    
721      case OP_COMMIT:      case OP_COMMIT:
722      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
723        ims, eptrb, flags, RM52);        eptrb, flags, RM52);
724      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
725            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
726            rrc != MATCH_THEN)
727          RRETURN(rrc);
728      MRRETURN(MATCH_COMMIT);      MRRETURN(MATCH_COMMIT);
729    
730        /* PRUNE overrides THEN */
731    
732      case OP_PRUNE:      case OP_PRUNE:
733      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
734        ims, eptrb, flags, RM51);        eptrb, flags, RM51);
735      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
736      MRRETURN(MATCH_PRUNE);      MRRETURN(MATCH_PRUNE);
737    
738      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
739      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
740        ims, eptrb, flags, RM51);        eptrb, flags, RM56);
741      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
742      md->mark = ecode + 2;      md->mark = ecode + 2;
743      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
744    
745        /* SKIP overrides PRUNE and THEN */
746    
747      case OP_SKIP:      case OP_SKIP:
748      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
749        ims, eptrb, flags, RM53);        eptrb, flags, RM53);
750      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
751          RRETURN(rrc);
752      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
753      MRRETURN(MATCH_SKIP);      MRRETURN(MATCH_SKIP);
754    
755      case OP_SKIP_ARG:      case OP_SKIP_ARG:
756      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
757        ims, eptrb, flags, RM53);        eptrb, flags, RM57);
758      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
759          RRETURN(rrc);
760      /* Pass back the current skip name by overloading md->start_match_ptr and  
761      returning the special MATCH_SKIP_ARG return code. This will either be      /* Pass back the current skip name by overloading md->start_match_ptr and
762      caught by a matching MARK, or get to the top, where it is treated the same      returning the special MATCH_SKIP_ARG return code. This will either be
763        caught by a matching MARK, or get to the top, where it is treated the same
764      as PRUNE. */      as PRUNE. */
765    
766      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
767      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
768    
769        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
770        the alt that is at the start of the current branch. This makes it possible
771        to skip back past alternatives that precede the THEN within the current
772        branch. */
773    
774      case OP_THEN:      case OP_THEN:
775      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
776        ims, eptrb, flags, RM54);        eptrb, flags, RM54);
777      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
778        md->start_match_ptr = ecode - GET(ecode, 1);
779      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
780    
781      case OP_THEN_ARG:      case OP_THEN_ARG:
782      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
783        ims, eptrb, flags, RM54);        offset_top, md, eptrb, flags, RM58);
784      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
785      md->mark = ecode + 2;      md->start_match_ptr = ecode - GET(ecode, 1);
786        md->mark = ecode + LINK_SIZE + 2;
787      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
788    
789      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
# Line 793  for (;;) Line 820  for (;;)
820        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
821    
822        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
823        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
824            (int)(eptr - md->start_subject);
825    
826        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
827        do        do
828          {          {
829          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
830            ims, eptrb, flags, RM1);            eptrb, flags, RM1);
831          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
832                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
833              RRETURN(rrc);
834          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
835          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
836          }          }
# Line 850  for (;;) Line 880  for (;;)
880    
881          /* Possibly empty group; can't use tail recursion. */          /* Possibly empty group; can't use tail recursion. */
882    
883          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
884            eptrb, flags, RM48);            flags, RM48);
885          if (rrc == MATCH_NOMATCH) md->mark = markptr;          if (rrc == MATCH_NOMATCH) md->mark = markptr;
886          RRETURN(rrc);          RRETURN(rrc);
887          }          }
888    
889        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
890        otherwise return. */        otherwise return. */
891    
892        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
893          eptrb, flags, RM2);          flags, RM2);
894        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
895              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
896            RRETURN(rrc);
897        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
898        }        }
899      /* Control never reaches here. */      /* Control never reaches here. */
# Line 888  for (;;) Line 920  for (;;)
920          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
921          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
922          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
923          cb.subject_length   = md->end_subject - md->start_subject;          cb.subject_length   = (int)(md->end_subject - md->start_subject);
924          cb.start_match      = mstart - md->start_subject;          cb.start_match      = (int)(mstart - md->start_subject);
925          cb.current_position = eptr - md->start_subject;          cb.current_position = (int)(eptr - md->start_subject);
926          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
927          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
928          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
# Line 1054  for (;;) Line 1086  for (;;)
1086    
1087      else      else
1088        {        {
1089        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL,
1090            match_condassert, RM3);            match_condassert, RM3);
1091        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1092          {          {
# Line 1062  for (;;) Line 1094  for (;;)
1094          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1095          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1096          }          }
1097        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)        else if (rrc != MATCH_NOMATCH &&
1098                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1099          {          {
1100          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1101          }          }
# Line 1083  for (;;) Line 1116  for (;;)
1116        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1117        if (op == OP_SCOND)        /* Possibly empty group */        if (op == OP_SCOND)        /* Possibly empty group */
1118          {          {
1119          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);          RMATCH(eptr, ecode, offset_top, md, eptrb, match_cbegroup, RM49);
1120          RRETURN(rrc);          RRETURN(rrc);
1121          }          }
1122        else                       /* Group must match something */        else                       /* Group must match something */
# Line 1116  for (;;) Line 1149  for (;;)
1149        {        {
1150        md->offset_vector[offset] =        md->offset_vector[offset] =
1151          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
1152        md->offset_vector[offset+1] = eptr - md->start_subject;        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1153        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1154        }        }
1155      ecode += 3;      ecode += 3;
# Line 1137  for (;;) Line 1170  for (;;)
1170        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1171          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1172        offset_top = rec->save_offset_top;        offset_top = rec->save_offset_top;
       ims = original_ims;  
1173        ecode = rec->after_call;        ecode = rec->after_call;
1174        break;        break;
1175        }        }
# Line 1158  for (;;) Line 1190  for (;;)
1190      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1191      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1192      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
     MRRETURN(((op == OP_END)? MATCH_MATCH : MATCH_ACCEPT));  
1193    
1194      /* Change option settings */      /* For some reason, the macros don't work properly if an expression is
1195        given as the argument to MRRETURN when the heap is in use. */
1196    
1197      case OP_OPT:      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1198      ims = ecode[1];      MRRETURN(rrc);
     ecode += 2;  
     DPRINTF(("ims set to %02lx\n", ims));  
     break;  
1199    
1200      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
1201      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
# Line 1178  for (;;) Line 1207  for (;;)
1207      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1208      do      do
1209        {        {
1210        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, 0,
1211          RM4);          RM4);
1212        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1213          {          {
1214          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1215          break;          break;
1216          }          }
1217        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1218              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1219            RRETURN(rrc);
1220        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1221        }        }
1222      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1211  for (;;) Line 1242  for (;;)
1242      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1243      do      do
1244        {        {
1245        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, 0,
1246          RM5);          RM5);
1247        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1248        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
# Line 1219  for (;;) Line 1250  for (;;)
1250          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1251          break;          break;
1252          }          }
1253        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1254              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1255            RRETURN(rrc);
1256        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1257        }        }
1258      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1274  for (;;) Line 1307  for (;;)
1307        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1308        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1309        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1310        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1311        cb.start_match      = mstart - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1312        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1313        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1314        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1315        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
# Line 1347  for (;;) Line 1380  for (;;)
1380        do        do
1381          {          {
1382          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1383            md, ims, eptrb, flags, RM6);            md, eptrb, flags, RM6);
1384          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1385            {            {
1386            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 1356  for (;;) Line 1389  for (;;)
1389              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1390            MRRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1391            }            }
1392          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH &&
1393                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1394            {            {
1395            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1396            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1393  for (;;) Line 1427  for (;;)
1427    
1428      do      do
1429        {        {
1430        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, 0, RM7);
1431        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1432          {          {
1433          mstart = md->start_match_ptr;          mstart = md->start_match_ptr;
1434          break;          break;
1435          }          }
1436        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1437              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1438            RRETURN(rrc);
1439        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1440        }        }
1441      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1430  for (;;) Line 1466  for (;;)
1466    
1467      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1468      preceding bracket, in the appropriate order. The second "call" of match()      preceding bracket, in the appropriate order. The second "call" of match()
1469      uses tail recursion, to avoid using another stack frame. We need to reset      uses tail recursion, to avoid using another stack frame. */
     any options that changed within the bracket before re-running it, so  
     check the next opcode. */  
   
     if (ecode[1+LINK_SIZE] == OP_OPT)  
       {  
       ims = (ims & ~PCRE_IMS) | ecode[4];  
       DPRINTF(("ims set to %02lx at group repeat\n", ims));  
       }  
1470    
1471      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1472        {        {
1473        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, 0, RM8);
1474        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1475        ecode = prev;        ecode = prev;
1476        flags = 0;        flags = 0;
# Line 1450  for (;;) Line 1478  for (;;)
1478        }        }
1479      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1480        {        {
1481        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);        RMATCH(eptr, prev, offset_top, md, eptrb, match_cbegroup, RM9);
1482        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1483        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1484        flags = 0;        flags = 0;
# Line 1474  for (;;) Line 1502  for (;;)
1502      case OP_BRAZERO:      case OP_BRAZERO:
1503        {        {
1504        next = ecode+1;        next = ecode+1;
1505        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);        RMATCH(eptr, next, offset_top, md, eptrb, 0, RM10);
1506        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1507        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1508        ecode = next + 1 + LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
# Line 1485  for (;;) Line 1513  for (;;)
1513        {        {
1514        next = ecode+1;        next = ecode+1;
1515        do next += GET(next, 1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1516        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, 0, RM11);
1517        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1518        ecode++;        ecode++;
1519        }        }
# Line 1553  for (;;) Line 1581  for (;;)
1581          {          {
1582          md->offset_vector[offset] =          md->offset_vector[offset] =
1583            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1584          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1585          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1586          }          }
1587    
# Line 1569  for (;;) Line 1597  for (;;)
1597            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1598          offset_top = rec->save_offset_top;          offset_top = rec->save_offset_top;
1599          ecode = rec->after_call;          ecode = rec->after_call;
         ims = original_ims;  
1600          break;          break;
1601          }          }
1602        }        }
1603    
     /* For both capturing and non-capturing groups, reset the value of the ims  
     flags, in case they got changed during the group. */  
   
     ims = original_ims;  
     DPRINTF(("ims reset to %02lx\n", ims));  
   
1604      /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1605      happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1606      This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
# Line 1601  for (;;) Line 1622  for (;;)
1622    
1623      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1624        {        {
1625        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, 0, RM12);
1626        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1627        if (flags != 0)    /* Could match an empty string */        if (flags != 0)    /* Could match an empty string */
1628          {          {
1629          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);          RMATCH(eptr, prev, offset_top, md, eptrb, flags, RM50);
1630          RRETURN(rrc);          RRETURN(rrc);
1631          }          }
1632        ecode = prev;        ecode = prev;
# Line 1613  for (;;) Line 1634  for (;;)
1634        }        }
1635      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1636        {        {
1637        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);        RMATCH(eptr, prev, offset_top, md, eptrb, flags, RM13);
1638        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1639        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1640        flags = 0;        flags = 0;
# Line 1621  for (;;) Line 1642  for (;;)
1642        }        }
1643      /* Control never gets here */      /* Control never gets here */
1644    
1645      /* Start of subject unless notbol, or after internal newline if multiline */      /* Not multiline mode: start of subject assertion, unless notbol. */
1646    
1647      case OP_CIRC:      case OP_CIRC:
1648      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1649      if ((ims & PCRE_MULTILINE) != 0)  
       {  
       if (eptr != md->start_subject &&  
           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))  
         MRRETURN(MATCH_NOMATCH);  
       ecode++;  
       break;  
       }  
     /* ... else fall through */  
   
1650      /* Start of subject assertion */      /* Start of subject assertion */
1651    
1652      case OP_SOD:      case OP_SOD:
1653      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1654      ecode++;      ecode++;
1655      break;      break;
1656    
1657        /* Multiline mode: start of subject unless notbol, or after any newline. */
1658    
1659        case OP_CIRCM:
1660        if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1661        if (eptr != md->start_subject &&
1662            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1663          MRRETURN(MATCH_NOMATCH);
1664        ecode++;
1665        break;
1666    
1667      /* Start of match assertion */      /* Start of match assertion */
1668    
# Line 1656  for (;;) Line 1678  for (;;)
1678      ecode++;      ecode++;
1679      break;      break;
1680    
1681      /* Assert before internal newline if multiline, or before a terminating      /* Multiline mode: assert before any newline, or before end of subject
1682      newline unless endonly is set, else end of subject unless noteol is set. */      unless noteol is set. */
1683    
1684      case OP_DOLL:      case OP_DOLLM:
1685      if ((ims & PCRE_MULTILINE) != 0)      if (eptr < md->end_subject)
1686        {        { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
       if (eptr < md->end_subject)  
         { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }  
       else  
         { if (md->noteol) MRRETURN(MATCH_NOMATCH); }  
       ecode++;  
       break;  
       }  
1687      else      else
1688        {        {
1689        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1690        if (!md->endonly)        SCHECK_PARTIAL();
         {  
         if (eptr != md->end_subject &&  
             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))  
           MRRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1691        }        }
1692        ecode++;
1693        break;
1694    
1695        /* Not multiline mode: assert before a terminating newline or before end of
1696        subject unless noteol is set. */
1697    
1698        case OP_DOLL:
1699        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1700        if (!md->endonly) goto ASSERT_NL_OR_EOS;
1701    
1702      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1703    
1704      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1705    
1706      case OP_EOD:      case OP_EOD:
1707      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1708        SCHECK_PARTIAL();
1709      ecode++;      ecode++;
1710      break;      break;
1711    
1712      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1713    
1714      case OP_EODN:      case OP_EODN:
1715      if (eptr != md->end_subject &&      ASSERT_NL_OR_EOS:
1716        if (eptr < md->end_subject &&
1717          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1718        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1719    
1720        /* Either at end of string or \n before end. */
1721    
1722        SCHECK_PARTIAL();
1723      ecode++;      ecode++;
1724      break;      break;
1725    
# Line 1713  for (;;) Line 1737  for (;;)
1737  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1738        if (utf8)        if (utf8)
1739          {          {
1740            /* Get status of previous character */
1741    
1742          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1743            {            {
1744            USPTR lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1745            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1746            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1747            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1748    #ifdef SUPPORT_UCP
1749              if (md->use_ucp)
1750                {
1751                if (c == '_') prev_is_word = TRUE; else
1752                  {
1753                  int cat = UCD_CATEGORY(c);
1754                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1755                  }
1756                }
1757              else
1758    #endif
1759            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1760            }            }
1761    
1762            /* Get status of next character */
1763    
1764          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1765            {            {
1766            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 1729  for (;;) Line 1769  for (;;)
1769          else          else
1770            {            {
1771            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1772    #ifdef SUPPORT_UCP
1773              if (md->use_ucp)
1774                {
1775                if (c == '_') cur_is_word = TRUE; else
1776                  {
1777                  int cat = UCD_CATEGORY(c);
1778                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1779                  }
1780                }
1781              else
1782    #endif
1783            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1784            }            }
1785          }          }
1786        else        else
1787  #endif  #endif
1788    
1789        /* Not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1790          consistency with the behaviour of \w we do use it in this case. */
1791    
1792          {          {
1793            /* Get status of previous character */
1794    
1795          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1796            {            {
1797            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1798    #ifdef SUPPORT_UCP
1799              if (md->use_ucp)
1800                {
1801                c = eptr[-1];
1802                if (c == '_') prev_is_word = TRUE; else
1803                  {
1804                  int cat = UCD_CATEGORY(c);
1805                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1806                  }
1807                }
1808              else
1809    #endif
1810            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1811            }            }
1812    
1813            /* Get status of next character */
1814    
1815          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1816            {            {
1817            SCHECK_PARTIAL();            SCHECK_PARTIAL();
1818            cur_is_word = FALSE;            cur_is_word = FALSE;
1819            }            }
1820          else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          else
1821    #ifdef SUPPORT_UCP
1822            if (md->use_ucp)
1823              {
1824              c = *eptr;
1825              if (c == '_') cur_is_word = TRUE; else
1826                {
1827                int cat = UCD_CATEGORY(c);
1828                cur_is_word = (cat == ucp_L || cat == ucp_N);
1829                }
1830              }
1831            else
1832    #endif
1833            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1834          }          }
1835    
1836        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 1899  for (;;) Line 1981  for (;;)
1981      switch(c)      switch(c)
1982        {        {
1983        default: MRRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
1984    
1985        case 0x000d:        case 0x000d:
1986        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1987        break;        break;
# Line 2055  for (;;) Line 2138  for (;;)
2138               prop->chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2139               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2140            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2141           break;          break;
2142    
2143          case PT_GC:          case PT_GC:
2144          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
# Line 2072  for (;;) Line 2155  for (;;)
2155            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2156          break;          break;
2157    
2158            /* These are specials */
2159    
2160            case PT_ALNUM:
2161            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2162                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2163              MRRETURN(MATCH_NOMATCH);
2164            break;
2165    
2166            case PT_SPACE:    /* Perl space */
2167            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2168                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2169                   == (op == OP_NOTPROP))
2170              MRRETURN(MATCH_NOMATCH);
2171            break;
2172    
2173            case PT_PXSPACE:  /* POSIX space */
2174            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2175                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2176                 c == CHAR_FF || c == CHAR_CR)
2177                   == (op == OP_NOTPROP))
2178              MRRETURN(MATCH_NOMATCH);
2179            break;
2180    
2181            case PT_WORD:
2182            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2183                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2184                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2185              MRRETURN(MATCH_NOMATCH);
2186            break;
2187    
2188            /* This should never occur */
2189    
2190          default:          default:
2191          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
2192          }          }
# Line 2119  for (;;) Line 2234  for (;;)
2234      loops). */      loops). */
2235    
2236      case OP_REF:      case OP_REF:
2237        {      case OP_REFI:
2238        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      caseless = op == OP_REFI;
2239        ecode += 3;      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2240        ecode += 3;
2241    
2242        /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2243    
2244        (a) In the default, Perl-compatible state, set the length to be longer      (a) In the default, Perl-compatible state, set the length negative;
2245        than the amount of subject left; this ensures that every attempt at a      this ensures that every attempt at a match fails. We can't just fail
2246        match fails. We can't just fail here, because of the possibility of      here, because of the possibility of quantifiers with zero minima.
       quantifiers with zero minima.  
2247    
2248        (b) If the JavaScript compatibility flag is set, set the length to zero      (b) If the JavaScript compatibility flag is set, set the length to zero
2249        so that the back reference matches an empty string.      so that the back reference matches an empty string.
2250    
2251        Otherwise, set the length to the length of what was matched by the      Otherwise, set the length to the length of what was matched by the
2252        referenced subpattern. */      referenced subpattern. */
2253    
2254        if (offset >= offset_top || md->offset_vector[offset] < 0)      if (offset >= offset_top || md->offset_vector[offset] < 0)
2255          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;        length = (md->jscript_compat)? 0 : -1;
2256        else      else
2257          length = md->offset_vector[offset+1] - md->offset_vector[offset];        length = md->offset_vector[offset+1] - md->offset_vector[offset];
2258    
2259        /* Set up for repetition, or handle the non-repeated case */      /* Set up for repetition, or handle the non-repeated case */
2260    
2261        switch (*ecode)      switch (*ecode)
2262          {        {
2263          case OP_CRSTAR:        case OP_CRSTAR:
2264          case OP_CRMINSTAR:        case OP_CRMINSTAR:
2265          case OP_CRPLUS:        case OP_CRPLUS:
2266          case OP_CRMINPLUS:        case OP_CRMINPLUS:
2267          case OP_CRQUERY:        case OP_CRQUERY:
2268          case OP_CRMINQUERY:        case OP_CRMINQUERY:
2269          c = *ecode++ - OP_CRSTAR;        c = *ecode++ - OP_CRSTAR;
2270          minimize = (c & 1) != 0;        minimize = (c & 1) != 0;
2271          min = rep_min[c];                 /* Pick up values from tables; */        min = rep_min[c];                 /* Pick up values from tables; */
2272          max = rep_max[c];                 /* zero for max => infinity */        max = rep_max[c];                 /* zero for max => infinity */
2273          if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2274          break;        break;
2275    
2276          case OP_CRRANGE:        case OP_CRRANGE:
2277          case OP_CRMINRANGE:        case OP_CRMINRANGE:
2278          minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2279          min = GET2(ecode, 1);        min = GET2(ecode, 1);
2280          max = GET2(ecode, 3);        max = GET2(ecode, 3);
2281          if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2282          ecode += 5;        ecode += 5;
2283          break;        break;
2284    
2285          default:               /* No repeat follows */        default:               /* No repeat follows */
2286          if (!match_ref(offset, eptr, length, md, ims))        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2287            {          {
2288            CHECK_PARTIAL();          CHECK_PARTIAL();
2289            MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
           }  
         eptr += length;  
         continue;              /* With the main loop */  
2290          }          }
2291          eptr += length;
2292          continue;              /* With the main loop */
2293          }
2294    
2295        /* If the length of the reference is zero, just continue with the      /* Handle repeated back references. If the length of the reference is
2296        main loop. */      zero, just continue with the main loop. */
2297    
2298        if (length == 0) continue;      if (length == 0) continue;
2299    
2300        /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2301        the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
2302        address of eptr, so that eptr can be a register variable. */      address of eptr, so that eptr can be a register variable. */
2303    
2304        for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2305          {
2306          int slength;
2307          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2308          {          {
2309          if (!match_ref(offset, eptr, length, md, ims))          CHECK_PARTIAL();
2310            {          MRRETURN(MATCH_NOMATCH);
           CHECK_PARTIAL();  
           MRRETURN(MATCH_NOMATCH);  
           }  
         eptr += length;  
2311          }          }
2312          eptr += slength;
2313          }
2314    
2315        /* If min = max, continue at the same level without recursion.      /* If min = max, continue at the same level without recursion.
2316        They are not both allowed to be zero. */      They are not both allowed to be zero. */
2317    
2318        if (min == max) continue;      if (min == max) continue;
2319    
2320        /* If minimizing, keep trying and advancing the pointer */      /* If minimizing, keep trying and advancing the pointer */
2321    
2322        if (minimize)      if (minimize)
2323          {
2324          for (fi = min;; fi++)
2325          {          {
2326          for (fi = min;; fi++)          int slength;
2327            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM14);
2328            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2329            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2330            if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2331            {            {
2332            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            CHECK_PARTIAL();
2333            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            MRRETURN(MATCH_NOMATCH);
           if (fi >= max) MRRETURN(MATCH_NOMATCH);  
           if (!match_ref(offset, eptr, length, md, ims))  
             {  
             CHECK_PARTIAL();  
             MRRETURN(MATCH_NOMATCH);  
             }  
           eptr += length;  
2334            }            }
2335          /* Control never gets here */          eptr += slength;
2336          }          }
2337          /* Control never gets here */
2338          }
2339    
2340        /* If maximizing, find the longest string and work backwards */      /* If maximizing, find the longest string and work backwards */
2341    
2342        else      else
2343          {
2344          pp = eptr;
2345          for (i = min; i < max; i++)
2346          {          {
2347          pp = eptr;          int slength;
2348          for (i = min; i < max; i++)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2349            {            {
2350            if (!match_ref(offset, eptr, length, md, ims))            CHECK_PARTIAL();
2351              {            break;
             CHECK_PARTIAL();  
             break;  
             }  
           eptr += length;  
           }  
         while (eptr >= pp)  
           {  
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);  
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
           eptr -= length;  
2352            }            }
2353          MRRETURN(MATCH_NOMATCH);          eptr += slength;
2354          }          }
2355          while (eptr >= pp)
2356            {
2357            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM15);
2358            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2359            eptr -= length;
2360            }
2361          MRRETURN(MATCH_NOMATCH);
2362        }        }
2363      /* Control never gets here */      /* Control never gets here */
2364    
# Line 2347  for (;;) Line 2464  for (;;)
2464            {            {
2465            for (fi = min;; fi++)            for (fi = min;; fi++)
2466              {              {
2467              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM16);
2468              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2469              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2470              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 2372  for (;;) Line 2489  for (;;)
2489            {            {
2490            for (fi = min;; fi++)            for (fi = min;; fi++)
2491              {              {
2492              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM17);
2493              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2494              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2495              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 2418  for (;;) Line 2535  for (;;)
2535              }              }
2536            for (;;)            for (;;)
2537              {              {
2538              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM18);
2539              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2540              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2541              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2441  for (;;) Line 2558  for (;;)
2558              }              }
2559            while (eptr >= pp)            while (eptr >= pp)
2560              {              {
2561              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM19);
2562              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2563              eptr--;              eptr--;
2564              }              }
# Line 2517  for (;;) Line 2634  for (;;)
2634          {          {
2635          for (fi = min;; fi++)          for (fi = min;; fi++)
2636            {            {
2637            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM20);
2638            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2639            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2640            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 2550  for (;;) Line 2667  for (;;)
2667            }            }
2668          for(;;)          for(;;)
2669            {            {
2670            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM21);
2671            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2672            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2673            if (utf8) BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
# Line 2595  for (;;) Line 2712  for (;;)
2712    
2713      /* Match a single character, caselessly */      /* Match a single character, caselessly */
2714    
2715      case OP_CHARNC:      case OP_CHARI:
2716  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2717      if (utf8)      if (utf8)
2718        {        {
# Line 2655  for (;;) Line 2772  for (;;)
2772      /* Match a single character repeatedly. */      /* Match a single character repeatedly. */
2773    
2774      case OP_EXACT:      case OP_EXACT:
2775        case OP_EXACTI:
2776      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2777      ecode += 3;      ecode += 3;
2778      goto REPEATCHAR;      goto REPEATCHAR;
2779    
2780      case OP_POSUPTO:      case OP_POSUPTO:
2781        case OP_POSUPTOI:
2782      possessive = TRUE;      possessive = TRUE;
2783      /* Fall through */      /* Fall through */
2784    
2785      case OP_UPTO:      case OP_UPTO:
2786        case OP_UPTOI:
2787      case OP_MINUPTO:      case OP_MINUPTO:
2788        case OP_MINUPTOI:
2789      min = 0;      min = 0;
2790      max = GET2(ecode, 1);      max = GET2(ecode, 1);
2791      minimize = *ecode == OP_MINUPTO;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
2792      ecode += 3;      ecode += 3;
2793      goto REPEATCHAR;      goto REPEATCHAR;
2794    
2795      case OP_POSSTAR:      case OP_POSSTAR:
2796        case OP_POSSTARI:
2797      possessive = TRUE;      possessive = TRUE;
2798      min = 0;      min = 0;
2799      max = INT_MAX;      max = INT_MAX;
# Line 2679  for (;;) Line 2801  for (;;)
2801      goto REPEATCHAR;      goto REPEATCHAR;
2802    
2803      case OP_POSPLUS:      case OP_POSPLUS:
2804        case OP_POSPLUSI:
2805      possessive = TRUE;      possessive = TRUE;
2806      min = 1;      min = 1;
2807      max = INT_MAX;      max = INT_MAX;
# Line 2686  for (;;) Line 2809  for (;;)
2809      goto REPEATCHAR;      goto REPEATCHAR;
2810    
2811      case OP_POSQUERY:      case OP_POSQUERY:
2812        case OP_POSQUERYI:
2813      possessive = TRUE;      possessive = TRUE;
2814      min = 0;      min = 0;
2815      max = 1;      max = 1;
# Line 2693  for (;;) Line 2817  for (;;)
2817      goto REPEATCHAR;      goto REPEATCHAR;
2818    
2819      case OP_STAR:      case OP_STAR:
2820        case OP_STARI:
2821      case OP_MINSTAR:      case OP_MINSTAR:
2822        case OP_MINSTARI:
2823      case OP_PLUS:      case OP_PLUS:
2824        case OP_PLUSI:
2825      case OP_MINPLUS:      case OP_MINPLUS:
2826        case OP_MINPLUSI:
2827      case OP_QUERY:      case OP_QUERY:
2828        case OP_QUERYI:
2829      case OP_MINQUERY:      case OP_MINQUERY:
2830      c = *ecode++ - OP_STAR;      case OP_MINQUERYI:
2831        c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
2832      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
   
2833      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2834      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2835      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
# Line 2723  for (;;) Line 2852  for (;;)
2852          {          {
2853  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2854          unsigned int othercase;          unsigned int othercase;
2855          if ((ims & PCRE_CASELESS) != 0 &&          if (op >= OP_STARI &&     /* Caseless */
2856              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
2857            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2858          else oclength = 0;          else oclength = 0;
# Line 2751  for (;;) Line 2880  for (;;)
2880            {            {
2881            for (fi = min;; fi++)            for (fi = min;; fi++)
2882              {              {
2883              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM22);
2884              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2885              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2886              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
# Line 2793  for (;;) Line 2922  for (;;)
2922    
2923            for(;;)            for(;;)
2924              {              {
2925              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM23);
2926              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2927              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2928  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2830  for (;;) Line 2959  for (;;)
2959      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2960        max, eptr));        max, eptr));
2961    
2962      if ((ims & PCRE_CASELESS) != 0)      if (op >= OP_STARI)  /* Caseless */
2963        {        {
2964        fc = md->lcc[fc];        fc = md->lcc[fc];
2965        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
# Line 2847  for (;;) Line 2976  for (;;)
2976          {          {
2977          for (fi = min;; fi++)          for (fi = min;; fi++)
2978            {            {
2979            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM24);
2980            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2981            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2982            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 2877  for (;;) Line 3006  for (;;)
3006    
3007          while (eptr >= pp)          while (eptr >= pp)
3008            {            {
3009            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM25);
3010            eptr--;            eptr--;
3011            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3012            }            }
# Line 2906  for (;;) Line 3035  for (;;)
3035          {          {
3036          for (fi = min;; fi++)          for (fi = min;; fi++)
3037            {            {
3038            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM26);
3039            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3040            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3041            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 2935  for (;;) Line 3064  for (;;)
3064    
3065          while (eptr >= pp)          while (eptr >= pp)
3066            {            {
3067            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM27);
3068            eptr--;            eptr--;
3069            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3070            }            }
# Line 2948  for (;;) Line 3077  for (;;)
3077      checking can be multibyte. */      checking can be multibyte. */
3078    
3079      case OP_NOT:      case OP_NOT:
3080        case OP_NOTI:
3081      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3082        {        {
3083        SCHECK_PARTIAL();        SCHECK_PARTIAL();
# Line 2955  for (;;) Line 3085  for (;;)
3085        }        }
3086      ecode++;      ecode++;
3087      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3088      if ((ims & PCRE_CASELESS) != 0)      if (op == OP_NOTI)         /* The caseless case */
3089        {        {
3090  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3091        if (c < 256)        if (c < 256)
# Line 2963  for (;;) Line 3093  for (;;)
3093        c = md->lcc[c];        c = md->lcc[c];
3094        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3095        }        }
3096      else      else    /* Caseful */
3097        {        {
3098        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3099        }        }
# Line 2977  for (;;) Line 3107  for (;;)
3107      about... */      about... */
3108    
3109      case OP_NOTEXACT:      case OP_NOTEXACT:
3110        case OP_NOTEXACTI:
3111      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3112      ecode += 3;      ecode += 3;
3113      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3114    
3115      case OP_NOTUPTO:      case OP_NOTUPTO:
3116        case OP_NOTUPTOI:
3117      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
3118        case OP_NOTMINUPTOI:
3119      min = 0;      min = 0;
3120      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3121      minimize = *ecode == OP_NOTMINUPTO;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3122      ecode += 3;      ecode += 3;
3123      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3124    
3125      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
3126        case OP_NOTPOSSTARI:
3127      possessive = TRUE;      possessive = TRUE;
3128      min = 0;      min = 0;
3129      max = INT_MAX;      max = INT_MAX;
# Line 2997  for (;;) Line 3131  for (;;)
3131      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3132    
3133      case OP_NOTPOSPLUS:      case OP_NOTPOSPLUS:
3134        case OP_NOTPOSPLUSI:
3135      possessive = TRUE;      possessive = TRUE;
3136      min = 1;      min = 1;
3137      max = INT_MAX;      max = INT_MAX;
# Line 3004  for (;;) Line 3139  for (;;)
3139      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3140    
3141      case OP_NOTPOSQUERY:      case OP_NOTPOSQUERY:
3142        case OP_NOTPOSQUERYI:
3143      possessive = TRUE;      possessive = TRUE;
3144      min = 0;      min = 0;
3145      max = 1;      max = 1;
# Line 3011  for (;;) Line 3147  for (;;)
3147      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3148    
3149      case OP_NOTPOSUPTO:      case OP_NOTPOSUPTO:
3150        case OP_NOTPOSUPTOI:
3151      possessive = TRUE;      possessive = TRUE;
3152      min = 0;      min = 0;
3153      max = GET2(ecode, 1);      max = GET2(ecode, 1);
# Line 3018  for (;;) Line 3155  for (;;)
3155      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3156    
3157      case OP_NOTSTAR:      case OP_NOTSTAR:
3158        case OP_NOTSTARI:
3159      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3160        case OP_NOTMINSTARI:
3161      case OP_NOTPLUS:      case OP_NOTPLUS:
3162        case OP_NOTPLUSI:
3163      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
3164        case OP_NOTMINPLUSI:
3165      case OP_NOTQUERY:      case OP_NOTQUERY:
3166        case OP_NOTQUERYI:
3167      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
3168      c = *ecode++ - OP_NOTSTAR;      case OP_NOTMINQUERYI:
3169        c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3170      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
3171      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
3172      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
# Line 3045  for (;;) Line 3188  for (;;)
3188      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3189        max, eptr));        max, eptr));
3190    
3191      if ((ims & PCRE_CASELESS) != 0)      if (op >= OP_NOTSTARI)     /* Caseless */
3192        {        {
3193        fc = md->lcc[fc];        fc = md->lcc[fc];
3194    
# Line 3093  for (;;) Line 3236  for (;;)
3236            register unsigned int d;            register unsigned int d;
3237            for (fi = min;; fi++)            for (fi = min;; fi++)
3238              {              {
3239              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM28);
3240              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3241              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3242              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3112  for (;;) Line 3255  for (;;)
3255            {            {
3256            for (fi = min;; fi++)            for (fi = min;; fi++)
3257              {              {
3258              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM29);
3259              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3260              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3261              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3153  for (;;) Line 3296  for (;;)
3296          if (possessive) continue;          if (possessive) continue;
3297          for(;;)          for(;;)
3298              {              {
3299              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM30);
3300              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3301              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3302              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3176  for (;;) Line 3319  for (;;)
3319            if (possessive) continue;            if (possessive) continue;
3320            while (eptr >= pp)            while (eptr >= pp)
3321              {              {
3322              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM31);
3323              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3324              eptr--;              eptr--;
3325              }              }
# Line 3233  for (;;) Line 3376  for (;;)
3376            register unsigned int d;            register unsigned int d;
3377            for (fi = min;; fi++)            for (fi = min;; fi++)
3378              {              {
3379              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM32);
3380              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3381              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3382              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3251  for (;;) Line 3394  for (;;)
3394            {            {
3395            for (fi = min;; fi++)            for (fi = min;; fi++)
3396              {              {
3397              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM33);
3398              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3399              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3400              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3291  for (;;) Line 3434  for (;;)
3434            if (possessive) continue;            if (possessive) continue;
3435            for(;;)            for(;;)
3436              {              {
3437              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM34);
3438              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3439              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3440              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3314  for (;;) Line 3457  for (;;)
3457            if (possessive) continue;            if (possessive) continue;
3458            while (eptr >= pp)            while (eptr >= pp)
3459              {              {
3460              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM35);
3461              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3462              eptr--;              eptr--;
3463              }              }
# Line 3488  for (;;) Line 3631  for (;;)
3631              }              }
3632            break;            break;
3633    
3634              case PT_ALNUM:
3635              for (i = 1; i <= min; i++)
3636                {
3637                if (eptr >= md->end_subject)
3638                  {
3639                  SCHECK_PARTIAL();
3640                  MRRETURN(MATCH_NOMATCH);
3641                  }
3642                GETCHARINCTEST(c, eptr);
3643                prop_category = UCD_CATEGORY(c);
3644                if ((prop_category == ucp_L || prop_category == ucp_N)
3645                       == prop_fail_result)
3646                  MRRETURN(MATCH_NOMATCH);
3647                }
3648              break;
3649    
3650              case PT_SPACE:    /* Perl space */
3651              for (i = 1; i <= min; i++)
3652                {
3653                if (eptr >= md->end_subject)
3654                  {
3655                  SCHECK_PARTIAL();
3656                  MRRETURN(MATCH_NOMATCH);
3657                  }
3658                GETCHARINCTEST(c, eptr);
3659                prop_category = UCD_CATEGORY(c);
3660                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3661                     c == CHAR_FF || c == CHAR_CR)
3662                       == prop_fail_result)
3663                  MRRETURN(MATCH_NOMATCH);
3664                }
3665              break;
3666    
3667              case PT_PXSPACE:  /* POSIX space */
3668              for (i = 1; i <= min; i++)
3669                {
3670                if (eptr >= md->end_subject)
3671                  {
3672                  SCHECK_PARTIAL();
3673                  MRRETURN(MATCH_NOMATCH);
3674                  }
3675                GETCHARINCTEST(c, eptr);
3676                prop_category = UCD_CATEGORY(c);
3677                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3678                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3679                       == prop_fail_result)
3680                  MRRETURN(MATCH_NOMATCH);
3681                }
3682              break;
3683    
3684              case PT_WORD:
3685              for (i = 1; i <= min; i++)
3686                {
3687                if (eptr >= md->end_subject)
3688                  {
3689                  SCHECK_PARTIAL();
3690                  MRRETURN(MATCH_NOMATCH);
3691                  }
3692                GETCHARINCTEST(c, eptr);
3693                prop_category = UCD_CATEGORY(c);
3694                if ((prop_category == ucp_L || prop_category == ucp_N ||
3695                     c == CHAR_UNDERSCORE)
3696                       == prop_fail_result)
3697                  MRRETURN(MATCH_NOMATCH);
3698                }
3699              break;
3700    
3701              /* This should not occur */
3702    
3703            default:            default:
3704            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
3705            }            }
# Line 3572  for (;;) Line 3784  for (;;)
3784            switch(c)            switch(c)
3785              {              {
3786              default: MRRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3787    
3788              case 0x000d:              case 0x000d:
3789              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3790              break;              break;
# Line 3848  for (;;) Line 4061  for (;;)
4061            switch(*eptr++)            switch(*eptr++)
4062              {              {
4063              default: MRRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4064    
4065              case 0x000d:              case 0x000d:
4066              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4067              break;              break;
4068    
4069              case 0x000a:              case 0x000a:
4070              break;              break;
4071    
# Line 4040  for (;;) Line 4255  for (;;)
4255            case PT_ANY:            case PT_ANY:
4256            for (fi = min;; fi++)            for (fi = min;; fi++)
4257              {              {
4258              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM36);
4259              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4260              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4261              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4048  for (;;) Line 4263  for (;;)
4263                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4264                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4265                }                }
4266              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4267              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4268              }              }
4269            /* Control never gets here */            /* Control never gets here */
# Line 4056  for (;;) Line 4271  for (;;)
4271            case PT_LAMP:            case PT_LAMP:
4272            for (fi = min;; fi++)            for (fi = min;; fi++)
4273              {              {
4274              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM37);
4275              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4276              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4277              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4064  for (;;) Line 4279  for (;;)
4279                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4280                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4281                }                }
4282              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4283              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4284              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4285                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4076  for (;;) Line 4291  for (;;)
4291            case PT_GC:            case PT_GC:
4292            for (fi = min;; fi++)            for (fi = min;; fi++)
4293              {              {
4294              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM38);
4295              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4296              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4297              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4084  for (;;) Line 4299  for (;;)
4299                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4300                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4301                }                }
4302              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4303              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4304              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4305                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4094  for (;;) Line 4309  for (;;)
4309            case PT_PC:            case PT_PC:
4310            for (fi = min;; fi++)            for (fi = min;; fi++)
4311              {              {
4312              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM39);
4313              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4314              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4315              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4102  for (;;) Line 4317  for (;;)
4317                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4318                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4319                }                }
4320              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4321              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4322              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4323                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4112  for (;;) Line 4327  for (;;)
4327            case PT_SC:            case PT_SC:
4328            for (fi = min;; fi++)            for (fi = min;; fi++)
4329              {              {
4330              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM40);
4331              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4332              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4333              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4120  for (;;) Line 4335  for (;;)
4335                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4336                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4337                }                }
4338              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4339              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4340              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4341                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4342              }              }
4343            /* Control never gets here */            /* Control never gets here */
4344    
4345              case PT_ALNUM:
4346              for (fi = min;; fi++)
4347                {
4348                RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM59);
4349                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4350                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4351                if (eptr >= md->end_subject)
4352                  {
4353                  SCHECK_PARTIAL();
4354                  MRRETURN(MATCH_NOMATCH);
4355                  }
4356                GETCHARINCTEST(c, eptr);
4357                prop_category = UCD_CATEGORY(c);
4358                if ((prop_category == ucp_L || prop_category == ucp_N)
4359                       == prop_fail_result)
4360                  MRRETURN(MATCH_NOMATCH);
4361                }
4362              /* Control never gets here */
4363    
4364              case PT_SPACE:    /* Perl space */
4365              for (fi = min;; fi++)
4366                {
4367                RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM60);
4368                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4369                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4370                if (eptr >= md->end_subject)
4371                  {
4372                  SCHECK_PARTIAL();
4373                  MRRETURN(MATCH_NOMATCH);
4374                  }
4375                GETCHARINCTEST(c, eptr);
4376                prop_category = UCD_CATEGORY(c);
4377                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4378                     c == CHAR_FF || c == CHAR_CR)
4379                       == prop_fail_result)
4380                  MRRETURN(MATCH_NOMATCH);
4381                }
4382              /* Control never gets here */
4383    
4384              case PT_PXSPACE:  /* POSIX space */
4385              for (fi = min;; fi++)
4386                {
4387                RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM61);
4388                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4389                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4390                if (eptr >= md->end_subject)
4391                  {
4392                  SCHECK_PARTIAL();
4393                  MRRETURN(MATCH_NOMATCH);
4394                  }
4395                GETCHARINCTEST(c, eptr);
4396                prop_category = UCD_CATEGORY(c);
4397                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4398                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4399                       == prop_fail_result)
4400                  MRRETURN(MATCH_NOMATCH);
4401                }
4402              /* Control never gets here */
4403    
4404              case PT_WORD:
4405              for (fi = min;; fi++)
4406                {
4407                RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM62);
4408                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4409                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4410                if (eptr >= md->end_subject)
4411                  {
4412                  SCHECK_PARTIAL();
4413                  MRRETURN(MATCH_NOMATCH);
4414                  }
4415                GETCHARINCTEST(c, eptr);
4416                prop_category = UCD_CATEGORY(c);
4417                if ((prop_category == ucp_L ||
4418                     prop_category == ucp_N ||
4419                     c == CHAR_UNDERSCORE)
4420                       == prop_fail_result)
4421                  MRRETURN(MATCH_NOMATCH);
4422                }
4423              /* Control never gets here */
4424    
4425              /* This should never occur */
4426    
4427            default:            default:
4428            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
4429            }            }
# Line 4139  for (;;) Line 4436  for (;;)
4436          {          {
4437          for (fi = min;; fi++)          for (fi = min;; fi++)
4438            {            {
4439            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM41);
4440            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4441            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4442            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 4171  for (;;) Line 4468  for (;;)
4468          {          {
4469          for (fi = min;; fi++)          for (fi = min;; fi++)
4470            {            {
4471            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM42);
4472            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4473            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4474            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 4334  for (;;) Line 4631  for (;;)
4631          {          {
4632          for (fi = min;; fi++)          for (fi = min;; fi++)
4633            {            {
4634            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM43);
4635            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4636            if (fi >= max) MRRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4637            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 4473  for (;;) Line 4770  for (;;)
4770                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4771                break;                break;
4772                }                }
4773              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4774              if (prop_fail_result) break;              if (prop_fail_result) break;
4775              eptr+= len;              eptr+= len;
4776              }              }
# Line 4488  for (;;) Line 4785  for (;;)
4785                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4786                break;                break;
4787                }                }
4788              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4789              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4790              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4791                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4507  for (;;) Line 4804  for (;;)
4804                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4805                break;                break;
4806                }                }
4807              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4808              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4809              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4810                break;                break;
# Line 4524  for (;;) Line 4821  for (;;)
4821                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4822                break;                break;
4823                }                }
4824              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4825              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4826              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4827                break;                break;
# Line 4541  for (;;) Line 4838  for (;;)
4838                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4839                break;                break;
4840                }                }
4841              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4842              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4843              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4844                break;                break;
4845              eptr+= len;              eptr+= len;
4846              }              }
4847            break;            break;
4848    
4849              case PT_ALNUM:
4850              for (i = min; i < max; i++)
4851                {
4852                int len = 1;
4853                if (eptr >= md->end_subject)
4854                  {
4855                  SCHECK_PARTIAL();
4856                  break;
4857                  }
4858                GETCHARLENTEST(c, eptr, len);
4859                prop_category = UCD_CATEGORY(c);
4860                if ((prop_category == ucp_L || prop_category == ucp_N)
4861                     == prop_fail_result)
4862                  break;
4863                eptr+= len;
4864                }
4865              break;
4866    
4867              case PT_SPACE:    /* Perl space */
4868              for (i = min; i < max; i++)
4869                {
4870                int len = 1;
4871                if (eptr >= md->end_subject)
4872                  {
4873                  SCHECK_PARTIAL();
4874                  break;
4875                  }
4876                GETCHARLENTEST(c, eptr, len);
4877                prop_category = UCD_CATEGORY(c);
4878                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4879                     c == CHAR_FF || c == CHAR_CR)
4880                     == prop_fail_result)
4881                  break;
4882                eptr+= len;
4883                }
4884              break;
4885    
4886              case PT_PXSPACE:  /* POSIX space */
4887              for (i = min; i < max; i++)
4888                {
4889                int len = 1;
4890                if (eptr >= md->end_subject)
4891                  {
4892                  SCHECK_PARTIAL();
4893                  break;
4894                  }
4895                GETCHARLENTEST(c, eptr, len);
4896                prop_category = UCD_CATEGORY(c);
4897                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4898                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4899                     == prop_fail_result)
4900                  break;
4901                eptr+= len;
4902                }
4903              break;
4904    
4905              case PT_WORD:
4906              for (i = min; i < max; i++)
4907                {
4908                int len = 1;
4909                if (eptr >= md->end_subject)
4910                  {
4911                  SCHECK_PARTIAL();
4912                  break;
4913                  }
4914                GETCHARLENTEST(c, eptr, len);
4915                prop_category = UCD_CATEGORY(c);
4916                if ((prop_category == ucp_L || prop_category == ucp_N ||
4917                     c == CHAR_UNDERSCORE) == prop_fail_result)
4918                  break;
4919                eptr+= len;
4920                }
4921              break;
4922    
4923              default:
4924              RRETURN(PCRE_ERROR_INTERNAL);
4925            }            }
4926    
4927          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
# Line 4555  for (;;) Line 4929  for (;;)
4929          if (possessive) continue;          if (possessive) continue;
4930          for(;;)          for(;;)
4931            {            {
4932            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM44);
4933            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4934            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4935            if (utf8) BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
# Line 4596  for (;;) Line 4970  for (;;)
4970    
4971          for(;;)          for(;;)
4972            {            {
4973            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM45);
4974            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4975            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4976            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
# Line 4880  for (;;) Line 5254  for (;;)
5254            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5255            }            }
5256    
5257          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run. If possessive, we are
5258            done (no backing up). Otherwise, match at this position; anything other
5259            than no match is immediately returned. For nomatch, back up one
5260            character, unless we are matching \R and the last thing matched was
5261            \r\n, in which case, back up two bytes. */
5262    
5263          if (possessive) continue;          if (possessive) continue;
5264          for(;;)          for(;;)
5265            {            {
5266            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM46);
5267            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5268            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5269            BACKCHAR(eptr);            BACKCHAR(eptr);
5270              if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5271                  eptr[-1] == '\r') eptr--;
5272            }            }
5273          }          }
5274        else        else
# Line 5087  for (;;) Line 5467  for (;;)
5467            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5468            }            }
5469    
5470          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run. If possessive, we are
5471            done (no backing up). Otherwise, match at this position; anything other
5472            than no match is immediately returned. For nomatch, back up one
5473            character (byte), unless we are matching \R and the last thing matched
5474            was \r\n, in which case, back up two bytes. */
5475    
5476          if (possessive) continue;          if (possessive) continue;
5477          while (eptr >= pp)          while (eptr >= pp)
5478            {            {
5479            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM47);
           eptr--;  
5480            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5481              eptr--;
5482              if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5483                  eptr[-1] == '\r') eptr--;
5484            }            }
5485          }          }
5486    
# Line 5133  switch (frame->Xwhere) Line 5519  switch (frame->Xwhere)
5519    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5520    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5521    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5522    LBL(53) LBL(54)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
5523  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5524    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5525    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
5526  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5527    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5528      LBL(59) LBL(60) LBL(61) LBL(62)
5529  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
5530  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
5531    default:    default:
# Line 5161  Undefine all the macros that were define Line 5548  Undefine all the macros that were define
5548  #undef ecode  #undef ecode
5549  #undef mstart  #undef mstart
5550  #undef offset_top  #undef offset_top
 #undef ims  
5551  #undef eptrb  #undef eptrb
5552  #undef flags  #undef flags
5553    
# Line 5179  Undefine all the macros that were define Line 5565  Undefine all the macros that were define
5565  #undef condition  #undef condition
5566  #undef prev_is_word  #undef prev_is_word
5567    
 #undef original_ims  
   
5568  #undef ctype  #undef ctype
5569  #undef length  #undef length
5570  #undef max  #undef max
# Line 5242  int first_byte = -1; Line 5626  int first_byte = -1;
5626  int req_byte = -1;  int req_byte = -1;
5627  int req_byte2 = -1;  int req_byte2 = -1;
5628  int newline;  int newline;
 unsigned long int ims;  
5629  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
5630  BOOL anchored;  BOOL anchored;
5631  BOOL startline;  BOOL startline;
# Line 5272  if ((options & ~PUBLIC_EXEC_OPTIONS) != Line 5655  if ((options & ~PUBLIC_EXEC_OPTIONS) !=
5655  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL ||
5656     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5657  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5658    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5659    
5660  /* This information is for finding all the numbers associated with a given  /* This information is for finding all the numbers associated with a given
5661  name, for condition testing. */  name, for condition testing. */
# Line 5342  end_subject = md->end_subject; Line 5726  end_subject = md->end_subject;
5726    
5727  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5728  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5729    md->use_ucp = (re->options & PCRE_UCP) != 0;
5730  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5731    
5732  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
# Line 5431  defined (though never set). So there's n Line 5816  defined (though never set). So there's n
5816  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5817    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
5818    
5819  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Pass back the character offset and error
5820  back the character offset. */  code if a results vector is available. */
5821    
5822  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5823  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5824    {    {
5825    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)    int errorcode;
5826      return PCRE_ERROR_BADUTF8;    int tb = _pcre_valid_utf8((USPTR)subject, length, &errorcode);
5827    if (start_offset > 0 && start_offset < length)    if (tb >= 0)
5828      {      {
5829      int tb = ((USPTR)subject)[start_offset];      if (offsetcount >= 2)
     if (tb > 127)  
5830        {        {
5831        tb &= 0xc0;        offsets[0] = tb;
5832        if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;        offsets[1] = errorcode;
5833        }        }
5834        return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5835          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5836        }
5837      if (start_offset > 0 && start_offset < length)
5838        {
5839        tb = ((USPTR)subject)[start_offset] & 0xc0;
5840        if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
5841      }      }
5842    }    }
5843  #endif  #endif
5844    
 /* The ims options can vary during the matching as a result of the presence  
 of (?ims) items in the pattern. They are kept in a local variable so that  
 restoring at the exit of a group is easy. */  
   
 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);  
   
5845  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
5846  hold, we get a temporary chunk of working store to use during the matching.  hold, we get a temporary chunk of working store to use during the matching.
5847  Otherwise, we can use the vector supplied, rounding down its size to a multiple  Otherwise, we can use the vector supplied, rounding down its size to a multiple
# Line 5574  for(;;) Line 5959  for(;;)
5959    /* There are some optimizations that avoid running the match if a known    /* There are some optimizations that avoid running the match if a known
5960    starting point is not found, or if a known later character is not present.    starting point is not found, or if a known later character is not present.
5961    However, there is an option that disables these, for testing and for ensuring    However, there is an option that disables these, for testing and for ensuring
5962    that all callouts do actually occur. */    that all callouts do actually occur. The option can be set in the regex by
5963      (*NO_START_OPT) or passed in match-time options. */
5964    
5965    if ((options & PCRE_NO_START_OPTIMIZE) == 0)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
5966      {      {
5967      /* Advance to a unique first byte if there is one. */      /* Advance to a unique first byte if there is one. */
5968    
# Line 5630  for(;;) Line 6016  for(;;)
6016        while (start_match < end_subject)        while (start_match < end_subject)
6017          {          {
6018          register unsigned int c = *start_match;          register unsigned int c = *start_match;
6019          if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;          if ((start_bits[c/8] & (1 << (c&7))) == 0)
6020            else break;            {
6021              start_match++;
6022    #ifdef SUPPORT_UTF8
6023              if (utf8)
6024                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6025                  start_match++;
6026    #endif
6027              }
6028            else break;
6029          }          }
6030        }        }
6031      }   /* Starting optimizations */      }   /* Starting optimizations */
# Line 5722  for(;;) Line 6116  for(;;)
6116    
6117    /* OK, we can now run the match. If "hitend" is set afterwards, remember the    /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6118    first starting point for which a partial match was found. */    first starting point for which a partial match was found. */
6119    
6120    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
6121    md->start_used_ptr = start_match;    md->start_used_ptr = start_match;
6122    md->match_call_count = 0;    md->match_call_count = 0;
6123    rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,    rc = match(start_match, md->start_code, start_match, NULL, 2, md, NULL,
6124      0, 0);      0, 0);
6125    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6126    
6127    switch(rc)    switch(rc)
6128      {      {
6129      /* NOMATCH and PRUNE advance by one character. If MATCH_SKIP_ARG reaches      /* SKIP passes back the next starting point explicitly, but if it is the
6130      this level it means that a MARK that matched the SKIP's arg was not found.      same as the match we have just done, treat it as NOMATCH. */
6131      We treat this as NOMATCH. THEN at this level acts exactly like PRUNE. */  
6132        case MATCH_SKIP:
6133        if (md->start_match_ptr != start_match)
6134          {
6135          new_start_match = md->start_match_ptr;
6136          break;
6137          }
6138        /* Fall through */
6139    
6140        /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6141        the SKIP's arg was not found. We also treat this as NOMATCH. */
6142    
6143        case MATCH_SKIP_ARG:
6144        /* Fall through */
6145    
6146        /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6147        exactly like PRUNE. */
6148    
6149      case MATCH_NOMATCH:      case MATCH_NOMATCH:
6150      case MATCH_PRUNE:      case MATCH_PRUNE:
     case MATCH_SKIP_ARG:  
6151      case MATCH_THEN:      case MATCH_THEN:
6152      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6153  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 5748  for(;;) Line 6157  for(;;)
6157  #endif  #endif
6158      break;      break;
6159    
     /* SKIP passes back the next starting point explicitly. */  
   
     case MATCH_SKIP:  
     new_start_match = md->start_match_ptr;  
     break;  
   
6160      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6161    
6162      case MATCH_COMMIT:      case MATCH_COMMIT:
# Line 5850  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6253  if (rc == MATCH_MATCH || rc == MATCH_ACC
6253    
6254    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
6255      {      {
6256      offsets[0] = md->start_match_ptr - md->start_subject;      offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6257      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6258      }      }
6259    
6260    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
# Line 5866  if (using_temporary_offsets) Line 6269  if (using_temporary_offsets)
6269    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
6270    (pcre_free)(md->offset_vector);    (pcre_free)(md->offset_vector);
6271    }    }
6272    
6273  /* For anything other than nomatch or partial match, just return the code. */  /* For anything other than nomatch or partial match, just return the code. */
6274    
6275  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
6276    {    {
6277    DPRINTF((">>>> error: returning %d\n", rc));    DPRINTF((">>>> error: returning %d\n", rc));
6278    return rc;    return rc;
6279    }    }
6280    
6281  /* Handle partial matches - disable any mark data */  /* Handle partial matches - disable any mark data */
6282    
6283  if (start_partial != NULL)  if (start_partial != NULL)
6284    {    {
6285    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
6286    md->mark = NULL;    md->mark = NULL;
6287    if (offsetcount > 1)    if (offsetcount > 1)
6288      {      {
6289      offsets[0] = start_partial - (USPTR)subject;      offsets[0] = (int)(start_partial - (USPTR)subject);
6290      offsets[1] = end_subject - (USPTR)subject;      offsets[1] = (int)(end_subject - (USPTR)subject);
6291      }      }
6292    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
6293    }    }
6294    
6295  /* This is the classic nomatch case */  /* This is the classic nomatch case */
6296    
6297  else  else
6298    {    {
6299    DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));    DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
6300    rc = PCRE_ERROR_NOMATCH;    rc = PCRE_ERROR_NOMATCH;
6301    }    }
6302    
6303  /* Return the MARK data if it has been requested. */  /* Return the MARK data if it has been requested. */
6304    
6305  RETURN_MARK:  RETURN_MARK:
6306    
6307  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6308    *(extra_data->mark) = (unsigned char *)(md->mark);    *(extra_data->mark) = (unsigned char *)(md->mark);
6309  return rc;  return rc;
6310  }  }
6311    
6312  /* End of pcre_exec.c */  /* End of pcre_exec.c */

Legend:
Removed from v.511  
changed lines
  Added in v.602

  ViewVC Help
Powered by ViewVC 1.1.5