/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 517 by ph10, Wed May 5 10:44:20 2010 UTC | revision 600 by ph10, Mon May 9 08:54:11 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2010 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 132  while (length-- > 0) Line 132  while (length-- > 0)
132  *          Match a back-reference                *  *          Match a back-reference                *
133  *************************************************/  *************************************************/
134    
135  /* If a back reference hasn't been set, the length that is passed is greater  /* Normally, if a back reference hasn't been set, the length that is passed is
136  than the number of characters left in the string, so the match fails.  negative, so the match always fails. However, in JavaScript compatibility mode,
137    the length passed is zero. Note that in caseless UTF-8 mode, the number of
138    subject bytes matched may be different to the number of reference bytes.
139    
140  Arguments:  Arguments:
141    offset      index into the offset vector    offset      index into the offset vector
142    eptr        points into the subject    eptr        pointer into the subject
143    length      length to be matched    length      length of reference to be matched (number of bytes)
144    md          points to match data block    md          points to match data block
145    ims         the ims flags    ims         the ims flags
146    
147  Returns:      TRUE if matched  Returns:      < 0 if not matched, otherwise the number of subject bytes matched
148  */  */
149    
150  static BOOL  static int
151  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
152    unsigned long int ims)    unsigned long int ims)
153  {  {
154  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR eptr_start = eptr;
155    register USPTR p = md->start_subject + md->offset_vector[offset];
156    
157  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
158  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 164  pchars(p, length, FALSE, md); Line 167  pchars(p, length, FALSE, md);
167  printf("\n");  printf("\n");
168  #endif  #endif
169    
170  /* Always fail if not enough characters left */  /* Always fail if reference not set (and not JavaScript compatible). */
171    
172  if (length > md->end_subject - eptr) return FALSE;  if (length < 0) return -1;
173    
174  /* Separate the caseless case for speed. In UTF-8 mode we can only do this  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175  properly if Unicode properties are supported. Otherwise, we can check only  properly if Unicode properties are supported. Otherwise, we can check only
# Line 178  if ((ims & PCRE_CASELESS) != 0) Line 181  if ((ims & PCRE_CASELESS) != 0)
181  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
182    if (md->utf8)    if (md->utf8)
183      {      {
184      USPTR endptr = eptr + length;      /* Match characters up to the end of the reference. NOTE: the number of
185      while (eptr < endptr)      bytes matched may differ, because there are some characters whose upper and
186        lower case versions code as different numbers of bytes. For example, U+023A
187        (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188        a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189        the latter. It is important, therefore, to check the length along the
190        reference, not along the subject (earlier code did this wrong). */
191    
192        USPTR endptr = p + length;
193        while (p < endptr)
194        {        {
195        int c, d;        int c, d;
196          if (eptr >= md->end_subject) return -1;
197        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
198        GETCHARINC(d, p);        GETCHARINC(d, p);
199        if (c != d && c != UCD_OTHERCASE(d)) return FALSE;        if (c != d && c != UCD_OTHERCASE(d)) return -1;
200        }        }
201      }      }
202    else    else
# Line 193  if ((ims & PCRE_CASELESS) != 0) Line 205  if ((ims & PCRE_CASELESS) != 0)
205    
206    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
207    is no UCP support. */    is no UCP support. */
208        {
209    while (length-- > 0)      if (eptr + length > md->end_subject) return -1;
210      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }      while (length-- > 0)
211          { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
212        }
213    }    }
214    
215  /* In the caseful case, we can just compare the bytes, whether or not we  /* In the caseful case, we can just compare the bytes, whether or not we
216  are in UTF-8 mode. */  are in UTF-8 mode. */
217    
218  else  else
219    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    {
220      if (eptr + length > md->end_subject) return -1;
221      while (length-- > 0) if (*p++ != *eptr++) return -1;
222      }
223    
224  return TRUE;  return eptr - eptr_start;
225  }  }
226    
227    
# Line 255  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 272  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
272         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
273         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
274         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
275         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58 };         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
276           RM61,  RM62 };
277    
278  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
279  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 293  argument of match(), which never changes Line 311  argument of match(), which never changes
311    
312  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
313    {\    {\
314    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
315      if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316    frame->Xwhere = rw; \    frame->Xwhere = rw; \
317    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
318    newframe->Xecode = rb;\    newframe->Xecode = rb;\
# Line 314  argument of match(), which never changes Line 333  argument of match(), which never changes
333    
334  #define RRETURN(ra)\  #define RRETURN(ra)\
335    {\    {\
336    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
337    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
338    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
339    if (frame != NULL)\    if (frame != NULL)\
340      {\      {\
341      rrc = ra;\      rrc = ra;\
# Line 420  immediately. The second one is used when Line 439  immediately. The second one is used when
439  the subject. */  the subject. */
440    
441  #define CHECK_PARTIAL()\  #define CHECK_PARTIAL()\
442    if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\    if (md->partial != 0 && eptr >= md->end_subject && \
443      {\        eptr > md->start_used_ptr) \
444      md->hitend = TRUE;\      { \
445      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      md->hitend = TRUE; \
446        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
447      }      }
448    
449  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
450    if (md->partial != 0 && eptr > mstart)\    if (md->partial != 0 && eptr > md->start_used_ptr) \
451      {\      { \
452      md->hitend = TRUE;\      md->hitend = TRUE; \
453      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
454      }      }
455    
456    
# Line 486  heap storage. Set up the top-level frame Line 506  heap storage. Set up the top-level frame
506  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
507    
508  #ifdef NO_RECURSE  #ifdef NO_RECURSE
509  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
510    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
511  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
512    
513  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 707  for (;;) Line 728  for (;;)
728      case OP_FAIL:      case OP_FAIL:
729      MRRETURN(MATCH_NOMATCH);      MRRETURN(MATCH_NOMATCH);
730    
731        /* COMMIT overrides PRUNE, SKIP, and THEN */
732    
733      case OP_COMMIT:      case OP_COMMIT:
734      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
735        ims, eptrb, flags, RM52);        ims, eptrb, flags, RM52);
736      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
737            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
738            rrc != MATCH_THEN)
739          RRETURN(rrc);
740      MRRETURN(MATCH_COMMIT);      MRRETURN(MATCH_COMMIT);
741    
742        /* PRUNE overrides THEN */
743    
744      case OP_PRUNE:      case OP_PRUNE:
745      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
746        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM51);
747      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
748      MRRETURN(MATCH_PRUNE);      MRRETURN(MATCH_PRUNE);
749    
750      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
751      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752        ims, eptrb, flags, RM56);        ims, eptrb, flags, RM56);
753      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
754      md->mark = ecode + 2;      md->mark = ecode + 2;
755      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
756    
757        /* SKIP overrides PRUNE and THEN */
758    
759      case OP_SKIP:      case OP_SKIP:
760      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
761        ims, eptrb, flags, RM53);        ims, eptrb, flags, RM53);
762      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
763          RRETURN(rrc);
764      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
765      MRRETURN(MATCH_SKIP);      MRRETURN(MATCH_SKIP);
766    
767      case OP_SKIP_ARG:      case OP_SKIP_ARG:
768      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
769        ims, eptrb, flags, RM57);        ims, eptrb, flags, RM57);
770      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
771          RRETURN(rrc);
772    
773      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
774      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
# Line 746  for (;;) Line 778  for (;;)
778      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
779      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
780    
781        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
782        the alt that is at the start of the current branch. This makes it possible
783        to skip back past alternatives that precede the THEN within the current
784        branch. */
785    
786      case OP_THEN:      case OP_THEN:
787      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
788        ims, eptrb, flags, RM54);        ims, eptrb, flags, RM54);
789      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
790        md->start_match_ptr = ecode - GET(ecode, 1);
791      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
792    
793      case OP_THEN_ARG:      case OP_THEN_ARG:
794      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
795        ims, eptrb, flags, RM58);        offset_top, md, ims, eptrb, flags, RM58);
796      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
797      md->mark = ecode + 2;      md->start_match_ptr = ecode - GET(ecode, 1);
798        md->mark = ecode + LINK_SIZE + 2;
799      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
800    
801      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
# Line 793  for (;;) Line 832  for (;;)
832        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
833    
834        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
835        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
836            (int)(eptr - md->start_subject);
837    
838        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
839        do        do
840          {          {
841          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
842            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
843          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
844                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
845              RRETURN(rrc);
846          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
847          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
848          }          }
# Line 861  for (;;) Line 903  for (;;)
903    
904        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
905          eptrb, flags, RM2);          eptrb, flags, RM2);
906        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
907              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
908            RRETURN(rrc);
909        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
910        }        }
911      /* Control never reaches here. */      /* Control never reaches here. */
# Line 888  for (;;) Line 932  for (;;)
932          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
933          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
934          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
935          cb.subject_length   = md->end_subject - md->start_subject;          cb.subject_length   = (int)(md->end_subject - md->start_subject);
936          cb.start_match      = mstart - md->start_subject;          cb.start_match      = (int)(mstart - md->start_subject);
937          cb.current_position = eptr - md->start_subject;          cb.current_position = (int)(eptr - md->start_subject);
938          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
939          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
940          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
# Line 1062  for (;;) Line 1106  for (;;)
1106          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1107          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1108          }          }
1109        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)        else if (rrc != MATCH_NOMATCH &&
1110                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1111          {          {
1112          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1113          }          }
# Line 1116  for (;;) Line 1161  for (;;)
1161        {        {
1162        md->offset_vector[offset] =        md->offset_vector[offset] =
1163          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
1164        md->offset_vector[offset+1] = eptr - md->start_subject;        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1165        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1166        }        }
1167      ecode += 3;      ecode += 3;
# Line 1190  for (;;) Line 1235  for (;;)
1235          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1236          break;          break;
1237          }          }
1238        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1239              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1240            RRETURN(rrc);
1241        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1242        }        }
1243      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1224  for (;;) Line 1271  for (;;)
1271          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1272          break;          break;
1273          }          }
1274        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1275              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1276            RRETURN(rrc);
1277        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1278        }        }
1279      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1279  for (;;) Line 1328  for (;;)
1328        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1329        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1330        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1331        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1332        cb.start_match      = mstart - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1333        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1334        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1335        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1336        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
# Line 1361  for (;;) Line 1410  for (;;)
1410              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1411            MRRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1412            }            }
1413          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH &&
1414                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1415            {            {
1416            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1417            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1404  for (;;) Line 1454  for (;;)
1454          mstart = md->start_match_ptr;          mstart = md->start_match_ptr;
1455          break;          break;
1456          }          }
1457        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1458              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1459            RRETURN(rrc);
1460        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1461        }        }
1462      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1558  for (;;) Line 1610  for (;;)
1610          {          {
1611          md->offset_vector[offset] =          md->offset_vector[offset] =
1612            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1613          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1614          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1615          }          }
1616    
# Line 1670  for (;;) Line 1722  for (;;)
1722        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1723          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1724        else        else
1725          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }          {
1726            if (md->noteol) MRRETURN(MATCH_NOMATCH);
1727            SCHECK_PARTIAL();
1728            }
1729        ecode++;        ecode++;
1730        break;        break;
1731        }        }
1732      else      else  /* Not multiline */
1733        {        {
1734        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1735        if (!md->endonly)        if (!md->endonly) goto ASSERT_NL_OR_EOS;
         {  
         if (eptr != md->end_subject &&  
             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))  
           MRRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1736        }        }
1737    
1738      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1739    
1740      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1741    
1742      case OP_EOD:      case OP_EOD:
1743      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1744        SCHECK_PARTIAL();
1745      ecode++;      ecode++;
1746      break;      break;
1747    
1748      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1749    
1750      case OP_EODN:      case OP_EODN:
1751      if (eptr != md->end_subject &&      ASSERT_NL_OR_EOS:
1752        if (eptr < md->end_subject &&
1753          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1754        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1755    
1756        /* Either at end of string or \n before end. */
1757    
1758        SCHECK_PARTIAL();
1759      ecode++;      ecode++;
1760      break;      break;
1761    
# Line 1718  for (;;) Line 1773  for (;;)
1773  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1774        if (utf8)        if (utf8)
1775          {          {
1776            /* Get status of previous character */
1777    
1778          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1779            {            {
1780            USPTR lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1781            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1782            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1783            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1784    #ifdef SUPPORT_UCP
1785              if (md->use_ucp)
1786                {
1787                if (c == '_') prev_is_word = TRUE; else
1788                  {
1789                  int cat = UCD_CATEGORY(c);
1790                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1791                  }
1792                }
1793              else
1794    #endif
1795            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1796            }            }
1797    
1798            /* Get status of next character */
1799    
1800          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1801            {            {
1802            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 1734  for (;;) Line 1805  for (;;)
1805          else          else
1806            {            {
1807            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1808    #ifdef SUPPORT_UCP
1809              if (md->use_ucp)
1810                {
1811                if (c == '_') cur_is_word = TRUE; else
1812                  {
1813                  int cat = UCD_CATEGORY(c);
1814                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1815                  }
1816                }
1817              else
1818    #endif
1819            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1820            }            }
1821          }          }
1822        else        else
1823  #endif  #endif
1824    
1825        /* Not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1826          consistency with the behaviour of \w we do use it in this case. */
1827    
1828          {          {
1829            /* Get status of previous character */
1830    
1831          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1832            {            {
1833            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1834    #ifdef SUPPORT_UCP
1835              if (md->use_ucp)
1836                {
1837                c = eptr[-1];
1838                if (c == '_') prev_is_word = TRUE; else
1839                  {
1840                  int cat = UCD_CATEGORY(c);
1841                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1842                  }
1843                }
1844              else
1845    #endif
1846            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1847            }            }
1848    
1849            /* Get status of next character */
1850    
1851          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1852            {            {
1853            SCHECK_PARTIAL();            SCHECK_PARTIAL();
1854            cur_is_word = FALSE;            cur_is_word = FALSE;
1855            }            }
1856          else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          else
1857    #ifdef SUPPORT_UCP
1858            if (md->use_ucp)
1859              {
1860              c = *eptr;
1861              if (c == '_') cur_is_word = TRUE; else
1862                {
1863                int cat = UCD_CATEGORY(c);
1864                cur_is_word = (cat == ucp_L || cat == ucp_N);
1865                }
1866              }
1867            else
1868    #endif
1869            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1870          }          }
1871    
1872        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 1904  for (;;) Line 2017  for (;;)
2017      switch(c)      switch(c)
2018        {        {
2019        default: MRRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
2020    
2021        case 0x000d:        case 0x000d:
2022        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2023        break;        break;
# Line 2076  for (;;) Line 2190  for (;;)
2190          if ((ecode[2] != prop->script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2191            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2192          break;          break;
2193    
2194          /* These are specials */          /* These are specials */
2195    
2196          case PT_ALNUM:          case PT_ALNUM:
2197          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2198               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2199            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2200          break;          break;
2201    
2202          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2203          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2204               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2205                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2206            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2207          break;          break;
2208    
2209          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2210          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2211               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2212               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2213                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2214            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2215          break;          break;
2216    
2217          case PT_WORD:          case PT_WORD:
2218          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2219               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2220               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2221            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2222          break;          break;
2223    
2224          /* This should never occur */          /* This should never occur */
2225    
2226          default:          default:
# Line 2156  for (;;) Line 2270  for (;;)
2270      loops). */      loops). */
2271    
2272      case OP_REF:      case OP_REF:
2273        {      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2274        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      ecode += 3;
       ecode += 3;  
2275    
2276        /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2277    
2278        (a) In the default, Perl-compatible state, set the length to be longer      (a) In the default, Perl-compatible state, set the length negative;
2279        than the amount of subject left; this ensures that every attempt at a      this ensures that every attempt at a match fails. We can't just fail
2280        match fails. We can't just fail here, because of the possibility of      here, because of the possibility of quantifiers with zero minima.
       quantifiers with zero minima.  
2281    
2282        (b) If the JavaScript compatibility flag is set, set the length to zero      (b) If the JavaScript compatibility flag is set, set the length to zero
2283        so that the back reference matches an empty string.      so that the back reference matches an empty string.
2284    
2285        Otherwise, set the length to the length of what was matched by the      Otherwise, set the length to the length of what was matched by the
2286        referenced subpattern. */      referenced subpattern. */
2287    
2288        if (offset >= offset_top || md->offset_vector[offset] < 0)      if (offset >= offset_top || md->offset_vector[offset] < 0)
2289          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;        length = (md->jscript_compat)? 0 : -1;
2290        else      else
2291          length = md->offset_vector[offset+1] - md->offset_vector[offset];        length = md->offset_vector[offset+1] - md->offset_vector[offset];
2292    
2293        /* Set up for repetition, or handle the non-repeated case */      /* Set up for repetition, or handle the non-repeated case */
2294    
2295        switch (*ecode)      switch (*ecode)
2296          {        {
2297          case OP_CRSTAR:        case OP_CRSTAR:
2298          case OP_CRMINSTAR:        case OP_CRMINSTAR:
2299          case OP_CRPLUS:        case OP_CRPLUS:
2300          case OP_CRMINPLUS:        case OP_CRMINPLUS:
2301          case OP_CRQUERY:        case OP_CRQUERY:
2302          case OP_CRMINQUERY:        case OP_CRMINQUERY:
2303          c = *ecode++ - OP_CRSTAR;        c = *ecode++ - OP_CRSTAR;
2304          minimize = (c & 1) != 0;        minimize = (c & 1) != 0;
2305          min = rep_min[c];                 /* Pick up values from tables; */        min = rep_min[c];                 /* Pick up values from tables; */
2306          max = rep_max[c];                 /* zero for max => infinity */        max = rep_max[c];                 /* zero for max => infinity */
2307          if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2308          break;        break;
2309    
2310          case OP_CRRANGE:        case OP_CRRANGE:
2311          case OP_CRMINRANGE:        case OP_CRMINRANGE:
2312          minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2313          min = GET2(ecode, 1);        min = GET2(ecode, 1);
2314          max = GET2(ecode, 3);        max = GET2(ecode, 3);
2315          if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2316          ecode += 5;        ecode += 5;
2317          break;        break;
2318    
2319          default:               /* No repeat follows */        default:               /* No repeat follows */
2320          if (!match_ref(offset, eptr, length, md, ims))        if ((length = match_ref(offset, eptr, length, md, ims)) < 0)
2321            {          {
2322            CHECK_PARTIAL();          CHECK_PARTIAL();
2323            MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
           }  
         eptr += length;  
         continue;              /* With the main loop */  
2324          }          }
2325          eptr += length;
2326          continue;              /* With the main loop */
2327          }
2328    
2329        /* If the length of the reference is zero, just continue with the      /* Handle repeated back references. If the length of the reference is
2330        main loop. */      zero, just continue with the main loop. */
2331    
2332        if (length == 0) continue;      if (length == 0) continue;
2333    
2334        /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2335        the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
2336        address of eptr, so that eptr can be a register variable. */      address of eptr, so that eptr can be a register variable. */
2337    
2338        for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2339          {
2340          int slength;
2341          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2342          {          {
2343          if (!match_ref(offset, eptr, length, md, ims))          CHECK_PARTIAL();
2344            {          MRRETURN(MATCH_NOMATCH);
           CHECK_PARTIAL();  
           MRRETURN(MATCH_NOMATCH);  
           }  
         eptr += length;  
2345          }          }
2346          eptr += slength;
2347          }
2348    
2349        /* If min = max, continue at the same level without recursion.      /* If min = max, continue at the same level without recursion.
2350        They are not both allowed to be zero. */      They are not both allowed to be zero. */
2351    
2352        if (min == max) continue;      if (min == max) continue;
2353    
2354        /* If minimizing, keep trying and advancing the pointer */      /* If minimizing, keep trying and advancing the pointer */
2355    
2356        if (minimize)      if (minimize)
2357          {
2358          for (fi = min;; fi++)
2359          {          {
2360          for (fi = min;; fi++)          int slength;
2361            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2362            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2363            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2364            if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2365            {            {
2366            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            CHECK_PARTIAL();
2367            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            MRRETURN(MATCH_NOMATCH);
           if (fi >= max) MRRETURN(MATCH_NOMATCH);  
           if (!match_ref(offset, eptr, length, md, ims))  
             {  
             CHECK_PARTIAL();  
             MRRETURN(MATCH_NOMATCH);  
             }  
           eptr += length;  
2368            }            }
2369          /* Control never gets here */          eptr += slength;
2370          }          }
2371          /* Control never gets here */
2372          }
2373    
2374        /* If maximizing, find the longest string and work backwards */      /* If maximizing, find the longest string and work backwards */
2375    
2376        else      else
2377          {
2378          pp = eptr;
2379          for (i = min; i < max; i++)
2380          {          {
2381          pp = eptr;          int slength;
2382          for (i = min; i < max; i++)          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2383            {            {
2384            if (!match_ref(offset, eptr, length, md, ims))            CHECK_PARTIAL();
2385              {            break;
             CHECK_PARTIAL();  
             break;  
             }  
           eptr += length;  
           }  
         while (eptr >= pp)  
           {  
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);  
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
           eptr -= length;  
2386            }            }
2387          MRRETURN(MATCH_NOMATCH);          eptr += slength;
2388            }
2389          while (eptr >= pp)
2390            {
2391            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2392            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2393            eptr -= length;
2394          }          }
2395          MRRETURN(MATCH_NOMATCH);
2396        }        }
2397      /* Control never gets here */      /* Control never gets here */
2398    
# Line 3524  for (;;) Line 3638  for (;;)
3638                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3639              }              }
3640            break;            break;
3641    
3642            case PT_ALNUM:            case PT_ALNUM:
3643            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3644              {              {
# Line 3534  for (;;) Line 3648  for (;;)
3648                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3649                }                }
3650              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3651              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3652              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((prop_category == ucp_L || prop_category == ucp_N)
3653                     == prop_fail_result)                     == prop_fail_result)
3654                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3655              }              }
3656            break;            break;
3657    
3658            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
3659            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3660              {              {
# Line 3550  for (;;) Line 3664  for (;;)
3664                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3665                }                }
3666              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3667              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3668              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3669                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
3670                     == prop_fail_result)                     == prop_fail_result)
3671                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3672              }              }
3673            break;            break;
3674    
3675            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
3676            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3677              {              {
# Line 3567  for (;;) Line 3681  for (;;)
3681                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3682                }                }
3683              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3684              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3685              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3686                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3687                     == prop_fail_result)                     == prop_fail_result)
3688                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3689              }              }
3690            break;            break;
3691    
3692            case PT_WORD:            case PT_WORD:
3693            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3694              {              {
3695              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3584  for (;;) Line 3698  for (;;)
3698                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3699                }                }
3700              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3701              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3702              if ((prop_category == ucp_L || prop_category == ucp_N ||              if ((prop_category == ucp_L || prop_category == ucp_N ||
3703                   c == CHAR_UNDERSCORE)                   c == CHAR_UNDERSCORE)
3704                     == prop_fail_result)                     == prop_fail_result)
3705                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3706              }              }
3707            break;            break;
3708    
3709            /* This should not occur */            /* This should not occur */
3710    
3711            default:            default:
# Line 3678  for (;;) Line 3792  for (;;)
3792            switch(c)            switch(c)
3793              {              {
3794              default: MRRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3795    
3796              case 0x000d:              case 0x000d:
3797              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3798              break;              break;
# Line 3954  for (;;) Line 4069  for (;;)
4069            switch(*eptr++)            switch(*eptr++)
4070              {              {
4071              default: MRRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4072    
4073              case 0x000d:              case 0x000d:
4074              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4075              break;              break;
4076    
4077              case 0x000a:              case 0x000a:
4078              break;              break;
4079    
# Line 4154  for (;;) Line 4271  for (;;)
4271                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4272                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4273                }                }
4274              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4275              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4276              }              }
4277            /* Control never gets here */            /* Control never gets here */
# Line 4170  for (;;) Line 4287  for (;;)
4287                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4288                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4289                }                }
4290              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4291              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4292              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4293                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4190  for (;;) Line 4307  for (;;)
4307                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4308                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4309                }                }
4310              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4311              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4312              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4313                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4208  for (;;) Line 4325  for (;;)
4325                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4326                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4327                }                }
4328              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4329              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4330              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4331                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4226  for (;;) Line 4343  for (;;)
4343                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4344                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4345                }                }
4346              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4347              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4348              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4349                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4236  for (;;) Line 4353  for (;;)
4353            case PT_ALNUM:            case PT_ALNUM:
4354            for (fi = min;; fi++)            for (fi = min;; fi++)
4355              {              {
4356              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4357              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4358              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4359              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4244  for (;;) Line 4361  for (;;)
4361                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4362                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4363                }                }
4364              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4365              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4366              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((prop_category == ucp_L || prop_category == ucp_N)
4367                     == prop_fail_result)                     == prop_fail_result)
4368                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4369              }              }
4370            /* Control never gets here */            /* Control never gets here */
4371    
4372            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
4373            for (fi = min;; fi++)            for (fi = min;; fi++)
4374              {              {
4375              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4376              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4377              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4378              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4263  for (;;) Line 4380  for (;;)
4380                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4381                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4382                }                }
4383              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4384              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4385              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4386                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4387                     == prop_fail_result)                     == prop_fail_result)
4388                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4389              }              }
4390            /* Control never gets here */            /* Control never gets here */
4391    
4392            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
4393            for (fi = min;; fi++)            for (fi = min;; fi++)
4394              {              {
4395              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4396              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4397              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4398              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4283  for (;;) Line 4400  for (;;)
4400                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4401                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4402                }                }
4403              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4404              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4405              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4406                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4407                     == prop_fail_result)                     == prop_fail_result)
4408                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4409              }              }
4410            /* Control never gets here */            /* Control never gets here */
4411    
4412            case PT_WORD:            case PT_WORD:
4413            for (fi = min;; fi++)            for (fi = min;; fi++)
4414              {              {
4415              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4416              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4417              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4418              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4303  for (;;) Line 4420  for (;;)
4420                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4421                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4422                }                }
4423              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4424              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4425              if ((prop_category == ucp_L ||              if ((prop_category == ucp_L ||
4426                   prop_category == ucp_N ||                   prop_category == ucp_N ||
4427                   c == CHAR_UNDERSCORE)                   c == CHAR_UNDERSCORE)
4428                     == prop_fail_result)                     == prop_fail_result)
4429                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4430              }              }
4431            /* Control never gets here */            /* Control never gets here */
4432    
4433            /* This should never occur */            /* This should never occur */
4434    
4435            default:            default:
4436            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
4437            }            }
# Line 4661  for (;;) Line 4778  for (;;)
4778                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4779                break;                break;
4780                }                }
4781              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4782              if (prop_fail_result) break;              if (prop_fail_result) break;
4783              eptr+= len;              eptr+= len;
4784              }              }
# Line 4676  for (;;) Line 4793  for (;;)
4793                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4794                break;                break;
4795                }                }
4796              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4797              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4798              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4799                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4695  for (;;) Line 4812  for (;;)
4812                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4813                break;                break;
4814                }                }
4815              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4816              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4817              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4818                break;                break;
# Line 4712  for (;;) Line 4829  for (;;)
4829                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4830                break;                break;
4831                }                }
4832              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4833              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4834              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4835                break;                break;
# Line 4729  for (;;) Line 4846  for (;;)
4846                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4847                break;                break;
4848                }                }
4849              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4850              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4851              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4852                break;                break;
4853              eptr+= len;              eptr+= len;
4854              }              }
4855            break;            break;
4856    
4857            case PT_ALNUM:            case PT_ALNUM:
4858            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4859              {              {
# Line 4746  for (;;) Line 4863  for (;;)
4863                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4864                break;                break;
4865                }                }
4866              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4867              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4868              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((prop_category == ucp_L || prop_category == ucp_N)
4869                   == prop_fail_result)                   == prop_fail_result)
4870                break;                break;
4871              eptr+= len;              eptr+= len;
# Line 4764  for (;;) Line 4881  for (;;)
4881                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4882                break;                break;
4883                }                }
4884              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4885              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4886              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4887                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4888                   == prop_fail_result)                   == prop_fail_result)
4889                break;                break;
4890              eptr+= len;              eptr+= len;
# Line 4783  for (;;) Line 4900  for (;;)
4900                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4901                break;                break;
4902                }                }
4903              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4904              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4905              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4906                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4907                   == prop_fail_result)                   == prop_fail_result)
4908                break;                break;
4909              eptr+= len;              eptr+= len;
# Line 4802  for (;;) Line 4919  for (;;)
4919                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4920                break;                break;
4921                }                }
4922              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4923              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4924              if ((prop_category == ucp_L || prop_category == ucp_N ||              if ((prop_category == ucp_L || prop_category == ucp_N ||
4925                   c == CHAR_UNDERSCORE) == prop_fail_result)                   c == CHAR_UNDERSCORE) == prop_fail_result)
# Line 5145  for (;;) Line 5262  for (;;)
5262            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5263            }            }
5264    
5265          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run. If possessive, we are
5266            done (no backing up). Otherwise, match at this position; anything other
5267            than no match is immediately returned. For nomatch, back up one
5268            character, unless we are matching \R and the last thing matched was
5269            \r\n, in which case, back up two bytes. */
5270    
5271          if (possessive) continue;          if (possessive) continue;
5272          for(;;)          for(;;)
# Line 5154  for (;;) Line 5275  for (;;)
5275            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5276            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5277            BACKCHAR(eptr);            BACKCHAR(eptr);
5278              if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5279                  eptr[-1] == '\r') eptr--;
5280            }            }
5281          }          }
5282        else        else
# Line 5352  for (;;) Line 5475  for (;;)
5475            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5476            }            }
5477    
5478          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run. If possessive, we are
5479            done (no backing up). Otherwise, match at this position; anything other
5480            than no match is immediately returned. For nomatch, back up one
5481            character (byte), unless we are matching \R and the last thing matched
5482            was \r\n, in which case, back up two bytes. */
5483    
5484          if (possessive) continue;          if (possessive) continue;
5485          while (eptr >= pp)          while (eptr >= pp)
5486            {            {
5487            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
           eptr--;  
5488            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5489              eptr--;
5490              if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5491                  eptr[-1] == '\r') eptr--;
5492            }            }
5493          }          }
5494    
# Line 5404  switch (frame->Xwhere) Line 5533  switch (frame->Xwhere)
5533    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
5534  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5535    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5536      LBL(59) LBL(60) LBL(61) LBL(62)
5537  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
5538  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
5539    default:    default:
# Line 5537  if ((options & ~PUBLIC_EXEC_OPTIONS) != Line 5667  if ((options & ~PUBLIC_EXEC_OPTIONS) !=
5667  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL ||
5668     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5669  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5670    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5671    
5672  /* This information is for finding all the numbers associated with a given  /* This information is for finding all the numbers associated with a given
5673  name, for condition testing. */  name, for condition testing. */
# Line 5607  end_subject = md->end_subject; Line 5738  end_subject = md->end_subject;
5738    
5739  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5740  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5741    md->use_ucp = (re->options & PCRE_UCP) != 0;
5742  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5743    
5744  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
# Line 5696  defined (though never set). So there's n Line 5828  defined (though never set). So there's n
5828  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5829    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
5830    
5831  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Pass back the character offset and error
5832  back the character offset. */  code if a results vector is available. */
5833    
5834  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5835  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5836    {    {
5837    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)    int errorcode;
5838      return PCRE_ERROR_BADUTF8;    int tb = _pcre_valid_utf8((USPTR)subject, length, &errorcode);
5839    if (start_offset > 0 && start_offset < length)    if (tb >= 0)
5840      {      {
5841      int tb = ((USPTR)subject)[start_offset];      if (offsetcount >= 2)
     if (tb > 127)  
5842        {        {
5843        tb &= 0xc0;        offsets[0] = tb;
5844        if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;        offsets[1] = errorcode;
5845        }        }
5846        return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5847          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5848        }
5849      if (start_offset > 0 && start_offset < length)
5850        {
5851        tb = ((USPTR)subject)[start_offset] & 0xc0;
5852        if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
5853      }      }
5854    }    }
5855  #endif  #endif
# Line 5839  for(;;) Line 5977  for(;;)
5977    /* There are some optimizations that avoid running the match if a known    /* There are some optimizations that avoid running the match if a known
5978    starting point is not found, or if a known later character is not present.    starting point is not found, or if a known later character is not present.
5979    However, there is an option that disables these, for testing and for ensuring    However, there is an option that disables these, for testing and for ensuring
5980    that all callouts do actually occur. */    that all callouts do actually occur. The option can be set in the regex by
5981      (*NO_START_OPT) or passed in match-time options. */
5982    
5983    if ((options & PCRE_NO_START_OPTIMIZE) == 0)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
5984      {      {
5985      /* Advance to a unique first byte if there is one. */      /* Advance to a unique first byte if there is one. */
5986    
# Line 5895  for(;;) Line 6034  for(;;)
6034        while (start_match < end_subject)        while (start_match < end_subject)
6035          {          {
6036          register unsigned int c = *start_match;          register unsigned int c = *start_match;
6037          if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;          if ((start_bits[c/8] & (1 << (c&7))) == 0)
6038            else break;            {
6039              start_match++;
6040    #ifdef SUPPORT_UTF8
6041              if (utf8)
6042                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6043                  start_match++;
6044    #endif
6045              }
6046            else break;
6047          }          }
6048        }        }
6049      }   /* Starting optimizations */      }   /* Starting optimizations */
# Line 5997  for(;;) Line 6144  for(;;)
6144    
6145    switch(rc)    switch(rc)
6146      {      {
6147      /* NOMATCH and PRUNE advance by one character. If MATCH_SKIP_ARG reaches      /* SKIP passes back the next starting point explicitly, but if it is the
6148      this level it means that a MARK that matched the SKIP's arg was not found.      same as the match we have just done, treat it as NOMATCH. */
6149      We treat this as NOMATCH. THEN at this level acts exactly like PRUNE. */  
6150        case MATCH_SKIP:
6151        if (md->start_match_ptr != start_match)
6152          {
6153          new_start_match = md->start_match_ptr;
6154          break;
6155          }
6156        /* Fall through */
6157    
6158        /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6159        the SKIP's arg was not found. We also treat this as NOMATCH. */
6160    
6161        case MATCH_SKIP_ARG:
6162        /* Fall through */
6163    
6164        /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6165        exactly like PRUNE. */
6166    
6167      case MATCH_NOMATCH:      case MATCH_NOMATCH:
6168      case MATCH_PRUNE:      case MATCH_PRUNE:
     case MATCH_SKIP_ARG:  
6169      case MATCH_THEN:      case MATCH_THEN:
6170      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6171  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 6013  for(;;) Line 6175  for(;;)
6175  #endif  #endif
6176      break;      break;
6177    
     /* SKIP passes back the next starting point explicitly. */  
   
     case MATCH_SKIP:  
     new_start_match = md->start_match_ptr;  
     break;  
   
6178      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6179    
6180      case MATCH_COMMIT:      case MATCH_COMMIT:
# Line 6115  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6271  if (rc == MATCH_MATCH || rc == MATCH_ACC
6271    
6272    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
6273      {      {
6274      offsets[0] = md->start_match_ptr - md->start_subject;      offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6275      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6276      }      }
6277    
6278    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
# Line 6148  if (start_partial != NULL) Line 6304  if (start_partial != NULL)
6304    md->mark = NULL;    md->mark = NULL;
6305    if (offsetcount > 1)    if (offsetcount > 1)
6306      {      {
6307      offsets[0] = start_partial - (USPTR)subject;      offsets[0] = (int)(start_partial - (USPTR)subject);
6308      offsets[1] = end_subject - (USPTR)subject;      offsets[1] = (int)(end_subject - (USPTR)subject);
6309      }      }
6310    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
6311    }    }

Legend:
Removed from v.517  
changed lines
  Added in v.600

  ViewVC Help
Powered by ViewVC 1.1.5