/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 518 by ph10, Tue May 18 15:47:01 2010 UTC  |  revision 595 by ph10, Mon May 2 10:33:29 2011 UTC
# Line 132  while (length-- > 0) Line 132  while (length-- > 0)
132  *          Match a back-reference                *  *          Match a back-reference                *
133  *************************************************/  *************************************************/
134    
135  /* If a back reference hasn't been set, the length that is passed is greater  /* Normally, if a back reference hasn't been set, the length that is passed is
136  than the number of characters left in the string, so the match fails.  negative, so the match always fails. However, in JavaScript compatibility mode,
137    the length passed is zero. Note that in caseless UTF-8 mode, the number of
138    subject bytes matched may be different to the number of reference bytes.
139    
140  Arguments:  Arguments:
141    offset      index into the offset vector    offset      index into the offset vector
142    eptr        points into the subject    eptr        pointer into the subject
143    length      length to be matched    length      length of reference to be matched (number of bytes)
144    md          points to match data block    md          points to match data block
145    ims         the ims flags    ims         the ims flags
146    
147  Returns:      TRUE if matched  Returns:      < 0 if not matched, otherwise the number of subject bytes matched
148  */  */
149    
150  static BOOL  static int
151  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
152    unsigned long int ims)    unsigned long int ims)
153  {  {
154  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR eptr_start = eptr;
155    register USPTR p = md->start_subject + md->offset_vector[offset];
156    
157  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
158  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 164  pchars(p, length, FALSE, md); Line 167  pchars(p, length, FALSE, md);
167  printf("\n");  printf("\n");
168  #endif  #endif
169    
170  /* Always fail if not enough characters left */  /* Always fail if reference not set (and not JavaScript compatible). */
171    
172  if (length > md->end_subject - eptr) return FALSE;  if (length < 0) return -1;
173    
174  /* Separate the caseless case for speed. In UTF-8 mode we can only do this  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175  properly if Unicode properties are supported. Otherwise, we can check only  properly if Unicode properties are supported. Otherwise, we can check only
# Line 178  if ((ims & PCRE_CASELESS) != 0) Line 181  if ((ims & PCRE_CASELESS) != 0)
181  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
182    if (md->utf8)    if (md->utf8)
183      {      {
184      USPTR endptr = eptr + length;      /* Match characters up to the end of the reference. NOTE: the number of
185      while (eptr < endptr)      bytes matched may differ, because there are some characters whose upper and
186        lower case versions code as different numbers of bytes. For example, U+023A
187        (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188        a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189        the latter. It is important, therefore, to check the length along the
190        reference, not along the subject (earlier code did this wrong). */
191    
192        USPTR endptr = p + length;
193        while (p < endptr)
194        {        {
195        int c, d;        int c, d;
196        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
197        GETCHARINC(d, p);        GETCHARINC(d, p);
198        if (c != d && c != UCD_OTHERCASE(d)) return FALSE;        if (c != d && c != UCD_OTHERCASE(d)) return -1;
199        }        }
200      }      }
201    else    else
# Line 195  if ((ims & PCRE_CASELESS) != 0) Line 206  if ((ims & PCRE_CASELESS) != 0)
206    is no UCP support. */    is no UCP support. */
207    
208    while (length-- > 0)    while (length-- > 0)
209      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }      { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
210    }    }
211    
212  /* In the caseful case, we can just compare the bytes, whether or not we  /* In the caseful case, we can just compare the bytes, whether or not we
213  are in UTF-8 mode. */  are in UTF-8 mode. */
214    
215  else  else
216    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return -1; }
217    
218  return TRUE;  return eptr - eptr_start;
219  }  }
220    
221    
# Line 255  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 266  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
266         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
267         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
268         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
269         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58 };         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
270           RM61,  RM62 };
271    
272  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
273  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 293  argument of match(), which never changes Line 305  argument of match(), which never changes
305    
306  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
307    {\    {\
308    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
309      if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
310    frame->Xwhere = rw; \    frame->Xwhere = rw; \
311    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
312    newframe->Xecode = rb;\    newframe->Xecode = rb;\
# Line 314  argument of match(), which never changes Line 327  argument of match(), which never changes
327    
328  #define RRETURN(ra)\  #define RRETURN(ra)\
329    {\    {\
330    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
331    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
332    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
333    if (frame != NULL)\    if (frame != NULL)\
334      {\      {\
335      rrc = ra;\      rrc = ra;\
# Line 420  immediately. The second one is used when Line 433  immediately. The second one is used when
433  the subject. */  the subject. */
434    
435  #define CHECK_PARTIAL()\  #define CHECK_PARTIAL()\
436    if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\    if (md->partial != 0 && eptr >= md->end_subject && \
437      {\        eptr > md->start_used_ptr) \
438      md->hitend = TRUE;\      { \
439      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      md->hitend = TRUE; \
440        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
441      }      }
442    
443  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
444    if (md->partial != 0 && eptr > mstart)\    if (md->partial != 0 && eptr > md->start_used_ptr) \
445      {\      { \
446      md->hitend = TRUE;\      md->hitend = TRUE; \
447      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
448      }      }
449    
450    
# Line 486  heap storage. Set up the top-level frame Line 500  heap storage. Set up the top-level frame
500  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
501    
502  #ifdef NO_RECURSE  #ifdef NO_RECURSE
503  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
504    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
505  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
506    
507  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 707  for (;;) Line 722  for (;;)
722      case OP_FAIL:      case OP_FAIL:
723      MRRETURN(MATCH_NOMATCH);      MRRETURN(MATCH_NOMATCH);
724    
725        /* COMMIT overrides PRUNE, SKIP, and THEN */
726    
727      case OP_COMMIT:      case OP_COMMIT:
728      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
729        ims, eptrb, flags, RM52);        ims, eptrb, flags, RM52);
730      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
731            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
732            rrc != MATCH_THEN)
733          RRETURN(rrc);
734      MRRETURN(MATCH_COMMIT);      MRRETURN(MATCH_COMMIT);
735    
736        /* PRUNE overrides THEN */
737    
738      case OP_PRUNE:      case OP_PRUNE:
739      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
740        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM51);
741      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
742      MRRETURN(MATCH_PRUNE);      MRRETURN(MATCH_PRUNE);
743    
744      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
745      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
746        ims, eptrb, flags, RM56);        ims, eptrb, flags, RM56);
747      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
748      md->mark = ecode + 2;      md->mark = ecode + 2;
749      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
750    
751        /* SKIP overrides PRUNE and THEN */
752    
753      case OP_SKIP:      case OP_SKIP:
754      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
755        ims, eptrb, flags, RM53);        ims, eptrb, flags, RM53);
756      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
757          RRETURN(rrc);
758      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
759      MRRETURN(MATCH_SKIP);      MRRETURN(MATCH_SKIP);
760    
761      case OP_SKIP_ARG:      case OP_SKIP_ARG:
762      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
763        ims, eptrb, flags, RM57);        ims, eptrb, flags, RM57);
764      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
765          RRETURN(rrc);
766    
767      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
768      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
# Line 746  for (;;) Line 772  for (;;)
772      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
773      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
774    
775        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
776        the alt that is at the start of the current branch. This makes it possible
777        to skip back past alternatives that precede the THEN within the current
778        branch. */
779    
780      case OP_THEN:      case OP_THEN:
781      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
782        ims, eptrb, flags, RM54);        ims, eptrb, flags, RM54);
783      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
784        md->start_match_ptr = ecode - GET(ecode, 1);
785      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
786    
787      case OP_THEN_ARG:      case OP_THEN_ARG:
788      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
789        ims, eptrb, flags, RM58);        offset_top, md, ims, eptrb, flags, RM58);
790      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
791      md->mark = ecode + 2;      md->start_match_ptr = ecode - GET(ecode, 1);
792        md->mark = ecode + LINK_SIZE + 2;
793      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
794    
795      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
# Line 793  for (;;) Line 826  for (;;)
826        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
827    
828        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
829        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
830            (int)(eptr - md->start_subject);
831    
832        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
833        do        do
834          {          {
835          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
836            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
837          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
838                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
839              RRETURN(rrc);
840          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
841          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
842          }          }
# Line 861  for (;;) Line 897  for (;;)
897    
898        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
899          eptrb, flags, RM2);          eptrb, flags, RM2);
900        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
901              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
902            RRETURN(rrc);
903        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
904        }        }
905      /* Control never reaches here. */      /* Control never reaches here. */
# Line 888  for (;;) Line 926  for (;;)
926          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
927          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
928          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
929          cb.subject_length   = md->end_subject - md->start_subject;          cb.subject_length   = (int)(md->end_subject - md->start_subject);
930          cb.start_match      = mstart - md->start_subject;          cb.start_match      = (int)(mstart - md->start_subject);
931          cb.current_position = eptr - md->start_subject;          cb.current_position = (int)(eptr - md->start_subject);
932          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
933          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
934          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
# Line 1062  for (;;) Line 1100  for (;;)
1100          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1101          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1102          }          }
1103        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)        else if (rrc != MATCH_NOMATCH &&
1104                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1105          {          {
1106          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1107          }          }
# Line 1116  for (;;) Line 1155  for (;;)
1155        {        {
1156        md->offset_vector[offset] =        md->offset_vector[offset] =
1157          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
1158        md->offset_vector[offset+1] = eptr - md->start_subject;        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1159        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1160        }        }
1161      ecode += 3;      ecode += 3;
# Line 1190  for (;;) Line 1229  for (;;)
1229          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1230          break;          break;
1231          }          }
1232        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1233              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1234            RRETURN(rrc);
1235        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1236        }        }
1237      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1224  for (;;) Line 1265  for (;;)
1265          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1266          break;          break;
1267          }          }
1268        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1269              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1270            RRETURN(rrc);
1271        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1272        }        }
1273      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1279  for (;;) Line 1322  for (;;)
1322        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1323        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1324        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1325        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1326        cb.start_match      = mstart - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1327        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1328        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1329        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1330        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
# Line 1361  for (;;) Line 1404  for (;;)
1404              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1405            MRRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1406            }            }
1407          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH &&
1408                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1409            {            {
1410            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1411            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1404  for (;;) Line 1448  for (;;)
1448          mstart = md->start_match_ptr;          mstart = md->start_match_ptr;
1449          break;          break;
1450          }          }
1451        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1452              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1453            RRETURN(rrc);
1454        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1455        }        }
1456      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1558  for (;;) Line 1604  for (;;)
1604          {          {
1605          md->offset_vector[offset] =          md->offset_vector[offset] =
1606            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1607          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1608          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1609          }          }
1610    
# Line 1670  for (;;) Line 1716  for (;;)
1716        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1717          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1718        else        else
1719          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }          {
1720            if (md->noteol) MRRETURN(MATCH_NOMATCH);
1721            SCHECK_PARTIAL();
1722            }
1723        ecode++;        ecode++;
1724        break;        break;
1725        }        }
1726      else      else  /* Not multiline */
1727        {        {
1728        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1729        if (!md->endonly)        if (!md->endonly) goto ASSERT_NL_OR_EOS;
         {  
         if (eptr != md->end_subject &&  
             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))  
           MRRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1730        }        }
1731    
1732      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1733    
1734      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1735    
1736      case OP_EOD:      case OP_EOD:
1737      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1738        SCHECK_PARTIAL();
1739      ecode++;      ecode++;
1740      break;      break;
1741    
1742      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1743    
1744      case OP_EODN:      case OP_EODN:
1745      if (eptr != md->end_subject &&      ASSERT_NL_OR_EOS:
1746        if (eptr < md->end_subject &&
1747          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1748        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1749    
1750        /* Either at end of string or \n before end. */
1751    
1752        SCHECK_PARTIAL();
1753      ecode++;      ecode++;
1754      break;      break;
1755    
# Line 1719  for (;;) Line 1768  for (;;)
1768        if (utf8)        if (utf8)
1769          {          {
1770          /* Get status of previous character */          /* Get status of previous character */
1771    
1772          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1773            {            {
1774            USPTR lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1775            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1776            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1777            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1778  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1779            if (md->use_ucp)            if (md->use_ucp)
1780              {              {
1781              if (c == '_') prev_is_word = TRUE; else              if (c == '_') prev_is_word = TRUE; else
1782                {                {
1783                int cat = UCD_CATEGORY(c);                int cat = UCD_CATEGORY(c);
1784                prev_is_word = (cat == ucp_L || cat == ucp_N);                prev_is_word = (cat == ucp_L || cat == ucp_N);
1785                }                }
1786              }              }
1787            else            else
1788  #endif  #endif
1789            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1790            }            }
1791    
1792          /* Get status of next character */          /* Get status of next character */
1793    
1794          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1795            {            {
1796            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 1750  for (;;) Line 1799  for (;;)
1799          else          else
1800            {            {
1801            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1802  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1803            if (md->use_ucp)            if (md->use_ucp)
1804              {              {
1805              if (c == '_') cur_is_word = TRUE; else              if (c == '_') cur_is_word = TRUE; else
1806                {                {
1807                int cat = UCD_CATEGORY(c);                int cat = UCD_CATEGORY(c);
1808                cur_is_word = (cat == ucp_L || cat == ucp_N);                cur_is_word = (cat == ucp_L || cat == ucp_N);
1809                }                }
1810              }              }
1811            else            else
1812  #endif  #endif
1813            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1814            }            }
1815          }          }
1816        else        else
1817  #endif  #endif
1818    
1819        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1820        consistency with the behaviour of \w we do use it in this case. */        consistency with the behaviour of \w we do use it in this case. */
1821    
1822          {          {
1823          /* Get status of previous character */          /* Get status of previous character */
1824    
1825          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1826            {            {
1827            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1828  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1829            if (md->use_ucp)            if (md->use_ucp)
1830              {              {
1831              c = eptr[-1];              c = eptr[-1];
1832              if (c == '_') prev_is_word = TRUE; else              if (c == '_') prev_is_word = TRUE; else
1833                {                {
1834                int cat = UCD_CATEGORY(c);                int cat = UCD_CATEGORY(c);
1835                prev_is_word = (cat == ucp_L || cat == ucp_N);                prev_is_word = (cat == ucp_L || cat == ucp_N);
1836                }                }
1837              }              }
1838            else            else
1839  #endif  #endif
1840            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1841            }            }
1842    
1843          /* Get status of next character */          /* Get status of next character */
1844    
1845          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1846            {            {
1847            SCHECK_PARTIAL();            SCHECK_PARTIAL();
1848            cur_is_word = FALSE;            cur_is_word = FALSE;
1849            }            }
1850          else          else
1851  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1852          if (md->use_ucp)          if (md->use_ucp)
1853            {            {
1854            c = *eptr;            c = *eptr;
1855            if (c == '_') cur_is_word = TRUE; else            if (c == '_') cur_is_word = TRUE; else
1856              {              {
1857              int cat = UCD_CATEGORY(c);              int cat = UCD_CATEGORY(c);
1858              cur_is_word = (cat == ucp_L || cat == ucp_N);              cur_is_word = (cat == ucp_L || cat == ucp_N);
1859              }              }
1860            }            }
1861          else          else
1862  #endif  #endif
1863          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1864          }          }
1865    
# Line 2134  for (;;) Line 2183  for (;;)
2183          if ((ecode[2] != prop->script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2184            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2185          break;          break;
2186    
2187          /* These are specials */          /* These are specials */
2188    
2189          case PT_ALNUM:          case PT_ALNUM:
2190          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2191               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2192            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2193          break;          break;
2194    
2195          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2196          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2197               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2198                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2199            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2200          break;          break;
2201    
2202          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2203          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2204               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2205               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2206                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2207            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2208          break;          break;
2209    
2210          case PT_WORD:          case PT_WORD:
2211          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2212               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2213               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2214            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2215          break;          break;
2216    
2217          /* This should never occur */          /* This should never occur */
2218    
2219          default:          default:
# Line 2214  for (;;) Line 2263  for (;;)
2263      loops). */      loops). */
2264    
2265      case OP_REF:      case OP_REF:
2266        {      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2267        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      ecode += 3;
       ecode += 3;  
2268    
2269        /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2270    
2271        (a) In the default, Perl-compatible state, set the length to be longer      (a) In the default, Perl-compatible state, set the length negative;
2272        than the amount of subject left; this ensures that every attempt at a      this ensures that every attempt at a match fails. We can't just fail
2273        match fails. We can't just fail here, because of the possibility of      here, because of the possibility of quantifiers with zero minima.
       quantifiers with zero minima.  
2274    
2275        (b) If the JavaScript compatibility flag is set, set the length to zero      (b) If the JavaScript compatibility flag is set, set the length to zero
2276        so that the back reference matches an empty string.      so that the back reference matches an empty string.
2277    
2278        Otherwise, set the length to the length of what was matched by the      Otherwise, set the length to the length of what was matched by the
2279        referenced subpattern. */      referenced subpattern. */
2280    
2281        if (offset >= offset_top || md->offset_vector[offset] < 0)      if (offset >= offset_top || md->offset_vector[offset] < 0)
2282          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;        length = (md->jscript_compat)? 0 : -1;
2283        else      else
2284          length = md->offset_vector[offset+1] - md->offset_vector[offset];        length = md->offset_vector[offset+1] - md->offset_vector[offset];
2285    
2286        /* Set up for repetition, or handle the non-repeated case */      /* Set up for repetition, or handle the non-repeated case */
2287    
2288        switch (*ecode)      switch (*ecode)
2289          {        {
2290          case OP_CRSTAR:        case OP_CRSTAR:
2291          case OP_CRMINSTAR:        case OP_CRMINSTAR:
2292          case OP_CRPLUS:        case OP_CRPLUS:
2293          case OP_CRMINPLUS:        case OP_CRMINPLUS:
2294          case OP_CRQUERY:        case OP_CRQUERY:
2295          case OP_CRMINQUERY:        case OP_CRMINQUERY:
2296          c = *ecode++ - OP_CRSTAR;        c = *ecode++ - OP_CRSTAR;
2297          minimize = (c & 1) != 0;        minimize = (c & 1) != 0;
2298          min = rep_min[c];                 /* Pick up values from tables; */        min = rep_min[c];                 /* Pick up values from tables; */
2299          max = rep_max[c];                 /* zero for max => infinity */        max = rep_max[c];                 /* zero for max => infinity */
2300          if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2301          break;        break;
2302    
2303          case OP_CRRANGE:        case OP_CRRANGE:
2304          case OP_CRMINRANGE:        case OP_CRMINRANGE:
2305          minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2306          min = GET2(ecode, 1);        min = GET2(ecode, 1);
2307          max = GET2(ecode, 3);        max = GET2(ecode, 3);
2308          if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2309          ecode += 5;        ecode += 5;
2310          break;        break;
2311    
2312          default:               /* No repeat follows */        default:               /* No repeat follows */
2313          if (!match_ref(offset, eptr, length, md, ims))        if ((length = match_ref(offset, eptr, length, md, ims)) < 0)
2314            {          {
2315            CHECK_PARTIAL();          CHECK_PARTIAL();
2316            MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
           }  
         eptr += length;  
         continue;              /* With the main loop */  
2317          }          }
2318          eptr += length;
2319          continue;              /* With the main loop */
2320          }
2321    
2322        /* If the length of the reference is zero, just continue with the      /* Handle repeated back references. If the length of the reference is
2323        main loop. */      zero, just continue with the main loop. */
2324    
2325        if (length == 0) continue;      if (length == 0) continue;
2326    
2327        /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2328        the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
2329        address of eptr, so that eptr can be a register variable. */      address of eptr, so that eptr can be a register variable. */
2330    
2331        for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2332          {
2333          int slength;
2334          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2335          {          {
2336          if (!match_ref(offset, eptr, length, md, ims))          CHECK_PARTIAL();
2337            {          MRRETURN(MATCH_NOMATCH);
           CHECK_PARTIAL();  
           MRRETURN(MATCH_NOMATCH);  
           }  
         eptr += length;  
2338          }          }
2339          eptr += slength;
2340          }
2341    
2342        /* If min = max, continue at the same level without recursion.      /* If min = max, continue at the same level without recursion.
2343        They are not both allowed to be zero. */      They are not both allowed to be zero. */
2344    
2345        if (min == max) continue;      if (min == max) continue;
2346    
2347        /* If minimizing, keep trying and advancing the pointer */      /* If minimizing, keep trying and advancing the pointer */
2348    
2349        if (minimize)      if (minimize)
2350          {
2351          for (fi = min;; fi++)
2352          {          {
2353          for (fi = min;; fi++)          int slength;
2354            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2355            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2356            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2357            if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2358            {            {
2359            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            CHECK_PARTIAL();
2360            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            MRRETURN(MATCH_NOMATCH);
           if (fi >= max) MRRETURN(MATCH_NOMATCH);  
           if (!match_ref(offset, eptr, length, md, ims))  
             {  
             CHECK_PARTIAL();  
             MRRETURN(MATCH_NOMATCH);  
             }  
           eptr += length;  
2361            }            }
2362          /* Control never gets here */          eptr += slength;
2363          }          }
2364          /* Control never gets here */
2365          }
2366    
2367        /* If maximizing, find the longest string and work backwards */      /* If maximizing, find the longest string and work backwards */
2368    
2369        else      else
2370          {
2371          pp = eptr;
2372          for (i = min; i < max; i++)
2373          {          {
2374          pp = eptr;          int slength;
2375          for (i = min; i < max; i++)          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2376            {            {
2377            if (!match_ref(offset, eptr, length, md, ims))            CHECK_PARTIAL();
2378              {            break;
             CHECK_PARTIAL();  
             break;  
             }  
           eptr += length;  
           }  
         while (eptr >= pp)  
           {  
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);  
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
           eptr -= length;  
2379            }            }
2380          MRRETURN(MATCH_NOMATCH);          eptr += slength;
2381            }
2382          while (eptr >= pp)
2383            {
2384            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2385            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2386            eptr -= length;
2387          }          }
2388          MRRETURN(MATCH_NOMATCH);
2389        }        }
2390      /* Control never gets here */      /* Control never gets here */
2391    
# Line 3582  for (;;) Line 3631  for (;;)
3631                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3632              }              }
3633            break;            break;
3634    
3635            case PT_ALNUM:            case PT_ALNUM:
3636            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3637              {              {
# Line 3592  for (;;) Line 3641  for (;;)
3641                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3642                }                }
3643              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3644              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3645              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((prop_category == ucp_L || prop_category == ucp_N)
3646                     == prop_fail_result)                     == prop_fail_result)
3647                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3648              }              }
3649            break;            break;
3650    
3651            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
3652            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3653              {              {
# Line 3608  for (;;) Line 3657  for (;;)
3657                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3658                }                }
3659              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3660              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3661              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3662                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
3663                     == prop_fail_result)                     == prop_fail_result)
3664                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3665              }              }
3666            break;            break;
3667    
3668            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
3669            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3670              {              {
# Line 3625  for (;;) Line 3674  for (;;)
3674                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3675                }                }
3676              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3677              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3678              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3679                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3680                     == prop_fail_result)                     == prop_fail_result)
3681                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3682              }              }
3683            break;            break;
3684    
3685            case PT_WORD:            case PT_WORD:
3686            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3687              {              {
3688              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3642  for (;;) Line 3691  for (;;)
3691                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3692                }                }
3693              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3694              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3695              if ((prop_category == ucp_L || prop_category == ucp_N ||              if ((prop_category == ucp_L || prop_category == ucp_N ||
3696                   c == CHAR_UNDERSCORE)                   c == CHAR_UNDERSCORE)
3697                     == prop_fail_result)                     == prop_fail_result)
3698                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3699              }              }
3700            break;            break;
3701    
3702            /* This should not occur */            /* This should not occur */
3703    
3704            default:            default:
# Line 4212  for (;;) Line 4261  for (;;)
4261                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4262                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4263                }                }
4264              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4265              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4266              }              }
4267            /* Control never gets here */            /* Control never gets here */
# Line 4228  for (;;) Line 4277  for (;;)
4277                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4278                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4279                }                }
4280              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4281              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4282              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4283                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4248  for (;;) Line 4297  for (;;)
4297                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4298                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4299                }                }
4300              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4301              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4302              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4303                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4266  for (;;) Line 4315  for (;;)
4315                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4316                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4317                }                }
4318              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4319              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4320              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4321                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4284  for (;;) Line 4333  for (;;)
4333                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4334                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4335                }                }
4336              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4337              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4338              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4339                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4294  for (;;) Line 4343  for (;;)
4343            case PT_ALNUM:            case PT_ALNUM:
4344            for (fi = min;; fi++)            for (fi = min;; fi++)
4345              {              {
4346              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4347              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4348              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4349              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4302  for (;;) Line 4351  for (;;)
4351                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4352                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4353                }                }
4354              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4355              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4356              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((prop_category == ucp_L || prop_category == ucp_N)
4357                     == prop_fail_result)                     == prop_fail_result)
4358                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4359              }              }
4360            /* Control never gets here */            /* Control never gets here */
4361    
4362            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
4363            for (fi = min;; fi++)            for (fi = min;; fi++)
4364              {              {
4365              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4366              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4367              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4368              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4321  for (;;) Line 4370  for (;;)
4370                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4371                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4372                }                }
4373              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4374              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4375              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4376                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4377                     == prop_fail_result)                     == prop_fail_result)
4378                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4379              }              }
4380            /* Control never gets here */            /* Control never gets here */
4381    
4382            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
4383            for (fi = min;; fi++)            for (fi = min;; fi++)
4384              {              {
4385              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4386              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4387              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4388              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4341  for (;;) Line 4390  for (;;)
4390                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4391                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4392                }                }
4393              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4394              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4395              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4396                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4397                     == prop_fail_result)                     == prop_fail_result)
4398                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4399              }              }
4400            /* Control never gets here */            /* Control never gets here */
4401    
4402            case PT_WORD:            case PT_WORD:
4403            for (fi = min;; fi++)            for (fi = min;; fi++)
4404              {              {
4405              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4406              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4407              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4408              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4361  for (;;) Line 4410  for (;;)
4410                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4411                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4412                }                }
4413              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4414              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4415              if ((prop_category == ucp_L ||              if ((prop_category == ucp_L ||
4416                   prop_category == ucp_N ||                   prop_category == ucp_N ||
4417                   c == CHAR_UNDERSCORE)                   c == CHAR_UNDERSCORE)
4418                     == prop_fail_result)                     == prop_fail_result)
4419                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4420              }              }
4421            /* Control never gets here */            /* Control never gets here */
4422    
4423            /* This should never occur */            /* This should never occur */
4424    
4425            default:            default:
4426            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
4427            }            }
# Line 4719  for (;;) Line 4768  for (;;)
4768                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4769                break;                break;
4770                }                }
4771              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4772              if (prop_fail_result) break;              if (prop_fail_result) break;
4773              eptr+= len;              eptr+= len;
4774              }              }
# Line 4734  for (;;) Line 4783  for (;;)
4783                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4784                break;                break;
4785                }                }
4786              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4787              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4788              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4789                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4753  for (;;) Line 4802  for (;;)
4802                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4803                break;                break;
4804                }                }
4805              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4806              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4807              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4808                break;                break;
# Line 4770  for (;;) Line 4819  for (;;)
4819                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4820                break;                break;
4821                }                }
4822              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4823              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4824              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4825                break;                break;
# Line 4787  for (;;) Line 4836  for (;;)
4836                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4837                break;                break;
4838                }                }
4839              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4840              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4841              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4842                break;                break;
4843              eptr+= len;              eptr+= len;
4844              }              }
4845            break;            break;
4846    
4847            case PT_ALNUM:            case PT_ALNUM:
4848            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4849              {              {
# Line 4804  for (;;) Line 4853  for (;;)
4853                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4854                break;                break;
4855                }                }
4856              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4857              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4858              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((prop_category == ucp_L || prop_category == ucp_N)
4859                   == prop_fail_result)                   == prop_fail_result)
4860                break;                break;
4861              eptr+= len;              eptr+= len;
# Line 4822  for (;;) Line 4871  for (;;)
4871                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4872                break;                break;
4873                }                }
4874              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4875              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4876              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4877                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4878                   == prop_fail_result)                   == prop_fail_result)
4879                break;                break;
4880              eptr+= len;              eptr+= len;
# Line 4841  for (;;) Line 4890  for (;;)
4890                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4891                break;                break;
4892                }                }
4893              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4894              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4895              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4896                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4897                   == prop_fail_result)                   == prop_fail_result)
4898                break;                break;
4899              eptr+= len;              eptr+= len;
# Line 4860  for (;;) Line 4909  for (;;)
4909                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4910                break;                break;
4911                }                }
4912              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4913              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4914              if ((prop_category == ucp_L || prop_category == ucp_N ||              if ((prop_category == ucp_L || prop_category == ucp_N ||
4915                   c == CHAR_UNDERSCORE) == prop_fail_result)                   c == CHAR_UNDERSCORE) == prop_fail_result)
# Line 5462  switch (frame->Xwhere) Line 5511  switch (frame->Xwhere)
5511    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
5512  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5513    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5514      LBL(59) LBL(60) LBL(61) LBL(62)
5515  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
5516  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
5517    default:    default:
# Line 5595  if ((options & ~PUBLIC_EXEC_OPTIONS) != Line 5645  if ((options & ~PUBLIC_EXEC_OPTIONS) !=
5645  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL ||
5646     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5647  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5648    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5649    
5650  /* This information is for finding all the numbers associated with a given  /* This information is for finding all the numbers associated with a given
5651  name, for condition testing. */  name, for condition testing. */
# Line 5761  back the character offset. */ Line 5812  back the character offset. */
5812  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5813  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5814    {    {
5815    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)    int tb;
5816      return PCRE_ERROR_BADUTF8;    if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
5817        return (tb == length && md->partial > 1)?
5818          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5819    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
5820      {      {
5821      int tb = ((USPTR)subject)[start_offset];      tb = ((USPTR)subject)[start_offset] & 0xc0;
5822      if (tb > 127)      if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
       {  
       tb &= 0xc0;  
       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;  
       }  
5823      }      }
5824    }    }
5825  #endif  #endif
# Line 5898  for(;;) Line 5947  for(;;)
5947    /* There are some optimizations that avoid running the match if a known    /* There are some optimizations that avoid running the match if a known
5948    starting point is not found, or if a known later character is not present.    starting point is not found, or if a known later character is not present.
5949    However, there is an option that disables these, for testing and for ensuring    However, there is an option that disables these, for testing and for ensuring
5950    that all callouts do actually occur. */    that all callouts do actually occur. The option can be set in the regex by
5951      (*NO_START_OPT) or passed in match-time options. */
5952    
5953    if ((options & PCRE_NO_START_OPTIMIZE) == 0)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
5954      {      {
5955      /* Advance to a unique first byte if there is one. */      /* Advance to a unique first byte if there is one. */
5956    
# Line 5954  for(;;) Line 6004  for(;;)
6004        while (start_match < end_subject)        while (start_match < end_subject)
6005          {          {
6006          register unsigned int c = *start_match;          register unsigned int c = *start_match;
6007          if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;          if ((start_bits[c/8] & (1 << (c&7))) == 0)
6008            else break;            {
6009              start_match++;
6010    #ifdef SUPPORT_UTF8
6011              if (utf8)
6012                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6013                  start_match++;
6014    #endif
6015              }
6016            else break;
6017          }          }
6018        }        }
6019      }   /* Starting optimizations */      }   /* Starting optimizations */
# Line 6056  for(;;) Line 6114  for(;;)
6114    
6115    switch(rc)    switch(rc)
6116      {      {
6117      /* NOMATCH and PRUNE advance by one character. If MATCH_SKIP_ARG reaches      /* SKIP passes back the next starting point explicitly, but if it is the
6118      this level it means that a MARK that matched the SKIP's arg was not found.      same as the match we have just done, treat it as NOMATCH. */
6119      We treat this as NOMATCH. THEN at this level acts exactly like PRUNE. */  
6120        case MATCH_SKIP:
6121        if (md->start_match_ptr != start_match)
6122          {
6123          new_start_match = md->start_match_ptr;
6124          break;
6125          }
6126        /* Fall through */
6127    
6128        /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6129        the SKIP's arg was not found. We also treat this as NOMATCH. */
6130    
6131        case MATCH_SKIP_ARG:
6132        /* Fall through */
6133    
6134        /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6135        exactly like PRUNE. */
6136    
6137      case MATCH_NOMATCH:      case MATCH_NOMATCH:
6138      case MATCH_PRUNE:      case MATCH_PRUNE:
     case MATCH_SKIP_ARG:  
6139      case MATCH_THEN:      case MATCH_THEN:
6140      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6141  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 6072  for(;;) Line 6145  for(;;)
6145  #endif  #endif
6146      break;      break;
6147    
     /* SKIP passes back the next starting point explicitly. */  
   
     case MATCH_SKIP:  
     new_start_match = md->start_match_ptr;  
     break;  
   
6148      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6149    
6150      case MATCH_COMMIT:      case MATCH_COMMIT:
# Line 6174  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6241  if (rc == MATCH_MATCH || rc == MATCH_ACC
6241    
6242    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
6243      {      {
6244      offsets[0] = md->start_match_ptr - md->start_subject;      offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6245      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6246      }      }
6247    
6248    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
# Line 6207  if (start_partial != NULL) Line 6274  if (start_partial != NULL)
6274    md->mark = NULL;    md->mark = NULL;
6275    if (offsetcount > 1)    if (offsetcount > 1)
6276      {      {
6277      offsets[0] = start_partial - (USPTR)subject;      offsets[0] = (int)(start_partial - (USPTR)subject);
6278      offsets[1] = end_subject - (USPTR)subject;      offsets[1] = (int)(end_subject - (USPTR)subject);
6279      }      }
6280    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
6281    }    }

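Legend:
Removed from v.518 (rows shown without a leading line number)
changed lines (numbered rows showing the v.518 text followed by the v.595 text)
Added in v.595 (numbered rows showing only the v.595 text)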

  ViewVC Help
Powered by ViewVC 1.1.5