/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 518 by ph10, Tue May 18 15:47:01 2010 UTC | revision 567 by ph10, Sat Nov 6 17:10:00 2010 UTC
# Line 255  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 255  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
255         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58 };         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259           RM61,  RM62 };
260    
261  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
262  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 293  argument of match(), which never changes Line 294  argument of match(), which never changes
294    
295  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296    {\    {\
297    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
298      if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299    frame->Xwhere = rw; \    frame->Xwhere = rw; \
300    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
301    newframe->Xecode = rb;\    newframe->Xecode = rb;\
# Line 314  argument of match(), which never changes Line 316  argument of match(), which never changes
316    
317  #define RRETURN(ra)\  #define RRETURN(ra)\
318    {\    {\
319    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
320    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
321    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
322    if (frame != NULL)\    if (frame != NULL)\
323      {\      {\
324      rrc = ra;\      rrc = ra;\
# Line 420  immediately. The second one is used when Line 422  immediately. The second one is used when
422  the subject. */  the subject. */
423    
424  #define CHECK_PARTIAL()\  #define CHECK_PARTIAL()\
425    if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\    if (md->partial != 0 && eptr >= md->end_subject && \
426      {\        eptr > md->start_used_ptr) \
427      md->hitend = TRUE;\      { \
428      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      md->hitend = TRUE; \
429        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
430      }      }
431    
432  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
433    if (md->partial != 0 && eptr > mstart)\    if (md->partial != 0 && eptr > md->start_used_ptr) \
434      {\      { \
435      md->hitend = TRUE;\      md->hitend = TRUE; \
436      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
437      }      }
438    
439    
# Line 486  heap storage. Set up the top-level frame Line 489  heap storage. Set up the top-level frame
489  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
490    
491  #ifdef NO_RECURSE  #ifdef NO_RECURSE
492  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
493    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
494  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
495    
496  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 707  for (;;) Line 711  for (;;)
711      case OP_FAIL:      case OP_FAIL:
712      MRRETURN(MATCH_NOMATCH);      MRRETURN(MATCH_NOMATCH);
713    
714        /* COMMIT overrides PRUNE, SKIP, and THEN */
715    
716      case OP_COMMIT:      case OP_COMMIT:
717      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
718        ims, eptrb, flags, RM52);        ims, eptrb, flags, RM52);
719      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
720            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
721            rrc != MATCH_THEN)
722          RRETURN(rrc);
723      MRRETURN(MATCH_COMMIT);      MRRETURN(MATCH_COMMIT);
724    
725        /* PRUNE overrides THEN */
726    
727      case OP_PRUNE:      case OP_PRUNE:
728      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
729        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM51);
730      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
731      MRRETURN(MATCH_PRUNE);      MRRETURN(MATCH_PRUNE);
732    
733      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
734      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
735        ims, eptrb, flags, RM56);        ims, eptrb, flags, RM56);
736      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
737      md->mark = ecode + 2;      md->mark = ecode + 2;
738      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
739    
740        /* SKIP overrides PRUNE and THEN */
741    
742      case OP_SKIP:      case OP_SKIP:
743      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
744        ims, eptrb, flags, RM53);        ims, eptrb, flags, RM53);
745      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
746          RRETURN(rrc);
747      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
748      MRRETURN(MATCH_SKIP);      MRRETURN(MATCH_SKIP);
749    
750      case OP_SKIP_ARG:      case OP_SKIP_ARG:
751      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752        ims, eptrb, flags, RM57);        ims, eptrb, flags, RM57);
753      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
754          RRETURN(rrc);
755    
756      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
757      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
# Line 746  for (;;) Line 761  for (;;)
761      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
762      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
763    
764        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
765        the alt that is at the start of the current branch. This makes it possible
766        to skip back past alternatives that precede the THEN within the current
767        branch. */
768    
769      case OP_THEN:      case OP_THEN:
770      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
771        ims, eptrb, flags, RM54);        ims, eptrb, flags, RM54);
772      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
773        md->start_match_ptr = ecode - GET(ecode, 1);
774      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
775    
776      case OP_THEN_ARG:      case OP_THEN_ARG:
777      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
778        ims, eptrb, flags, RM58);        offset_top, md, ims, eptrb, flags, RM58);
779      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
780      md->mark = ecode + 2;      md->start_match_ptr = ecode - GET(ecode, 1);
781        md->mark = ecode + LINK_SIZE + 2;
782      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
783    
784      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
# Line 793  for (;;) Line 815  for (;;)
815        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
816    
817        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
818        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
819            (int)(eptr - md->start_subject);
820    
821        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
822        do        do
823          {          {
824          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
825            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
826          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
827                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
828              RRETURN(rrc);
829          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
830          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
831          }          }
# Line 861  for (;;) Line 886  for (;;)
886    
887        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
888          eptrb, flags, RM2);          eptrb, flags, RM2);
889        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
890              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
891            RRETURN(rrc);
892        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
893        }        }
894      /* Control never reaches here. */      /* Control never reaches here. */
# Line 888  for (;;) Line 915  for (;;)
915          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
916          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
917          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
918          cb.subject_length   = md->end_subject - md->start_subject;          cb.subject_length   = (int)(md->end_subject - md->start_subject);
919          cb.start_match      = mstart - md->start_subject;          cb.start_match      = (int)(mstart - md->start_subject);
920          cb.current_position = eptr - md->start_subject;          cb.current_position = (int)(eptr - md->start_subject);
921          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
922          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
923          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
# Line 1062  for (;;) Line 1089  for (;;)
1089          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1090          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1091          }          }
1092        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)        else if (rrc != MATCH_NOMATCH &&
1093                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1094          {          {
1095          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1096          }          }
# Line 1116  for (;;) Line 1144  for (;;)
1144        {        {
1145        md->offset_vector[offset] =        md->offset_vector[offset] =
1146          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
1147        md->offset_vector[offset+1] = eptr - md->start_subject;        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1148        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1149        }        }
1150      ecode += 3;      ecode += 3;
# Line 1190  for (;;) Line 1218  for (;;)
1218          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1219          break;          break;
1220          }          }
1221        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1222              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1223            RRETURN(rrc);
1224        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1225        }        }
1226      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1224  for (;;) Line 1254  for (;;)
1254          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1255          break;          break;
1256          }          }
1257        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1258              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1259            RRETURN(rrc);
1260        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1261        }        }
1262      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1279  for (;;) Line 1311  for (;;)
1311        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1312        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1313        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1314        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1315        cb.start_match      = mstart - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1316        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1317        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1318        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1319        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
# Line 1361  for (;;) Line 1393  for (;;)
1393              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1394            MRRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1395            }            }
1396          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH &&
1397                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1398            {            {
1399            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1400            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1404  for (;;) Line 1437  for (;;)
1437          mstart = md->start_match_ptr;          mstart = md->start_match_ptr;
1438          break;          break;
1439          }          }
1440        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1441              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1442            RRETURN(rrc);
1443        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1444        }        }
1445      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1558  for (;;) Line 1593  for (;;)
1593          {          {
1594          md->offset_vector[offset] =          md->offset_vector[offset] =
1595            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1596          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1597          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1598          }          }
1599    
# Line 1670  for (;;) Line 1705  for (;;)
1705        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1706          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1707        else        else
1708          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }          {
1709            if (md->noteol) MRRETURN(MATCH_NOMATCH);
1710            SCHECK_PARTIAL();
1711            }
1712        ecode++;        ecode++;
1713        break;        break;
1714        }        }
1715      else      else  /* Not multiline */
1716        {        {
1717        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1718        if (!md->endonly)        if (!md->endonly) goto ASSERT_NL_OR_EOS;
         {  
         if (eptr != md->end_subject &&  
             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))  
           MRRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1719        }        }
1720    
1721      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1722    
1723      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1724    
1725      case OP_EOD:      case OP_EOD:
1726      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1727        SCHECK_PARTIAL();
1728      ecode++;      ecode++;
1729      break;      break;
1730    
1731      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1732    
1733      case OP_EODN:      case OP_EODN:
1734      if (eptr != md->end_subject &&      ASSERT_NL_OR_EOS:
1735        if (eptr < md->end_subject &&
1736          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1737        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1738    
1739        /* Either at end of string or \n before end. */
1740    
1741        SCHECK_PARTIAL();
1742      ecode++;      ecode++;
1743      break;      break;
1744    
# Line 1719  for (;;) Line 1757  for (;;)
1757        if (utf8)        if (utf8)
1758          {          {
1759          /* Get status of previous character */          /* Get status of previous character */
1760    
1761          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1762            {            {
1763            USPTR lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1764            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1765            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1766            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1767  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1768            if (md->use_ucp)            if (md->use_ucp)
1769              {              {
1770              if (c == '_') prev_is_word = TRUE; else              if (c == '_') prev_is_word = TRUE; else
1771                {                {
1772                int cat = UCD_CATEGORY(c);                int cat = UCD_CATEGORY(c);
1773                prev_is_word = (cat == ucp_L || cat == ucp_N);                prev_is_word = (cat == ucp_L || cat == ucp_N);
1774                }                }
1775              }              }
1776            else            else
1777  #endif  #endif
1778            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1779            }            }
1780    
1781          /* Get status of next character */          /* Get status of next character */
1782    
1783          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1784            {            {
1785            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 1750  for (;;) Line 1788  for (;;)
1788          else          else
1789            {            {
1790            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1791  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1792            if (md->use_ucp)            if (md->use_ucp)
1793              {              {
1794              if (c == '_') cur_is_word = TRUE; else              if (c == '_') cur_is_word = TRUE; else
1795                {                {
1796                int cat = UCD_CATEGORY(c);                int cat = UCD_CATEGORY(c);
1797                cur_is_word = (cat == ucp_L || cat == ucp_N);                cur_is_word = (cat == ucp_L || cat == ucp_N);
1798                }                }
1799              }              }
1800            else            else
1801  #endif  #endif
1802            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1803            }            }
1804          }          }
1805        else        else
1806  #endif  #endif
1807    
1808        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1809        consistency with the behaviour of \w we do use it in this case. */        consistency with the behaviour of \w we do use it in this case. */
1810    
1811          {          {
1812          /* Get status of previous character */          /* Get status of previous character */
1813    
1814          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1815            {            {
1816            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1817  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1818            if (md->use_ucp)            if (md->use_ucp)
1819              {              {
1820              c = eptr[-1];              c = eptr[-1];
1821              if (c == '_') prev_is_word = TRUE; else              if (c == '_') prev_is_word = TRUE; else
1822                {                {
1823                int cat = UCD_CATEGORY(c);                int cat = UCD_CATEGORY(c);
1824                prev_is_word = (cat == ucp_L || cat == ucp_N);                prev_is_word = (cat == ucp_L || cat == ucp_N);
1825                }                }
1826              }              }
1827            else            else
1828  #endif  #endif
1829            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1830            }            }
1831    
1832          /* Get status of next character */          /* Get status of next character */
1833    
1834          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1835            {            {
1836            SCHECK_PARTIAL();            SCHECK_PARTIAL();
1837            cur_is_word = FALSE;            cur_is_word = FALSE;
1838            }            }
1839          else          else
1840  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1841          if (md->use_ucp)          if (md->use_ucp)
1842            {            {
1843            c = *eptr;            c = *eptr;
1844            if (c == '_') cur_is_word = TRUE; else            if (c == '_') cur_is_word = TRUE; else
1845              {              {
1846              int cat = UCD_CATEGORY(c);              int cat = UCD_CATEGORY(c);
1847              cur_is_word = (cat == ucp_L || cat == ucp_N);              cur_is_word = (cat == ucp_L || cat == ucp_N);
1848              }              }
1849            }            }
1850          else          else
1851  #endif  #endif
1852          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1853          }          }
1854    
# Line 2134  for (;;) Line 2172  for (;;)
2172          if ((ecode[2] != prop->script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2173            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2174          break;          break;
2175    
2176          /* These are specials */          /* These are specials */
2177    
2178          case PT_ALNUM:          case PT_ALNUM:
2179          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2180               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2181            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2182          break;          break;
2183    
2184          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2185          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2186               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2187                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2188            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2189          break;          break;
2190    
2191          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2192          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2193               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2194               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2195                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2196            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2197          break;          break;
2198    
2199          case PT_WORD:          case PT_WORD:
2200          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2201               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2202               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2203            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2204          break;          break;
2205    
2206          /* This should never occur */          /* This should never occur */
2207    
2208          default:          default:
# Line 2232  for (;;) Line 2270  for (;;)
2270        referenced subpattern. */        referenced subpattern. */
2271    
2272        if (offset >= offset_top || md->offset_vector[offset] < 0)        if (offset >= offset_top || md->offset_vector[offset] < 0)
2273          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;          length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2274        else        else
2275          length = md->offset_vector[offset+1] - md->offset_vector[offset];          length = md->offset_vector[offset+1] - md->offset_vector[offset];
2276    
# Line 3582  for (;;) Line 3620  for (;;)
3620                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3621              }              }
3622            break;            break;
3623    
3624            case PT_ALNUM:            case PT_ALNUM:
3625            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3626              {              {
# Line 3592  for (;;) Line 3630  for (;;)
3630                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3631                }                }
3632              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3633              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3634              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((prop_category == ucp_L || prop_category == ucp_N)
3635                     == prop_fail_result)                     == prop_fail_result)
3636                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3637              }              }
3638            break;            break;
3639    
3640            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
3641            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3642              {              {
# Line 3608  for (;;) Line 3646  for (;;)
3646                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3647                }                }
3648              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3649              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3650              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3651                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
3652                     == prop_fail_result)                     == prop_fail_result)
3653                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3654              }              }
3655            break;            break;
3656    
3657            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
3658            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3659              {              {
# Line 3625  for (;;) Line 3663  for (;;)
3663                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3664                }                }
3665              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3666              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3667              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3668                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3669                     == prop_fail_result)                     == prop_fail_result)
3670                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3671              }              }
3672            break;            break;
3673    
3674            case PT_WORD:            case PT_WORD:
3675            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3676              {              {
3677              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3642  for (;;) Line 3680  for (;;)
3680                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3681                }                }
3682              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3683              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3684              if ((prop_category == ucp_L || prop_category == ucp_N ||              if ((prop_category == ucp_L || prop_category == ucp_N ||
3685                   c == CHAR_UNDERSCORE)                   c == CHAR_UNDERSCORE)
3686                     == prop_fail_result)                     == prop_fail_result)
3687                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3688              }              }
3689            break;            break;
3690    
3691            /* This should not occur */            /* This should not occur */
3692    
3693            default:            default:
# Line 4212  for (;;) Line 4250  for (;;)
4250                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4251                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4252                }                }
4253              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4254              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4255              }              }
4256            /* Control never gets here */            /* Control never gets here */
# Line 4228  for (;;) Line 4266  for (;;)
4266                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4267                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4268                }                }
4269              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4270              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4271              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4272                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4248  for (;;) Line 4286  for (;;)
4286                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4287                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4288                }                }
4289              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4290              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4291              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4292                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4266  for (;;) Line 4304  for (;;)
4304                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4305                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4306                }                }
4307              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4308              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4309              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4310                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4284  for (;;) Line 4322  for (;;)
4322                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4323                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4324                }                }
4325              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4326              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4327              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4328                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4294  for (;;) Line 4332  for (;;)
4332            case PT_ALNUM:            case PT_ALNUM:
4333            for (fi = min;; fi++)            for (fi = min;; fi++)
4334              {              {
4335              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4336              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4337              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4338              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4302  for (;;) Line 4340  for (;;)
4340                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4341                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4342                }                }
4343              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4344              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4345              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((prop_category == ucp_L || prop_category == ucp_N)
4346                     == prop_fail_result)                     == prop_fail_result)
4347                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4348              }              }
4349            /* Control never gets here */            /* Control never gets here */
4350    
4351            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
4352            for (fi = min;; fi++)            for (fi = min;; fi++)
4353              {              {
4354              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4355              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4356              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4357              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4321  for (;;) Line 4359  for (;;)
4359                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4360                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4361                }                }
4362              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4363              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4364              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4365                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4366                     == prop_fail_result)                     == prop_fail_result)
4367                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4368              }              }
4369            /* Control never gets here */            /* Control never gets here */
4370    
4371            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
4372            for (fi = min;; fi++)            for (fi = min;; fi++)
4373              {              {
4374              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4375              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4376              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4377              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4341  for (;;) Line 4379  for (;;)
4379                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4380                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4381                }                }
4382              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4383              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4384              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4385                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4386                     == prop_fail_result)                     == prop_fail_result)
4387                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4388              }              }
4389            /* Control never gets here */            /* Control never gets here */
4390    
4391            case PT_WORD:            case PT_WORD:
4392            for (fi = min;; fi++)            for (fi = min;; fi++)
4393              {              {
4394              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4395              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4396              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4397              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4361  for (;;) Line 4399  for (;;)
4399                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4400                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4401                }                }
4402              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4403              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4404              if ((prop_category == ucp_L ||              if ((prop_category == ucp_L ||
4405                   prop_category == ucp_N ||                   prop_category == ucp_N ||
4406                   c == CHAR_UNDERSCORE)                   c == CHAR_UNDERSCORE)
4407                     == prop_fail_result)                     == prop_fail_result)
4408                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4409              }              }
4410            /* Control never gets here */            /* Control never gets here */
4411    
4412            /* This should never occur */            /* This should never occur */
4413    
4414            default:            default:
4415            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
4416            }            }
# Line 4719  for (;;) Line 4757  for (;;)
4757                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4758                break;                break;
4759                }                }
4760              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4761              if (prop_fail_result) break;              if (prop_fail_result) break;
4762              eptr+= len;              eptr+= len;
4763              }              }
# Line 4734  for (;;) Line 4772  for (;;)
4772                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4773                break;                break;
4774                }                }
4775              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4776              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4777              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4778                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4753  for (;;) Line 4791  for (;;)
4791                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4792                break;                break;
4793                }                }
4794              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4795              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4796              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4797                break;                break;
# Line 4770  for (;;) Line 4808  for (;;)
4808                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4809                break;                break;
4810                }                }
4811              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4812              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4813              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4814                break;                break;
# Line 4787  for (;;) Line 4825  for (;;)
4825                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4826                break;                break;
4827                }                }
4828              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4829              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4830              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4831                break;                break;
4832              eptr+= len;              eptr+= len;
4833              }              }
4834            break;            break;
4835    
4836            case PT_ALNUM:            case PT_ALNUM:
4837            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4838              {              {
# Line 4804  for (;;) Line 4842  for (;;)
4842                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4843                break;                break;
4844                }                }
4845              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4846              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4847              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((prop_category == ucp_L || prop_category == ucp_N)
4848                   == prop_fail_result)                   == prop_fail_result)
4849                break;                break;
4850              eptr+= len;              eptr+= len;
# Line 4822  for (;;) Line 4860  for (;;)
4860                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4861                break;                break;
4862                }                }
4863              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4864              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4865              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4866                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4867                   == prop_fail_result)                   == prop_fail_result)
4868                break;                break;
4869              eptr+= len;              eptr+= len;
# Line 4841  for (;;) Line 4879  for (;;)
4879                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4880                break;                break;
4881                }                }
4882              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4883              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4884              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4885                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4886                   == prop_fail_result)                   == prop_fail_result)
4887                break;                break;
4888              eptr+= len;              eptr+= len;
# Line 4860  for (;;) Line 4898  for (;;)
4898                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4899                break;                break;
4900                }                }
4901              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4902              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4903              if ((prop_category == ucp_L || prop_category == ucp_N ||              if ((prop_category == ucp_L || prop_category == ucp_N ||
4904                   c == CHAR_UNDERSCORE) == prop_fail_result)                   c == CHAR_UNDERSCORE) == prop_fail_result)
# Line 5462  switch (frame->Xwhere) Line 5500  switch (frame->Xwhere)
5500    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
5501  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5502    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5503      LBL(59) LBL(60) LBL(61) LBL(62)
5504  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
5505  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
5506    default:    default:
# Line 5595  if ((options & ~PUBLIC_EXEC_OPTIONS) != Line 5634  if ((options & ~PUBLIC_EXEC_OPTIONS) !=
5634  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL ||
5635     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5636  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5637    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5638    
5639  /* This information is for finding all the numbers associated with a given  /* This information is for finding all the numbers associated with a given
5640  name, for condition testing. */  name, for condition testing. */
# Line 5765  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 5805  if (utf8 && (options & PCRE_NO_UTF8_CHEC
5805      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
5806    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
5807      {      {
5808      int tb = ((USPTR)subject)[start_offset];      int tb = ((USPTR)subject)[start_offset] & 0xc0;
5809      if (tb > 127)      if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
       {  
       tb &= 0xc0;  
       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;  
       }  
5810      }      }
5811    }    }
5812  #endif  #endif
# Line 5954  for(;;) Line 5990  for(;;)
5990        while (start_match < end_subject)        while (start_match < end_subject)
5991          {          {
5992          register unsigned int c = *start_match;          register unsigned int c = *start_match;
5993          if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;          if ((start_bits[c/8] & (1 << (c&7))) == 0)
5994            else break;            {
5995              start_match++;
5996    #ifdef SUPPORT_UTF8
5997              if (utf8)
5998                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5999                  start_match++;
6000    #endif
6001              }
6002            else break;
6003          }          }
6004        }        }
6005      }   /* Starting optimizations */      }   /* Starting optimizations */
# Line 6056  for(;;) Line 6100  for(;;)
6100    
6101    switch(rc)    switch(rc)
6102      {      {
6103      /* NOMATCH and PRUNE advance by one character. If MATCH_SKIP_ARG reaches      /* SKIP passes back the next starting point explicitly, but if it is the
6104      this level it means that a MARK that matched the SKIP's arg was not found.      same as the match we have just done, treat it as NOMATCH. */
6105      We treat this as NOMATCH. THEN at this level acts exactly like PRUNE. */  
6106        case MATCH_SKIP:
6107        if (md->start_match_ptr != start_match)
6108          {
6109          new_start_match = md->start_match_ptr;
6110          break;
6111          }
6112        /* Fall through */
6113    
6114        /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6115        the SKIP's arg was not found. We also treat this as NOMATCH. */
6116    
6117        case MATCH_SKIP_ARG:
6118        /* Fall through */
6119    
6120        /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6121        exactly like PRUNE. */
6122    
6123      case MATCH_NOMATCH:      case MATCH_NOMATCH:
6124      case MATCH_PRUNE:      case MATCH_PRUNE:
     case MATCH_SKIP_ARG:  
6125      case MATCH_THEN:      case MATCH_THEN:
6126      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6127  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 6072  for(;;) Line 6131  for(;;)
6131  #endif  #endif
6132      break;      break;
6133    
     /* SKIP passes back the next starting point explicitly. */  
   
     case MATCH_SKIP:  
     new_start_match = md->start_match_ptr;  
     break;  
   
6134      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6135    
6136      case MATCH_COMMIT:      case MATCH_COMMIT:
# Line 6174  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6227  if (rc == MATCH_MATCH || rc == MATCH_ACC
6227    
6228    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
6229      {      {
6230      offsets[0] = md->start_match_ptr - md->start_subject;      offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6231      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6232      }      }
6233    
6234    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
# Line 6207  if (start_partial != NULL) Line 6260  if (start_partial != NULL)
6260    md->mark = NULL;    md->mark = NULL;
6261    if (offsetcount > 1)    if (offsetcount > 1)
6262      {      {
6263      offsets[0] = start_partial - (USPTR)subject;      offsets[0] = (int)(start_partial - (USPTR)subject);
6264      offsets[1] = end_subject - (USPTR)subject;      offsets[1] = (int)(end_subject - (USPTR)subject);
6265      }      }
6266    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
6267    }    }

Legend:
Removed from v.518  
changed lines
  Added in v.567

  ViewVC Help
Powered by ViewVC 1.1.5