/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 518 by ph10, Tue May 18 15:47:01 2010 UTC | revision 550 by ph10, Sun Oct 10 16:24:11 2010 UTC
# Line 255  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 255  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
255         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58 };         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259           RM61,  RM62 };
260    
261  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
262  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 294  argument of match(), which never changes Line 295  argument of match(), which never changes
295  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296    {\    {\
297    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
298      if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299    frame->Xwhere = rw; \    frame->Xwhere = rw; \
300    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
301    newframe->Xecode = rb;\    newframe->Xecode = rb;\
# Line 314  argument of match(), which never changes Line 316  argument of match(), which never changes
316    
317  #define RRETURN(ra)\  #define RRETURN(ra)\
318    {\    {\
319    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
320    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
321    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
322    if (frame != NULL)\    if (frame != NULL)\
323      {\      {\
324      rrc = ra;\      rrc = ra;\
# Line 487  heap whenever RMATCH() does a "recursion Line 489  heap whenever RMATCH() does a "recursion
489    
490  #ifdef NO_RECURSE  #ifdef NO_RECURSE
491  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
492    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
493  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
494    
495  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 745  for (;;) Line 748  for (;;)
748    
749      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
750      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
751    
752        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
753        the alt that is at the start of the current branch. This makes it possible
754        to skip back past alternatives that precede the THEN within the current
755        branch. */
756    
757      case OP_THEN:      case OP_THEN:
758      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
759        ims, eptrb, flags, RM54);        ims, eptrb, flags, RM54);
760      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
761        md->start_match_ptr = ecode - GET(ecode, 1);
762      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
763    
764      case OP_THEN_ARG:      case OP_THEN_ARG:
765      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
766        ims, eptrb, flags, RM58);        offset_top, md, ims, eptrb, flags, RM58);
767      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
768      md->mark = ecode + 2;      md->start_match_ptr = ecode - GET(ecode, 1);
769        md->mark = ecode + LINK_SIZE + 2;
770      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
771    
772      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
# Line 793  for (;;) Line 803  for (;;)
803        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
804    
805        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
806        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
807            (int)(eptr - md->start_subject);
808    
809        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
810        do        do
811          {          {
812          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
813            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
814          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
815                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
816              RRETURN(rrc);
817          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
818          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
819          }          }
# Line 861  for (;;) Line 874  for (;;)
874    
875        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
876          eptrb, flags, RM2);          eptrb, flags, RM2);
877        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
878              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
879            RRETURN(rrc);
880        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
881        }        }
882      /* Control never reaches here. */      /* Control never reaches here. */
# Line 888  for (;;) Line 903  for (;;)
903          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
904          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
905          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
906          cb.subject_length   = md->end_subject - md->start_subject;          cb.subject_length   = (int)(md->end_subject - md->start_subject);
907          cb.start_match      = mstart - md->start_subject;          cb.start_match      = (int)(mstart - md->start_subject);
908          cb.current_position = eptr - md->start_subject;          cb.current_position = (int)(eptr - md->start_subject);
909          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
910          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
911          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
# Line 1062  for (;;) Line 1077  for (;;)
1077          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1078          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1079          }          }
1080        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)        else if (rrc != MATCH_NOMATCH &&
1081                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1082          {          {
1083          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1084          }          }
# Line 1116  for (;;) Line 1132  for (;;)
1132        {        {
1133        md->offset_vector[offset] =        md->offset_vector[offset] =
1134          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
1135        md->offset_vector[offset+1] = eptr - md->start_subject;        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1136        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1137        }        }
1138      ecode += 3;      ecode += 3;
# Line 1190  for (;;) Line 1206  for (;;)
1206          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1207          break;          break;
1208          }          }
1209        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1210              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1211            RRETURN(rrc);
1212        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1213        }        }
1214      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1224  for (;;) Line 1242  for (;;)
1242          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1243          break;          break;
1244          }          }
1245        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1246              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1247            RRETURN(rrc);
1248        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1249        }        }
1250      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1279  for (;;) Line 1299  for (;;)
1299        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1300        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1301        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1302        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1303        cb.start_match      = mstart - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1304        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1305        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1306        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1307        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
# Line 1361  for (;;) Line 1381  for (;;)
1381              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1382            MRRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1383            }            }
1384          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH &&
1385                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1386            {            {
1387            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1388            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1404  for (;;) Line 1425  for (;;)
1425          mstart = md->start_match_ptr;          mstart = md->start_match_ptr;
1426          break;          break;
1427          }          }
1428        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1429              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1430            RRETURN(rrc);
1431        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1432        }        }
1433      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1558  for (;;) Line 1581  for (;;)
1581          {          {
1582          md->offset_vector[offset] =          md->offset_vector[offset] =
1583            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1584          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1585          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1586          }          }
1587    
# Line 1719  for (;;) Line 1742  for (;;)
1742        if (utf8)        if (utf8)
1743          {          {
1744          /* Get status of previous character */          /* Get status of previous character */
1745    
1746          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1747            {            {
1748            USPTR lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1749            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1750            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1751            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1752  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1753            if (md->use_ucp)            if (md->use_ucp)
1754              {              {
1755              if (c == '_') prev_is_word = TRUE; else              if (c == '_') prev_is_word = TRUE; else
1756                {                {
1757                int cat = UCD_CATEGORY(c);                int cat = UCD_CATEGORY(c);
1758                prev_is_word = (cat == ucp_L || cat == ucp_N);                prev_is_word = (cat == ucp_L || cat == ucp_N);
1759                }                }
1760              }              }
1761            else            else
1762  #endif  #endif
1763            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1764            }            }
1765    
1766          /* Get status of next character */          /* Get status of next character */
1767    
1768          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1769            {            {
1770            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 1750  for (;;) Line 1773  for (;;)
1773          else          else
1774            {            {
1775            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1776  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1777            if (md->use_ucp)            if (md->use_ucp)
1778              {              {
1779              if (c == '_') cur_is_word = TRUE; else              if (c == '_') cur_is_word = TRUE; else
1780                {                {
1781                int cat = UCD_CATEGORY(c);                int cat = UCD_CATEGORY(c);
1782                cur_is_word = (cat == ucp_L || cat == ucp_N);                cur_is_word = (cat == ucp_L || cat == ucp_N);
1783                }                }
1784              }              }
1785            else            else
1786  #endif  #endif
1787            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1788            }            }
1789          }          }
1790        else        else
1791  #endif  #endif
1792    
1793        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1794        consistency with the behaviour of \w we do use it in this case. */        consistency with the behaviour of \w we do use it in this case. */
1795    
1796          {          {
1797          /* Get status of previous character */          /* Get status of previous character */
1798    
1799          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1800            {            {
1801            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1802  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1803            if (md->use_ucp)            if (md->use_ucp)
1804              {              {
1805              c = eptr[-1];              c = eptr[-1];
1806              if (c == '_') prev_is_word = TRUE; else              if (c == '_') prev_is_word = TRUE; else
1807                {                {
1808                int cat = UCD_CATEGORY(c);                int cat = UCD_CATEGORY(c);
1809                prev_is_word = (cat == ucp_L || cat == ucp_N);                prev_is_word = (cat == ucp_L || cat == ucp_N);
1810                }                }
1811              }              }
1812            else            else
1813  #endif  #endif
1814            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1815            }            }
1816    
1817          /* Get status of next character */          /* Get status of next character */
1818    
1819          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1820            {            {
1821            SCHECK_PARTIAL();            SCHECK_PARTIAL();
1822            cur_is_word = FALSE;            cur_is_word = FALSE;
1823            }            }
1824          else          else
1825  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1826          if (md->use_ucp)          if (md->use_ucp)
1827            {            {
1828            c = *eptr;            c = *eptr;
1829            if (c == '_') cur_is_word = TRUE; else            if (c == '_') cur_is_word = TRUE; else
1830              {              {
1831              int cat = UCD_CATEGORY(c);              int cat = UCD_CATEGORY(c);
1832              cur_is_word = (cat == ucp_L || cat == ucp_N);              cur_is_word = (cat == ucp_L || cat == ucp_N);
1833              }              }
1834            }            }
1835          else          else
1836  #endif  #endif
1837          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1838          }          }
1839    
# Line 2134  for (;;) Line 2157  for (;;)
2157          if ((ecode[2] != prop->script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2158            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2159          break;          break;
2160    
2161          /* These are specials */          /* These are specials */
2162    
2163          case PT_ALNUM:          case PT_ALNUM:
2164          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2165               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2166            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2167          break;          break;
2168    
2169          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2170          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2171               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2172                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2173            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2174          break;          break;
2175    
2176          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2177          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2178               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2179               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2180                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2181            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2182          break;          break;
2183    
2184          case PT_WORD:          case PT_WORD:
2185          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2186               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2187               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2188            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2189          break;          break;
2190    
2191          /* This should never occur */          /* This should never occur */
2192    
2193          default:          default:
# Line 2232  for (;;) Line 2255  for (;;)
2255        referenced subpattern. */        referenced subpattern. */
2256    
2257        if (offset >= offset_top || md->offset_vector[offset] < 0)        if (offset >= offset_top || md->offset_vector[offset] < 0)
2258          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;          length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2259        else        else
2260          length = md->offset_vector[offset+1] - md->offset_vector[offset];          length = md->offset_vector[offset+1] - md->offset_vector[offset];
2261    
# Line 3582  for (;;) Line 3605  for (;;)
3605                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3606              }              }
3607            break;            break;
3608    
3609            case PT_ALNUM:            case PT_ALNUM:
3610            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3611              {              {
# Line 3592  for (;;) Line 3615  for (;;)
3615                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3616                }                }
3617              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3618              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3619              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((prop_category == ucp_L || prop_category == ucp_N)
3620                     == prop_fail_result)                     == prop_fail_result)
3621                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3622              }              }
3623            break;            break;
3624    
3625            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
3626            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3627              {              {
# Line 3608  for (;;) Line 3631  for (;;)
3631                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3632                }                }
3633              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3634              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3635              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3636                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
3637                     == prop_fail_result)                     == prop_fail_result)
3638                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3639              }              }
3640            break;            break;
3641    
3642            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
3643            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3644              {              {
# Line 3625  for (;;) Line 3648  for (;;)
3648                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3649                }                }
3650              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3651              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3652              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3653                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3654                     == prop_fail_result)                     == prop_fail_result)
3655                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3656              }              }
3657            break;            break;
3658    
3659            case PT_WORD:            case PT_WORD:
3660            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3661              {              {
3662              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 3642  for (;;) Line 3665  for (;;)
3665                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3666                }                }
3667              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3668              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3669              if ((prop_category == ucp_L || prop_category == ucp_N ||              if ((prop_category == ucp_L || prop_category == ucp_N ||
3670                   c == CHAR_UNDERSCORE)                   c == CHAR_UNDERSCORE)
3671                     == prop_fail_result)                     == prop_fail_result)
3672                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3673              }              }
3674            break;            break;
3675    
3676            /* This should not occur */            /* This should not occur */
3677    
3678            default:            default:
# Line 4212  for (;;) Line 4235  for (;;)
4235                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4236                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4237                }                }
4238              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4239              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4240              }              }
4241            /* Control never gets here */            /* Control never gets here */
# Line 4228  for (;;) Line 4251  for (;;)
4251                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4252                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4253                }                }
4254              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4255              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4256              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4257                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4248  for (;;) Line 4271  for (;;)
4271                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4272                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4273                }                }
4274              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4275              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4276              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4277                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4266  for (;;) Line 4289  for (;;)
4289                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4290                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4291                }                }
4292              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4293              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4294              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4295                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4284  for (;;) Line 4307  for (;;)
4307                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4308                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4309                }                }
4310              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4311              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4312              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4313                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4294  for (;;) Line 4317  for (;;)
4317            case PT_ALNUM:            case PT_ALNUM:
4318            for (fi = min;; fi++)            for (fi = min;; fi++)
4319              {              {
4320              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4321              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4322              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4323              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4302  for (;;) Line 4325  for (;;)
4325                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4326                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4327                }                }
4328              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4329              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4330              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((prop_category == ucp_L || prop_category == ucp_N)
4331                     == prop_fail_result)                     == prop_fail_result)
4332                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4333              }              }
4334            /* Control never gets here */            /* Control never gets here */
4335    
4336            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
4337            for (fi = min;; fi++)            for (fi = min;; fi++)
4338              {              {
4339              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4340              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4341              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4342              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4321  for (;;) Line 4344  for (;;)
4344                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4345                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4346                }                }
4347              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4348              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4349              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4350                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4351                     == prop_fail_result)                     == prop_fail_result)
4352                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4353              }              }
4354            /* Control never gets here */            /* Control never gets here */
4355    
4356            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
4357            for (fi = min;; fi++)            for (fi = min;; fi++)
4358              {              {
4359              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4360              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4361              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4362              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4341  for (;;) Line 4364  for (;;)
4364                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4365                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4366                }                }
4367              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4368              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4369              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4370                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4371                     == prop_fail_result)                     == prop_fail_result)
4372                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4373              }              }
4374            /* Control never gets here */            /* Control never gets here */
4375    
4376            case PT_WORD:            case PT_WORD:
4377            for (fi = min;; fi++)            for (fi = min;; fi++)
4378              {              {
4379              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4380              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4381              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4382              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4361  for (;;) Line 4384  for (;;)
4384                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4385                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4386                }                }
4387              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4388              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4389              if ((prop_category == ucp_L ||              if ((prop_category == ucp_L ||
4390                   prop_category == ucp_N ||                   prop_category == ucp_N ||
4391                   c == CHAR_UNDERSCORE)                   c == CHAR_UNDERSCORE)
4392                     == prop_fail_result)                     == prop_fail_result)
4393                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4394              }              }
4395            /* Control never gets here */            /* Control never gets here */
4396    
4397            /* This should never occur */            /* This should never occur */
4398    
4399            default:            default:
4400            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
4401            }            }
# Line 4719  for (;;) Line 4742  for (;;)
4742                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4743                break;                break;
4744                }                }
4745              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4746              if (prop_fail_result) break;              if (prop_fail_result) break;
4747              eptr+= len;              eptr+= len;
4748              }              }
# Line 4734  for (;;) Line 4757  for (;;)
4757                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4758                break;                break;
4759                }                }
4760              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4761              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4762              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4763                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4753  for (;;) Line 4776  for (;;)
4776                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4777                break;                break;
4778                }                }
4779              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4780              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4781              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4782                break;                break;
# Line 4770  for (;;) Line 4793  for (;;)
4793                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4794                break;                break;
4795                }                }
4796              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4797              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4798              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4799                break;                break;
# Line 4787  for (;;) Line 4810  for (;;)
4810                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4811                break;                break;
4812                }                }
4813              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4814              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4815              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4816                break;                break;
4817              eptr+= len;              eptr+= len;
4818              }              }
4819            break;            break;
4820    
4821            case PT_ALNUM:            case PT_ALNUM:
4822            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4823              {              {
# Line 4804  for (;;) Line 4827  for (;;)
4827                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4828                break;                break;
4829                }                }
4830              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4831              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4832              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((prop_category == ucp_L || prop_category == ucp_N)
4833                   == prop_fail_result)                   == prop_fail_result)
4834                break;                break;
4835              eptr+= len;              eptr+= len;
# Line 4822  for (;;) Line 4845  for (;;)
4845                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4846                break;                break;
4847                }                }
4848              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4849              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4850              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4851                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4852                   == prop_fail_result)                   == prop_fail_result)
4853                break;                break;
4854              eptr+= len;              eptr+= len;
# Line 4841  for (;;) Line 4864  for (;;)
4864                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4865                break;                break;
4866                }                }
4867              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4868              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4869              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||              if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4870                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4871                   == prop_fail_result)                   == prop_fail_result)
4872                break;                break;
4873              eptr+= len;              eptr+= len;
# Line 4860  for (;;) Line 4883  for (;;)
4883                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4884                break;                break;
4885                }                }
4886              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4887              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4888              if ((prop_category == ucp_L || prop_category == ucp_N ||              if ((prop_category == ucp_L || prop_category == ucp_N ||
4889                   c == CHAR_UNDERSCORE) == prop_fail_result)                   c == CHAR_UNDERSCORE) == prop_fail_result)
# Line 5462  switch (frame->Xwhere) Line 5485  switch (frame->Xwhere)
5485    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
5486  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5487    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5488      LBL(59) LBL(60) LBL(61) LBL(62)
5489  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
5490  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
5491    default:    default:
# Line 5954  for(;;) Line 5978  for(;;)
5978        while (start_match < end_subject)        while (start_match < end_subject)
5979          {          {
5980          register unsigned int c = *start_match;          register unsigned int c = *start_match;
5981          if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;          if ((start_bits[c/8] & (1 << (c&7))) == 0)
5982            else break;            {
5983              start_match++;
5984    #ifdef SUPPORT_UTF8
5985              if (utf8)
5986                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5987                  start_match++;
5988    #endif
5989              }
5990            else break;
5991          }          }
5992        }        }
5993      }   /* Starting optimizations */      }   /* Starting optimizations */
# Line 6056  for(;;) Line 6088  for(;;)
6088    
6089    switch(rc)    switch(rc)
6090      {      {
6091      /* NOMATCH and PRUNE advance by one character. If MATCH_SKIP_ARG reaches      /* SKIP passes back the next starting point explicitly, but if it is the
6092      this level it means that a MARK that matched the SKIP's arg was not found.      same as the match we have just done, treat it as NOMATCH. */
6093      We treat this as NOMATCH. THEN at this level acts exactly like PRUNE. */  
6094        case MATCH_SKIP:
6095        if (md->start_match_ptr != start_match)
6096          {
6097          new_start_match = md->start_match_ptr;
6098          break;
6099          }
6100        /* Fall through */
6101    
6102        /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6103        the SKIP's arg was not found. We also treat this as NOMATCH. */
6104    
6105        case MATCH_SKIP_ARG:
6106        /* Fall through */
6107    
6108        /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6109        exactly like PRUNE. */
6110    
6111      case MATCH_NOMATCH:      case MATCH_NOMATCH:
6112      case MATCH_PRUNE:      case MATCH_PRUNE:
     case MATCH_SKIP_ARG:  
6113      case MATCH_THEN:      case MATCH_THEN:
6114      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6115  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 6072  for(;;) Line 6119  for(;;)
6119  #endif  #endif
6120      break;      break;
6121    
     /* SKIP passes back the next starting point explicitly. */  
   
     case MATCH_SKIP:  
     new_start_match = md->start_match_ptr;  
     break;  
   
6122      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6123    
6124      case MATCH_COMMIT:      case MATCH_COMMIT:
# Line 6174  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6215  if (rc == MATCH_MATCH || rc == MATCH_ACC
6215    
6216    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
6217      {      {
6218      offsets[0] = md->start_match_ptr - md->start_subject;      offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6219      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6220      }      }
6221    
6222    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
# Line 6207  if (start_partial != NULL) Line 6248  if (start_partial != NULL)
6248    md->mark = NULL;    md->mark = NULL;
6249    if (offsetcount > 1)    if (offsetcount > 1)
6250      {      {
6251      offsets[0] = start_partial - (USPTR)subject;      offsets[0] = (int)(start_partial - (USPTR)subject);
6252      offsets[1] = end_subject - (USPTR)subject;      offsets[1] = (int)(end_subject - (USPTR)subject);
6253      }      }
6254    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
6255    }    }

Legend:
Removed from v.518  
changed lines
  Added in v.550

  ViewVC Help
Powered by ViewVC 1.1.5