/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

Old (left column): revision 427 by ph10, Fri Aug 28 09:55:54 2009 UTC | New (right column): revision 455 by ph10, Sat Sep 26 19:12:32 2009 UTC
# Line 408  immediately. The second one is used when Line 408  immediately. The second one is used when
408  the subject. */  the subject. */
409    
410  #define CHECK_PARTIAL()\  #define CHECK_PARTIAL()\
411    if (md->partial && eptr >= md->end_subject && eptr > mstart)\    if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
412      {\      {\
413      md->hitend = TRUE;\      md->hitend = TRUE;\
414      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
# Line 418  the subject. */ Line 418  the subject. */
418    if (md->partial && eptr > mstart)\    if (md->partial && eptr > mstart)\
419      {\      {\
420      md->hitend = TRUE;\      md->hitend = TRUE;\
     md->hitend = TRUE;\  
421      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
422      }      }
423    
# Line 665  for (;;) Line 664  for (;;)
664    minimize = possessive = FALSE;    minimize = possessive = FALSE;
665    op = *ecode;    op = *ecode;
666    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. This code is now wrapped in a macro  
   because it appears several times below. */  
   
   CHECK_PARTIAL();  
   
667    switch(op)    switch(op)
668      {      {
669      case OP_FAIL:      case OP_FAIL:
# Line 916  for (;;) Line 909  for (;;)
909        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
910        }        }
911      break;      break;
912    
913    
914        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
915        to close any currently open capturing brackets. */
916    
917        case OP_CLOSE:
918        number = GET2(ecode, 1);
919        offset = number << 1;
920    
921    #ifdef DEBUG
922          printf("end bracket %d at *ACCEPT", number);
923          printf("\n");
924    #endif
925    
926        md->capture_last = number;
927        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
928          {
929          md->offset_vector[offset] =
930            md->offset_vector[md->offset_end - number];
931          md->offset_vector[offset+1] = eptr - md->start_subject;
932          if (offset_top <= offset) offset_top = offset + 2;
933          }
934        ecode += 3;
935        break;
936    
937    
938      /* End of the pattern, either real or forced. If we are in a top-level      /* End of the pattern, either real or forced. If we are in a top-level
# Line 931  for (;;) Line 948  for (;;)
948        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
949        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
950          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
951          offset_top = rec->offset_top;
952        mstart = rec->save_start;        mstart = rec->save_start;
953        ims = original_ims;        ims = original_ims;
954        ecode = rec->after_call;        ecode = rec->after_call;
955        break;        break;
956        }        }
957    
958      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
959      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
960        the subject. In both cases, backtracking will then try other alternatives,
961        if any. */
962    
963        if (eptr == mstart &&
964            (md->notempty ||
965              (md->notempty_atstart &&
966                mstart == md->start_subject + md->start_offset)))
967          RRETURN(MATCH_NOMATCH);
968    
969        /* Otherwise, we have a match. */
970    
     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);  
971      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
972      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
973      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
# Line 1031  for (;;) Line 1058  for (;;)
1058        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1059        }        }
1060    
1061      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1062    
1063        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1064      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1065      break;      break;
1066    
# Line 1112  for (;;) Line 1140  for (;;)
1140        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1141              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1142        new_recursive.save_start = mstart;        new_recursive.save_start = mstart;
1143          new_recursive.offset_top = offset_top;
1144        mstart = eptr;        mstart = eptr;
1145    
1146        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
# Line 1310  for (;;) Line 1339  for (;;)
1339        {        {
1340        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1341        offset = number << 1;        offset = number << 1;
1342    
1343  #ifdef DEBUG  #ifdef DEBUG
1344        printf("end bracket %d", number);        printf("end bracket %d", number);
1345        printf("\n");        printf("\n");
# Line 1336  for (;;) Line 1365  for (;;)
1365          mstart = rec->save_start;          mstart = rec->save_start;
1366          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1367            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1368            offset_top = rec->offset_top;
1369          ecode = rec->after_call;          ecode = rec->after_call;
1370          ims = original_ims;          ims = original_ims;
1371          break;          break;
# Line 1475  for (;;) Line 1505  for (;;)
1505    
1506        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1507        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1508        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1509          partial matching. */
1510    
1511  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1512        if (utf8)        if (utf8)
# Line 1484  for (;;) Line 1515  for (;;)
1515            {            {
1516            USPTR lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1517            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1518              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1519            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1520            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1521            }            }
1522          if (eptr >= md->end_subject) cur_is_word = FALSE; else          if (eptr >= md->end_subject)
1523              {
1524              SCHECK_PARTIAL();
1525              cur_is_word = FALSE;
1526              }
1527            else
1528            {            {
1529            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1530            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1496  for (;;) Line 1533  for (;;)
1533        else        else
1534  #endif  #endif
1535    
1536        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode */
1537    
1538          {          {
1539          prev_is_word = (eptr != md->start_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1540            ((md->ctypes[eptr[-1]] & ctype_word) != 0);            {
1541          cur_is_word = (eptr < md->end_subject) &&            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1542            ((md->ctypes[*eptr] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1543              }
1544            if (eptr >= md->end_subject)
1545              {
1546              SCHECK_PARTIAL();
1547              cur_is_word = FALSE;
1548              }
1549            else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1550          }          }
1551    
1552        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 1520  for (;;) Line 1564  for (;;)
1564      /* Fall through */      /* Fall through */
1565    
1566      case OP_ALLANY:      case OP_ALLANY:
1567      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1568          {
1569          SCHECK_PARTIAL();
1570          RRETURN(MATCH_NOMATCH);
1571          }
1572      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1573      ecode++;      ecode++;
1574      break;      break;
# Line 1529  for (;;) Line 1577  for (;;)
1577      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1578    
1579      case OP_ANYBYTE:      case OP_ANYBYTE:
1580      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1581          {
1582          SCHECK_PARTIAL();
1583          RRETURN(MATCH_NOMATCH);
1584          }
1585      ecode++;      ecode++;
1586      break;      break;
1587    
1588      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1589      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1590          {
1591          SCHECK_PARTIAL();
1592          RRETURN(MATCH_NOMATCH);
1593          }
1594      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1595      if (      if (
1596  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1547  for (;;) Line 1603  for (;;)
1603      break;      break;
1604    
1605      case OP_DIGIT:      case OP_DIGIT:
1606      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1607          {
1608          SCHECK_PARTIAL();
1609          RRETURN(MATCH_NOMATCH);
1610          }
1611      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1612      if (      if (
1613  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1560  for (;;) Line 1620  for (;;)
1620      break;      break;
1621    
1622      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1623      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1624          {
1625          SCHECK_PARTIAL();
1626          RRETURN(MATCH_NOMATCH);
1627          }
1628      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1629      if (      if (
1630  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1573  for (;;) Line 1637  for (;;)
1637      break;      break;
1638    
1639      case OP_WHITESPACE:      case OP_WHITESPACE:
1640      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1641          {
1642          SCHECK_PARTIAL();
1643          RRETURN(MATCH_NOMATCH);
1644          }
1645      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1646      if (      if (
1647  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1586  for (;;) Line 1654  for (;;)
1654      break;      break;
1655    
1656      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1657      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1658          {
1659          SCHECK_PARTIAL();
1660          RRETURN(MATCH_NOMATCH);
1661          }
1662      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1663      if (      if (
1664  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1599  for (;;) Line 1671  for (;;)
1671      break;      break;
1672    
1673      case OP_WORDCHAR:      case OP_WORDCHAR:
1674      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1675          {
1676          SCHECK_PARTIAL();
1677          RRETURN(MATCH_NOMATCH);
1678          }
1679      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1680      if (      if (
1681  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1612  for (;;) Line 1688  for (;;)
1688      break;      break;
1689    
1690      case OP_ANYNL:      case OP_ANYNL:
1691      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1692          {
1693          SCHECK_PARTIAL();
1694          RRETURN(MATCH_NOMATCH);
1695          }
1696      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1697      switch(c)      switch(c)
1698        {        {
# Line 1636  for (;;) Line 1716  for (;;)
1716      break;      break;
1717    
1718      case OP_NOT_HSPACE:      case OP_NOT_HSPACE:
1719      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1720          {
1721          SCHECK_PARTIAL();
1722          RRETURN(MATCH_NOMATCH);
1723          }
1724      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1725      switch(c)      switch(c)
1726        {        {
# Line 1666  for (;;) Line 1750  for (;;)
1750      break;      break;
1751    
1752      case OP_HSPACE:      case OP_HSPACE:
1753      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1754          {
1755          SCHECK_PARTIAL();
1756          RRETURN(MATCH_NOMATCH);
1757          }
1758      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1759      switch(c)      switch(c)
1760        {        {
# Line 1696  for (;;) Line 1784  for (;;)
1784      break;      break;
1785    
1786      case OP_NOT_VSPACE:      case OP_NOT_VSPACE:
1787      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1788          {
1789          SCHECK_PARTIAL();
1790          RRETURN(MATCH_NOMATCH);
1791          }
1792      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1793      switch(c)      switch(c)
1794        {        {
# Line 1714  for (;;) Line 1806  for (;;)
1806      break;      break;
1807    
1808      case OP_VSPACE:      case OP_VSPACE:
1809      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1810          {
1811          SCHECK_PARTIAL();
1812          RRETURN(MATCH_NOMATCH);
1813          }
1814      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1815      switch(c)      switch(c)
1816        {        {
# Line 1737  for (;;) Line 1833  for (;;)
1833    
1834      case OP_PROP:      case OP_PROP:
1835      case OP_NOTPROP:      case OP_NOTPROP:
1836      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1837          {
1838          SCHECK_PARTIAL();
1839          RRETURN(MATCH_NOMATCH);
1840          }
1841      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1842        {        {
1843        const ucd_record *prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
# Line 1782  for (;;) Line 1882  for (;;)
1882      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
1883    
1884      case OP_EXTUNI:      case OP_EXTUNI:
1885      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1886          {
1887          SCHECK_PARTIAL();
1888          RRETURN(MATCH_NOMATCH);
1889          }
1890      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1891        {        {
1892        int category = UCD_CATEGORY(c);        int category = UCD_CATEGORY(c);
# Line 1862  for (;;) Line 1966  for (;;)
1966          break;          break;
1967    
1968          default:               /* No repeat follows */          default:               /* No repeat follows */
1969          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
1970              {
1971              CHECK_PARTIAL();
1972              RRETURN(MATCH_NOMATCH);
1973              }
1974          eptr += length;          eptr += length;
1975          continue;              /* With the main loop */          continue;              /* With the main loop */
1976          }          }
# Line 1899  for (;;) Line 2007  for (;;)
2007            {            {
2008            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2009            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2010            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) RRETURN(MATCH_NOMATCH);
2011              if (!match_ref(offset, eptr, length, md, ims))
2012              {              {
2013              CHECK_PARTIAL();              CHECK_PARTIAL();
2014              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1919  for (;;) Line 2028  for (;;)
2028            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
2029            eptr += length;            eptr += length;
2030            }            }
         CHECK_PARTIAL();  
2031          while (eptr >= pp)          while (eptr >= pp)
2032            {            {
2033            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
# Line 1931  for (;;) Line 2039  for (;;)
2039        }        }
2040      /* Control never gets here */      /* Control never gets here */
2041    
   
   
2042      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2043      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2044      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1989  for (;;) Line 2095  for (;;)
2095            {            {
2096            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2097              {              {
2098              CHECK_PARTIAL();              SCHECK_PARTIAL();
2099              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2100              }              }
2101            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 2011  for (;;) Line 2117  for (;;)
2117            {            {
2118            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2119              {              {
2120              CHECK_PARTIAL();              SCHECK_PARTIAL();
2121              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2122              }              }
2123            c = *eptr++;            c = *eptr++;
# Line 2037  for (;;) Line 2143  for (;;)
2143              {              {
2144              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2145              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2146              if (fi >= max)              if (fi >= max) RRETURN(MATCH_NOMATCH);
               {  
               CHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
2147              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2148                {                {
2149                SCHECK_PARTIAL();                SCHECK_PARTIAL();
# Line 2066  for (;;) Line 2168  for (;;)
2168              {              {
2169              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2170              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2171              if (fi >= max)              if (fi >= max) RRETURN(MATCH_NOMATCH);
               {  
               CHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
2172              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2173                {                {
2174                SCHECK_PARTIAL();                SCHECK_PARTIAL();
# Line 2108  for (;;) Line 2206  for (;;)
2206                }                }
2207              eptr += len;              eptr += len;
2208              }              }
           CHECK_PARTIAL();  
2209            for (;;)            for (;;)
2210              {              {
2211              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
# Line 2128  for (;;) Line 2225  for (;;)
2225              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2226              eptr++;              eptr++;
2227              }              }
           CHECK_PARTIAL();  
2228            while (eptr >= pp)            while (eptr >= pp)
2229              {              {
2230              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
# Line 2209  for (;;) Line 2305  for (;;)
2305            {            {
2306            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2307            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2308            if (fi >= max)            if (fi >= max) RRETURN(MATCH_NOMATCH);
             {  
             CHECK_PARTIAL();  
             RRETURN(MATCH_NOMATCH);  
             }  
2309            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2310              {              {
2311              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 2238  for (;;) Line 2330  for (;;)
2330            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2331            eptr += len;            eptr += len;
2332            }            }
         CHECK_PARTIAL();  
2333          for(;;)          for(;;)
2334            {            {
2335            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
# Line 2262  for (;;) Line 2353  for (;;)
2353        length = 1;        length = 1;
2354        ecode++;        ecode++;
2355        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2356        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2357            {
2358            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2359            RRETURN(MATCH_NOMATCH);
2360            }
2361        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2362        }        }
2363      else      else
# Line 2270  for (;;) Line 2365  for (;;)
2365    
2366      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2367        {        {
2368        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2369            {
2370            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2371            RRETURN(MATCH_NOMATCH);
2372            }
2373        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2374        ecode += 2;        ecode += 2;
2375        }        }
# Line 2286  for (;;) Line 2385  for (;;)
2385        ecode++;        ecode++;
2386        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2387    
2388        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2389            {
2390            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2391            RRETURN(MATCH_NOMATCH);
2392            }
2393    
2394        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2395        can use the fast lookup table. */        can use the fast lookup table. */
# Line 2321  for (;;) Line 2424  for (;;)
2424    
2425      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2426        {        {
2427        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2428            {
2429            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2430            RRETURN(MATCH_NOMATCH);
2431            }
2432        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2433        ecode += 2;        ecode += 2;
2434        }        }
# Line 2375  for (;;) Line 2482  for (;;)
2482      case OP_MINQUERY:      case OP_MINQUERY:
2483      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2484      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2485    
2486      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2487      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2488      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
# Line 2427  for (;;) Line 2535  for (;;)
2535              {              {
2536              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2537              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2538              if (fi >= max)              if (fi >= max) RRETURN(MATCH_NOMATCH);
               {  
               CHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
2539              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
2540                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, length) == 0) eptr += length;
2541  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2463  for (;;) Line 2567  for (;;)
2567              else break;              else break;
2568              }              }
2569    
           CHECK_PARTIAL();  
2570            if (possessive) continue;            if (possessive) continue;
2571    
2572            for(;;)            for(;;)
# Line 2524  for (;;) Line 2627  for (;;)
2627            {            {
2628            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2629            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2630            if (fi >= max)            if (fi >= max) RRETURN(MATCH_NOMATCH);
             {  
             CHECK_PARTIAL();  
             RRETURN(MATCH_NOMATCH);  
             }  
2631            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2632              {              {
2633              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 2547  for (;;) Line 2646  for (;;)
2646            eptr++;            eptr++;
2647            }            }
2648    
         CHECK_PARTIAL();  
2649          if (possessive) continue;          if (possessive) continue;
2650    
2651          while (eptr >= pp)          while (eptr >= pp)
# Line 2574  for (;;) Line 2672  for (;;)
2672            }            }
2673          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2674          }          }
2675    
2676        if (min == max) continue;        if (min == max) continue;
2677    
2678        if (minimize)        if (minimize)
2679          {          {
2680          for (fi = min;; fi++)          for (fi = min;; fi++)
2681            {            {
2682            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2683            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2684            if (fi >= max)            if (fi >= max) RRETURN(MATCH_NOMATCH);
             {  
             CHECK_PARTIAL();  
             RRETURN(MATCH_NOMATCH);  
             }  
2685            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
2686              {              {
2687              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 2603  for (;;) Line 2699  for (;;)
2699            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2700            eptr++;            eptr++;
2701            }            }
         CHECK_PARTIAL();  
2702          if (possessive) continue;          if (possessive) continue;
2703    
2704          while (eptr >= pp)          while (eptr >= pp)
2705            {            {
2706            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
# Line 2620  for (;;) Line 2716  for (;;)
2716      checking can be multibyte. */      checking can be multibyte. */
2717    
2718      case OP_NOT:      case OP_NOT:
2719      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2720          {
2721          SCHECK_PARTIAL();
2722          RRETURN(MATCH_NOMATCH);
2723          }
2724      ecode++;      ecode++;
2725      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2726      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2763  for (;;) Line 2863  for (;;)
2863              {              {
2864              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2865              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2866              if (fi >= max)              if (fi >= max) RRETURN(MATCH_NOMATCH);
               {  
               CHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
2867              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2868                {                {
2869                SCHECK_PARTIAL();                SCHECK_PARTIAL();
# Line 2786  for (;;) Line 2882  for (;;)
2882              {              {
2883              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2884              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2885              if (fi >= max)              if (fi >= max) RRETURN(MATCH_NOMATCH);
               {  
               CHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
2886              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2887                {                {
2888                SCHECK_PARTIAL();                SCHECK_PARTIAL();
# Line 2822  for (;;) Line 2914  for (;;)
2914              if (fc == d) break;              if (fc == d) break;
2915              eptr += len;              eptr += len;
2916              }              }
         CHECK_PARTIAL();  
2917          if (possessive) continue;          if (possessive) continue;
2918          for(;;)          for(;;)
2919              {              {
# Line 2841  for (;;) Line 2932  for (;;)
2932              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2933              eptr++;              eptr++;
2934              }              }
           CHECK_PARTIAL();  
2935            if (possessive) continue;            if (possessive) continue;
2936            while (eptr >= pp)            while (eptr >= pp)
2937              {              {
# Line 2904  for (;;) Line 2994  for (;;)
2994              {              {
2995              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2996              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2997              if (fi >= max)              if (fi >= max) RRETURN(MATCH_NOMATCH);
               {  
               CHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
2998              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
2999                {                {
3000                SCHECK_PARTIAL();                SCHECK_PARTIAL();
# Line 2926  for (;;) Line 3012  for (;;)
3012              {              {
3013              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3014              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3015              if (fi >= max)              if (fi >= max) RRETURN(MATCH_NOMATCH);
               {  
               CHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
3016              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3017                {                {
3018                SCHECK_PARTIAL();                SCHECK_PARTIAL();
# Line 2961  for (;;) Line 3043  for (;;)
3043              if (fc == d) break;              if (fc == d) break;
3044              eptr += len;              eptr += len;
3045              }              }
           CHECK_PARTIAL();  
3046            if (possessive) continue;            if (possessive) continue;
3047            for(;;)            for(;;)
3048              {              {
# Line 2980  for (;;) Line 3061  for (;;)
3061              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
3062              eptr++;              eptr++;
3063              }              }
           CHECK_PARTIAL();  
3064            if (possessive) continue;            if (possessive) continue;
3065            while (eptr >= pp)            while (eptr >= pp)
3066              {              {
# Line 3486  for (;;) Line 3566  for (;;)
3566          break;          break;
3567    
3568          case OP_ALLANY:          case OP_ALLANY:
3569          if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);          if (eptr > md->end_subject - min)
3570              {
3571              SCHECK_PARTIAL();
3572              RRETURN(MATCH_NOMATCH);
3573              }
3574          eptr += min;          eptr += min;
3575          break;          break;
3576    
3577          case OP_ANYBYTE:          case OP_ANYBYTE:
3578          if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);          if (eptr > md->end_subject - min)
3579              {
3580              SCHECK_PARTIAL();
3581              RRETURN(MATCH_NOMATCH);
3582              }
3583          eptr += min;          eptr += min;
3584          break;          break;
3585    
# Line 3700  for (;;) Line 3788  for (;;)
3788              {              {
3789              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3790              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3791              if (fi >= max)              if (fi >= max) RRETURN(MATCH_NOMATCH);
               {  
               CHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
3792              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3793                {                {
3794                SCHECK_PARTIAL();                SCHECK_PARTIAL();
# Line 3720  for (;;) Line 3804  for (;;)
3804              {              {
3805              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3806              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3807              if (fi >= max)              if (fi >= max) RRETURN(MATCH_NOMATCH);
               {  
               CHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
3808              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3809                {                {
3810                SCHECK_PARTIAL();                SCHECK_PARTIAL();
# Line 3744  for (;;) Line 3824  for (;;)
3824              {              {
3825              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3826              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3827              if (fi >= max)              if (fi >= max) RRETURN(MATCH_NOMATCH);
               {  
               CHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
3828              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3829                {                {
3830                SCHECK_PARTIAL();                SCHECK_PARTIAL();
# Line 3766  for (;;) Line 3842  for (;;)
3842              {              {
3843              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3844              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3845              if (fi >= max)              if (fi >= max) RRETURN(MATCH_NOMATCH);
               {  
               CHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
3846              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3847                {                {
3848                SCHECK_PARTIAL();                SCHECK_PARTIAL();
# Line 3788  for (;;) Line 3860  for (;;)
3860              {              {
3861              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3862              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3863              if (fi >= max)              if (fi >= max) RRETURN(MATCH_NOMATCH);
               {  
               CHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
3864              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3865                {                {
3866                SCHECK_PARTIAL();                SCHECK_PARTIAL();
# Line 3819  for (;;) Line 3887  for (;;)
3887            {            {
3888            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3889            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3890            if (fi >= max)            if (fi >= max) RRETURN(MATCH_NOMATCH);
             {  
             CHECK_PARTIAL();  
             RRETURN(MATCH_NOMATCH);  
             }  
3891            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3892              {              {
3893              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 3855  for (;;) Line 3919  for (;;)
3919            {            {
3920            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3921            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3922            if (fi >= max)            if (fi >= max) RRETURN(MATCH_NOMATCH);
             {  
             CHECK_PARTIAL();  
             RRETURN(MATCH_NOMATCH);  
             }  
3923            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3924              {              {
3925              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4022  for (;;) Line 4082  for (;;)
4082            {            {
4083            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4084            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4085            if (fi >= max)            if (fi >= max) RRETURN(MATCH_NOMATCH);
             {  
             CHECK_PARTIAL();  
             RRETURN(MATCH_NOMATCH);  
             }  
4086            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4087              {              {
4088              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4222  for (;;) Line 4278  for (;;)
4278    
4279          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4280    
         CHECK_PARTIAL();  
4281          if (possessive) continue;          if (possessive) continue;
4282          for(;;)          for(;;)
4283            {            {
# Line 4259  for (;;) Line 4314  for (;;)
4314    
4315          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4316    
         CHECK_PARTIAL();  
4317          if (possessive) continue;          if (possessive) continue;
4318          for(;;)          for(;;)
4319            {            {
# Line 4496  for (;;) Line 4550  for (;;)
4550    
4551          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4552    
         CHECK_PARTIAL();  
4553          if (possessive) continue;          if (possessive) continue;
4554          for(;;)          for(;;)
4555            {            {
# Line 4652  for (;;) Line 4705  for (;;)
4705    
4706          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4707    
         CHECK_PARTIAL();  
4708          if (possessive) continue;          if (possessive) continue;
4709          while (eptr >= pp)          while (eptr >= pp)
4710            {            {
# Line 4904  md->jscript_compat = (re->options & PCRE Line 4956  md->jscript_compat = (re->options & PCRE
4956  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4957  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
4958  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
4959    md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
4960  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
4961                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
4962  md->hitend = FALSE;  md->hitend = FALSE;
# Line 5068  if (!anchored) Line 5121  if (!anchored)
5121      }      }
5122    else    else
5123      if (!startline && study != NULL &&      if (!startline && study != NULL &&
5124        (study->options & PCRE_STUDY_MAPPED) != 0)        (study->flags & PCRE_STUDY_MAPPED) != 0)
5125          start_bits = study->start_bits;          start_bits = study->start_bits;
5126    }    }
5127    
# Line 5194  for(;;) Line 5247  for(;;)
5247    /* Restore fudged end_subject */    /* Restore fudged end_subject */
5248    
5249    end_subject = save_end_subject;    end_subject = save_end_subject;
5250    
5251  #ifdef DEBUG  /* Sigh. Some compilers never learn. */    /* The following two optimizations are disabled for partial matching or if
   printf(">>>> Match against: ");  
   pchars(start_match, end_subject - start_match, TRUE, md);  
   printf("\n");  
 #endif  
   
   /* If req_byte is set, we know that that character must appear in the  
   subject for the match to succeed. If the first character is set, req_byte  
   must be later in the subject; otherwise the test starts at the match point.  
   This optimization can save a huge amount of backtracking in patterns with  
   nested unlimited repeats that aren't going to match. Writing separate code  
   for cased/caseless versions makes it go faster, as does using an  
   autoincrement and backing off on a match.  
   
   HOWEVER: when the subject string is very, very long, searching to its end  
   can take a long time, and give bad performance on quite ordinary patterns.  
   This showed up when somebody was matching something like /^\d+C/ on a  
   32-megabyte string... so we don't do this when the string is sufficiently  
   long.  
   
   ALSO: this processing is disabled when partial matching is requested, or if  
5252    disabling is explicitly requested. */    disabling is explicitly requested. */
5253    
5254    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
5255        req_byte >= 0 &&      {
5256        end_subject - start_match < REQ_BYTE_MAX &&      /* If the pattern was studied, a minimum subject length may be set. This is
5257        !md->partial)      a lower bound; it is possible that no string of that length actually matches the
5258      {      pattern. Although the value is, strictly, in characters, we treat it as
5259      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);      bytes to avoid spending too much time in this optimization. */
5260    
5261      /* We don't need to repeat the search if we haven't yet reached the      if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
5262      place we found it at last time. */          end_subject - start_match < study->minlength)
5263          {
5264      if (p > req_byte_ptr)        rc = MATCH_NOMATCH;
5265        {        break;
5266        if (req_byte_caseless)        }
5267    
5268        /* If req_byte is set, we know that that character must appear in the
5269        subject for the match to succeed. If the first character is set, req_byte
5270        must be later in the subject; otherwise the test starts at the match point.
5271        This optimization can save a huge amount of backtracking in patterns with
5272        nested unlimited repeats that aren't going to match. Writing separate code
5273        for cased/caseless versions makes it go faster, as does using an
5274        autoincrement and backing off on a match.
5275    
5276        HOWEVER: when the subject string is very, very long, searching to its end
5277        can take a long time, and give bad performance on quite ordinary patterns.
5278        This showed up when somebody was matching something like /^\d+C/ on a
5279        32-megabyte string... so we don't do this when the string is sufficiently
5280        long. */
5281    
5282        if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
5283          {
5284          register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
5285    
5286          /* We don't need to repeat the search if we haven't yet reached the
5287          place we found it at last time. */
5288    
5289          if (p > req_byte_ptr)
5290          {          {
5291          while (p < end_subject)          if (req_byte_caseless)
5292            {            {
5293            register int pp = *p++;            while (p < end_subject)
5294            if (pp == req_byte || pp == req_byte2) { p--; break; }              {
5295                register int pp = *p++;
5296                if (pp == req_byte || pp == req_byte2) { p--; break; }
5297                }
5298            }            }
5299          }          else
       else  
         {  
         while (p < end_subject)  
5300            {            {
5301            if (*p++ == req_byte) { p--; break; }            while (p < end_subject)
5302                {
5303                if (*p++ == req_byte) { p--; break; }
5304                }
5305            }            }
5306    
5307            /* If we can't find the required character, break the matching loop,
5308            forcing a match failure. */
5309    
5310            if (p >= end_subject)
5311              {
5312              rc = MATCH_NOMATCH;
5313              break;
5314              }
5315    
5316            /* If we have found the required character, save the point where we
5317            found it, so that we don't search again next time round the loop if
5318            the start hasn't passed this character yet. */
5319    
5320            req_byte_ptr = p;
5321          }          }
   
       /* If we can't find the required character, break the matching loop,  
       forcing a match failure. */  
   
       if (p >= end_subject)  
         {  
         rc = MATCH_NOMATCH;  
         break;  
         }  
   
       /* If we have found the required character, save the point where we  
       found it, so that we don't search again next time round the loop if  
       the start hasn't passed this character yet. */  
   
       req_byte_ptr = p;  
5322        }        }
5323      }      }
5324    
5325    #ifdef DEBUG  /* Sigh. Some compilers never learn. */
5326      printf(">>>> Match against: ");
5327      pchars(start_match, end_subject - start_match, TRUE, md);
5328      printf("\n");
5329    #endif
5330    
5331    /* OK, we can now run the match. If "hitend" is set afterwards, remember the    /* OK, we can now run the match. If "hitend" is set afterwards, remember the
5332    first starting point for which a partial match was found. */    first starting point for which a partial match was found. */
5333    
5334    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
5335      md->start_used_ptr = start_match;
5336    md->match_call_count = 0;    md->match_call_count = 0;
5337    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
5338    if (md->hitend && start_partial == NULL) start_partial = start_match;    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
5339    
5340    switch(rc)    switch(rc)
5341      {      {
# Line 5299  for(;;) Line 5365  for(;;)
5365      rc = MATCH_NOMATCH;      rc = MATCH_NOMATCH;
5366      goto ENDLOOP;      goto ENDLOOP;
5367    
5368      /* Any other return is some kind of error. */      /* Any other return is either a match, or some kind of error. */
5369    
5370      default:      default:
5371      goto ENDLOOP;      goto ENDLOOP;
# Line 5381  if (rc == MATCH_MATCH) Line 5447  if (rc == MATCH_MATCH)
5447    too many to fit into the vector. */    too many to fit into the vector. */
5448    
5449    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
5450    
5451    /* If there is space, set up the whole thing as substring 0. The value of    /* If there is space, set up the whole thing as substring 0. The value of
5452    md->start_match_ptr might be modified if \K was encountered on the successful    md->start_match_ptr might be modified if \K was encountered on the successful
5453    matching path. */    matching path. */

Legend:
Removed from v.427  
changed lines
  Added in v.455

  ViewVC Help
Powered by ViewVC 1.1.5