/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 904 by ph10, Mon Jan 23 17:30:49 2012 UTC revision 1011 by ph10, Sat Aug 25 11:36:15 2012 UTC
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40    
   
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
45    
46    
# Line 282  typedef struct stateblock { Line 281  typedef struct stateblock {
281    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
282  } stateblock;  } stateblock;
283    
284  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
285    
286    
287  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 382  for the current character, one for the f Line 381  for the current character, one for the f
381      next_new_state->count  = (y); \      next_new_state->count  = (y); \
382      next_new_state->data   = (z); \      next_new_state->data   = (z); \
383      next_new_state++; \      next_new_state++; \
384      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
385          (x), (y), (z), __LINE__)); \
386      } \      } \
387    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
388    
# Line 424  BOOL utf = (md->poptions & PCRE_UTF8) != Line 424  BOOL utf = (md->poptions & PCRE_UTF8) !=
424  BOOL utf = FALSE;  BOOL utf = FALSE;
425  #endif  #endif
426    
427    BOOL reset_could_continue = FALSE;
428    
429  rlevel++;  rlevel++;
430  offsetcount &= (-2);  offsetcount &= (-2);
431    
# Line 571  for (;;) Line 573  for (;;)
573    int clen, dlen;    int clen, dlen;
574    unsigned int c, d;    unsigned int c, d;
575    int forced_fail = 0;    int forced_fail = 0;
576    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
577      BOOL could_continue = reset_could_continue;
578      reset_could_continue = FALSE;
579    
580    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
581    new state list. */    new state list. */
# Line 607  for (;;) Line 611  for (;;)
611    
612    if (ptr < end_subject)    if (ptr < end_subject)
613      {      {
614      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
615  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
616      if (utf) { GETCHARLEN(c, ptr, clen); } else      if (utf) { GETCHARLEN(c, ptr, clen); } else
617  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
# Line 641  for (;;) Line 645  for (;;)
645    
646      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
647      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
648      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
649        state, arrange for it to passed on. */
650    
651      if (state_offset < 0)      if (state_offset < 0)
652        {        {
# Line 650  for (;;) Line 655  for (;;)
655          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
656          ADD_NEW_DATA(state_offset, current_state->count,          ADD_NEW_DATA(state_offset, current_state->count,
657            current_state->data - 1);            current_state->data - 1);
658            if (could_continue) reset_could_continue = TRUE;
659          continue;          continue;
660          }          }
661        else        else
# Line 689  for (;;) Line 695  for (;;)
695      permitted.      permitted.
696    
697      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
698      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
699      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
700      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
701      */      these ones to new opcodes. */
702    
703      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
704        {        {
# Line 783  for (;;) Line 789  for (;;)
789              offsets[0] = (int)(current_subject - start_subject);              offsets[0] = (int)(current_subject - start_subject);
790              offsets[1] = (int)(ptr - start_subject);              offsets[1] = (int)(ptr - start_subject);
791              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
792                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
793              }              }
794            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
795              {              {
# Line 888  for (;;) Line 894  for (;;)
894        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
895        case OP_ANY:        case OP_ANY:
896        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
897          { ADD_NEW(state_offset + 1, 0); }          {
898            if (ptr + 1 >= md->end_subject &&
899                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
900                NLBLOCK->nltype == NLTYPE_FIXED &&
901                NLBLOCK->nllen == 2 &&
902                c == NLBLOCK->nl[0])
903              {
904              could_continue = partial_newline = TRUE;
905              }
906            else
907              {
908              ADD_NEW(state_offset + 1, 0);
909              }
910            }
911        break;        break;
912    
913        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 916  for (;;) Line 935  for (;;)
935                 (ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
936              ))              ))
937            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
938            else if (ptr + 1 >= md->end_subject &&
939                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
940                     NLBLOCK->nltype == NLTYPE_FIXED &&
941                     NLBLOCK->nllen == 2 &&
942                     c == NLBLOCK->nl[0])
943              {
944              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
945                {
946                reset_could_continue = TRUE;
947                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
948                }
949              else could_continue = partial_newline = TRUE;
950              }
951          }          }
952        break;        break;
953    
# Line 928  for (;;) Line 960  for (;;)
960          else if (clen == 0 ||          else if (clen == 0 ||
961              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
962            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
963            else if (ptr + 1 >= md->end_subject &&
964                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
965                     NLBLOCK->nltype == NLTYPE_FIXED &&
966                     NLBLOCK->nllen == 2 &&
967                     c == NLBLOCK->nl[0])
968              {
969              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
970                {
971                reset_could_continue = TRUE;
972                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
973                }
974              else could_continue = partial_newline = TRUE;
975              }
976          }          }
977        else if (IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
978          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 1090  for (;;) Line 1135  for (;;)
1135        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1136        if (clen > 0)        if (clen > 0)
1137          {          {
1138          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1139                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1140                NLBLOCK->nltype == NLTYPE_FIXED &&
1141                NLBLOCK->nllen == 2 &&
1142                c == NLBLOCK->nl[0])
1143              {
1144              could_continue = partial_newline = TRUE;
1145              }
1146            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1147              (c < 256 &&              (c < 256 &&
1148                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1149                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1113  for (;;) Line 1166  for (;;)
1166        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1167        if (clen > 0)        if (clen > 0)
1168          {          {
1169          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1170                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1171                NLBLOCK->nltype == NLTYPE_FIXED &&
1172                NLBLOCK->nllen == 2 &&
1173                c == NLBLOCK->nl[0])
1174              {
1175              could_continue = partial_newline = TRUE;
1176              }
1177            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1178              (c < 256 &&              (c < 256 &&
1179                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1180                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1135  for (;;) Line 1196  for (;;)
1196        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1197        if (clen > 0)        if (clen > 0)
1198          {          {
1199          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1200                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1201                NLBLOCK->nltype == NLTYPE_FIXED &&
1202                NLBLOCK->nllen == 2 &&
1203                c == NLBLOCK->nl[0])
1204              {
1205              could_continue = partial_newline = TRUE;
1206              }
1207            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1208              (c < 256 &&              (c < 256 &&
1209                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1210                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1155  for (;;) Line 1224  for (;;)
1224        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1225        if (clen > 0)        if (clen > 0)
1226          {          {
1227          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1228                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1229                NLBLOCK->nltype == NLTYPE_FIXED &&
1230                NLBLOCK->nllen == 2 &&
1231                c == NLBLOCK->nl[0])
1232              {
1233              could_continue = partial_newline = TRUE;
1234              }
1235            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1236              (c < 256 &&              (c < 256 &&
1237                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1238                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1176  for (;;) Line 1253  for (;;)
1253        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1254        if (clen > 0)        if (clen > 0)
1255          {          {
1256          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1257                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1258                NLBLOCK->nltype == NLTYPE_FIXED &&
1259                NLBLOCK->nllen == 2 &&
1260                c == NLBLOCK->nl[0])
1261              {
1262              could_continue = partial_newline = TRUE;
1263              }
1264            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1265              (c < 256 &&              (c < 256 &&
1266                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1267                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1283  for (;;) Line 1368  for (;;)
1368        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1369        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1370        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1371        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1372          {          {
1373            int lgb, rgb;
1374          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1375          int ncount = 0;          int ncount = 0;
1376          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
# Line 1292  for (;;) Line 1378  for (;;)
1378            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1379            next_active_state--;            next_active_state--;
1380            }            }
1381            lgb = UCD_GRAPHBREAK(c);
1382          while (nptr < end_subject)          while (nptr < end_subject)
1383            {            {
1384            int nd;            dlen = 1;
1385            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1386            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1387            if (UCD_CATEGORY(nd) != ucp_M) break;            if (PRIV(ucp_gbtable)[lgb * ucp_gbCount + rgb] == 0) break;
1388            ncount++;            ncount++;
1389            nptr += ndlen;            lgb = rgb;
1390              nptr += dlen;
1391            }            }
1392          count++;          count++;
1393          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
# Line 1539  for (;;) Line 1627  for (;;)
1627        QS2:        QS2:
1628    
1629        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1630        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1631          {          {
1632            int lgb, rgb;
1633          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1634          int ncount = 0;          int ncount = 0;
1635          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
# Line 1549  for (;;) Line 1638  for (;;)
1638            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1639            next_active_state--;            next_active_state--;
1640            }            }
1641            lgb = UCD_GRAPHBREAK(c);
1642          while (nptr < end_subject)          while (nptr < end_subject)
1643            {            {
1644            int nd;            dlen = 1;
1645            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1646            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1647            if (UCD_CATEGORY(nd) != ucp_M) break;            if (PRIV(ucp_gbtable)[lgb * ucp_gbCount + rgb] == 0) break;
1648            ncount++;            ncount++;
1649            nptr += ndlen;            lgb = rgb;
1650              nptr += dlen;
1651            }            }
1652          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1653          }          }
# Line 1806  for (;;) Line 1897  for (;;)
1897        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1898          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1899        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1900        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1901          {          {
1902            int lgb, rgb;
1903          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1904          int ncount = 0;          int ncount = 0;
1905          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
# Line 1815  for (;;) Line 1907  for (;;)
1907            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1908            next_active_state--;            next_active_state--;
1909            }            }
1910            lgb = UCD_GRAPHBREAK(c);
1911          while (nptr < end_subject)          while (nptr < end_subject)
1912            {            {
1913            int nd;            dlen = 1;
1914            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1915            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1916            if (UCD_CATEGORY(nd) != ucp_M) break;            if (PRIV(ucp_gbtable)[lgb * ucp_gbCount + rgb] == 0) break;
1917            ncount++;            ncount++;
1918            nptr += ndlen;            lgb = rgb;
1919              nptr += dlen;
1920            }            }
1921            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1922                reset_could_continue = TRUE;
1923          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1924            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1925          else          else
# Line 2025  for (;;) Line 2121  for (;;)
2121        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
2122    
2123        case OP_EXTUNI:        case OP_EXTUNI:
2124        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
2125          {          {
2126            int lgb, rgb;
2127          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
2128          int ncount = 0;          int ncount = 0;
2129            lgb = UCD_GRAPHBREAK(c);
2130          while (nptr < end_subject)          while (nptr < end_subject)
2131            {            {
2132            int nclen = 1;            dlen = 1;
2133            GETCHARLEN(c, nptr, nclen);            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2134            if (UCD_CATEGORY(c) != ucp_M) break;            rgb = UCD_GRAPHBREAK(d);
2135              if (PRIV(ucp_gbtable)[lgb * ucp_gbCount + rgb] == 0) break;
2136            ncount++;            ncount++;
2137            nptr += nclen;            lgb = rgb;
2138              nptr += dlen;
2139            }            }
2140            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2141                reset_could_continue = TRUE;
2142          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2143          }          }
2144        break;        break;
# Line 2062  for (;;) Line 2164  for (;;)
2164          break;          break;
2165    
2166          case 0x000d:          case 0x000d:
2167          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2168              {
2169              ADD_NEW(state_offset + 1, 0);
2170              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2171                reset_could_continue = TRUE;
2172              }
2173            else if (ptr[1] == 0x0a)
2174            {            {
2175            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2176            }            }
# Line 2171  for (;;) Line 2279  for (;;)
2279        break;        break;
2280    
2281        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2282        /* Match a negated single character casefully. This is only used for        /* Match a negated single character casefully. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2283    
2284        case OP_NOT:        case OP_NOT:
2285        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2286        break;        break;
2287    
2288        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2289        /* Match a negated single character caselessly. This is only used for        /* Match a negated single character caselessly. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2290    
2291        case OP_NOTI:        case OP_NOTI:
2292        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0)
2293          { ADD_NEW(state_offset + dlen + 1, 0); }          {
2294            unsigned int otherd;
2295    #ifdef SUPPORT_UTF
2296            if (utf && d >= 128)
2297              {
2298    #ifdef SUPPORT_UCP
2299              otherd = UCD_OTHERCASE(d);
2300    #endif  /* SUPPORT_UCP */
2301              }
2302            else
2303    #endif  /* SUPPORT_UTF */
2304            otherd = TABLE_GET(d, fcc, d);
2305            if (c != d && c != otherd)
2306              { ADD_NEW(state_offset + dlen + 1, 0); }
2307            }
2308        break;        break;
2309    
2310        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 2692  for (;;) Line 2810  for (;;)
2810              {              {
2811              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2812  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2813              const pcre_uchar *p = start_subject + local_offsets[rc];              if (utf)
2814              const pcre_uchar *pp = start_subject + local_offsets[rc+1];                {
2815              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;                const pcre_uchar *p = start_subject + local_offsets[rc];
2816                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2817                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2818                  }
2819  #endif  #endif
2820              if (charcount > 0)              if (charcount > 0)
2821                {                {
# Line 2793  for (;;) Line 2914  for (;;)
2914              const pcre_uchar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2915              charcount = (int)(pp - p);              charcount = (int)(pp - p);
2916  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2917              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;              if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2918  #endif  #endif
2919              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2920              }              }
# Line 2875  for (;;) Line 2996  for (;;)
2996            else            else
2997              {              {
2998  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2999              const pcre_uchar *p = start_subject + local_offsets[0];              if (utf)
3000              const pcre_uchar *pp = start_subject + local_offsets[1];                {
3001              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;                const pcre_uchar *p = start_subject + local_offsets[0];
3002                  const pcre_uchar *pp = start_subject + local_offsets[1];
3003                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
3004                  }
3005  #endif  #endif
3006              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
3007              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
# Line 2946  for (;;) Line 3070  for (;;)
3070    if (new_count <= 0)    if (new_count <= 0)
3071      {      {
3072      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3073          could_continue &&                            /* Some could go on */          could_continue &&                            /* Some could go on, and */
3074          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3075          (                                            /* either... */          (                                            /* either... */
3076          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2954  for (;;) Line 3078  for (;;)
3078          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3079           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3080          ) &&                                         /* And... */          ) &&                                         /* And... */
3081          ptr >= end_subject &&                  /* Reached end of subject */          (
3082          ptr > md->start_used_ptr)              /* Inspected non-empty string */          partial_newline ||                           /* Either partial NL */
3083              (                                          /* or ... */
3084              ptr >= end_subject &&                /* End of subject and */
3085              ptr > md->start_used_ptr)            /* Inspected non-empty string */
3086              )
3087            )
3088        {        {
3089        if (offsetcount >= 2)        if (offsetcount >= 2)
3090          {          {
# Line 3052  if (offsetcount < 0) return PCRE_ERROR_B Line 3181  if (offsetcount < 0) return PCRE_ERROR_B
3181  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3182  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3183    
3184  /* We need to find the pointer to any study data before we test for byte  /* Check that the first field in the block is the magic number. If it is not,
3185  flipping, so we scan the extra_data block first. This may set two fields in the  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3186  match block, so we must initialize them beforehand. However, the other fields  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3187  in the match block must not be set until after the byte flipping. */  means that the pattern is likely compiled with different endianness. */
3188    
3189    if (re->magic_number != MAGIC_NUMBER)
3190      return re->magic_number == REVERSED_MAGIC_NUMBER?
3191        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3192    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3193    
3194    /* If restarting after a partial match, do some sanity checks on the contents
3195    of the workspace. */
3196    
3197    if ((options & PCRE_DFA_RESTART) != 0)
3198      {
3199      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3200        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3201          return PCRE_ERROR_DFA_BADRESTART;
3202      }
3203    
3204    /* Set up study, callout, and table data */
3205    
3206  md->tables = re->tables;  md->tables = re->tables;
3207  md->callout_data = NULL;  md->callout_data = NULL;
# Line 3074  if (extra_data != NULL) Line 3220  if (extra_data != NULL)
3220      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3221    }    }
3222    
 /* Check that the first field in the block is the magic number. If it is not,  
 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to  
 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which  
 means that the pattern is likely compiled with different endianness. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   return re->magic_number == REVERSED_MAGIC_NUMBER?  
     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;  
 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;  
   
3223  /* Set some local values */  /* Set some local values */
3224    
3225  current_subject = (const pcre_uchar *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;

Legend:
Removed from v.904  
changed lines
  Added in v.1011

  ViewVC Help
Powered by ViewVC 1.1.5