/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 919 by ph10, Fri Feb 17 11:48:02 2012 UTC revision 1015 by ph10, Sun Aug 26 16:07:14 2012 UTC
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40    
   
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
45    
46    
# Line 282  typedef struct stateblock { Line 281  typedef struct stateblock {
281    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
282  } stateblock;  } stateblock;
283    
284  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
285    
286    
287  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 382  for the current character, one for the f Line 381  for the current character, one for the f
381      next_new_state->count  = (y); \      next_new_state->count  = (y); \
382      next_new_state->data   = (z); \      next_new_state->data   = (z); \
383      next_new_state++; \      next_new_state++; \
384      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
385          (x), (y), (z), __LINE__)); \
386      } \      } \
387    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
388    
# Line 573  for (;;) Line 573  for (;;)
573    int clen, dlen;    int clen, dlen;
574    unsigned int c, d;    unsigned int c, d;
575    int forced_fail = 0;    int forced_fail = 0;
576    BOOL partial_newline = FALSE;    BOOL partial_newline = FALSE;
577    BOOL could_continue = reset_could_continue;    BOOL could_continue = reset_could_continue;
578    reset_could_continue = FALSE;    reset_could_continue = FALSE;
579    
580    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
581    new state list. */    new state list. */
582    
# Line 611  for (;;) Line 611  for (;;)
611    
612    if (ptr < end_subject)    if (ptr < end_subject)
613      {      {
614      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
615  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
616      if (utf) { GETCHARLEN(c, ptr, clen); } else      if (utf) { GETCHARLEN(c, ptr, clen); } else
617  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
# Line 645  for (;;) Line 645  for (;;)
645    
646      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
647      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
648      been skipped". If the could_continue flag was passed over from a previous      been skipped". If the could_continue flag was passed over from a previous
649      state, arrange for it to be passed on. */      state, arrange for it to be passed on. */
650    
651      if (state_offset < 0)      if (state_offset < 0)
# Line 695  for (;;) Line 695  for (;;)
695      permitted.      permitted.
696    
697      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
698      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
699      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
700      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
701      */      these ones to new opcodes. */
702    
703      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
704        {        {
# Line 789  for (;;) Line 789  for (;;)
789              offsets[0] = (int)(current_subject - start_subject);              offsets[0] = (int)(current_subject - start_subject);
790              offsets[1] = (int)(ptr - start_subject);              offsets[1] = (int)(ptr - start_subject);
791              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
792                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
793              }              }
794            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
795              {              {
# Line 894  for (;;) Line 894  for (;;)
894        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
895        case OP_ANY:        case OP_ANY:
896        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
897          {          {
898          if (ptr + 1 >= md->end_subject &&          if (ptr + 1 >= md->end_subject &&
899              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
900              NLBLOCK->nltype == NLTYPE_FIXED &&              NLBLOCK->nltype == NLTYPE_FIXED &&
901              NLBLOCK->nllen == 2 &&              NLBLOCK->nllen == 2 &&
902              c == NLBLOCK->nl[0])              c == NLBLOCK->nl[0])
903            {            {
904            could_continue = partial_newline = TRUE;            could_continue = partial_newline = TRUE;
905            }            }
906          else          else
907            {            {
908            ADD_NEW(state_offset + 1, 0);            ADD_NEW(state_offset + 1, 0);
909            }            }
910          }          }
911        break;        break;
912    
# Line 938  for (;;) Line 938  for (;;)
938          else if (ptr + 1 >= md->end_subject &&          else if (ptr + 1 >= md->end_subject &&
939                   (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&                   (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
940                   NLBLOCK->nltype == NLTYPE_FIXED &&                   NLBLOCK->nltype == NLTYPE_FIXED &&
941                   NLBLOCK->nllen == 2 &&                   NLBLOCK->nllen == 2 &&
942                   c == NLBLOCK->nl[0])                   c == NLBLOCK->nl[0])
943            {            {
944            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
945              {              {
946              reset_could_continue = TRUE;              reset_could_continue = TRUE;
947              ADD_NEW_DATA(-(state_offset + 1), 0, 1);              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
948              }              }
949            else could_continue = partial_newline = TRUE;            else could_continue = partial_newline = TRUE;
950            }            }
951          }          }
952        break;        break;
953    
# Line 963  for (;;) Line 963  for (;;)
963          else if (ptr + 1 >= md->end_subject &&          else if (ptr + 1 >= md->end_subject &&
964                   (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&                   (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
965                   NLBLOCK->nltype == NLTYPE_FIXED &&                   NLBLOCK->nltype == NLTYPE_FIXED &&
966                   NLBLOCK->nllen == 2 &&                   NLBLOCK->nllen == 2 &&
967                   c == NLBLOCK->nl[0])                   c == NLBLOCK->nl[0])
968            {            {
969            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
970              {              {
971              reset_could_continue = TRUE;              reset_could_continue = TRUE;
972              ADD_NEW_DATA(-(state_offset + 1), 0, 1);              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
973              }              }
974            else could_continue = partial_newline = TRUE;            else could_continue = partial_newline = TRUE;
975            }            }
976          }          }
977        else if (IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
978          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 1138  for (;;) Line 1138  for (;;)
1138          if (d == OP_ANY && ptr + 1 >= md->end_subject &&          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1139              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1140              NLBLOCK->nltype == NLTYPE_FIXED &&              NLBLOCK->nltype == NLTYPE_FIXED &&
1141              NLBLOCK->nllen == 2 &&              NLBLOCK->nllen == 2 &&
1142              c == NLBLOCK->nl[0])              c == NLBLOCK->nl[0])
1143            {            {
1144            could_continue = partial_newline = TRUE;            could_continue = partial_newline = TRUE;
1145            }            }
1146          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1147              (c < 256 &&              (c < 256 &&
1148                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
# Line 1169  for (;;) Line 1169  for (;;)
1169          if (d == OP_ANY && ptr + 1 >= md->end_subject &&          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1170              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1171              NLBLOCK->nltype == NLTYPE_FIXED &&              NLBLOCK->nltype == NLTYPE_FIXED &&
1172              NLBLOCK->nllen == 2 &&              NLBLOCK->nllen == 2 &&
1173              c == NLBLOCK->nl[0])              c == NLBLOCK->nl[0])
1174            {            {
1175            could_continue = partial_newline = TRUE;            could_continue = partial_newline = TRUE;
1176            }            }
1177          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1178              (c < 256 &&              (c < 256 &&
1179                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
# Line 1199  for (;;) Line 1199  for (;;)
1199          if (d == OP_ANY && ptr + 1 >= md->end_subject &&          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1200              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1201              NLBLOCK->nltype == NLTYPE_FIXED &&              NLBLOCK->nltype == NLTYPE_FIXED &&
1202              NLBLOCK->nllen == 2 &&              NLBLOCK->nllen == 2 &&
1203              c == NLBLOCK->nl[0])              c == NLBLOCK->nl[0])
1204            {            {
1205            could_continue = partial_newline = TRUE;            could_continue = partial_newline = TRUE;
1206            }            }
1207          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1208              (c < 256 &&              (c < 256 &&
1209                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
# Line 1227  for (;;) Line 1227  for (;;)
1227          if (d == OP_ANY && ptr + 1 >= md->end_subject &&          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1228              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1229              NLBLOCK->nltype == NLTYPE_FIXED &&              NLBLOCK->nltype == NLTYPE_FIXED &&
1230              NLBLOCK->nllen == 2 &&              NLBLOCK->nllen == 2 &&
1231              c == NLBLOCK->nl[0])              c == NLBLOCK->nl[0])
1232            {            {
1233            could_continue = partial_newline = TRUE;            could_continue = partial_newline = TRUE;
1234            }            }
1235          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1236              (c < 256 &&              (c < 256 &&
1237                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
# Line 1256  for (;;) Line 1256  for (;;)
1256          if (d == OP_ANY && ptr + 1 >= md->end_subject &&          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1257              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1258              NLBLOCK->nltype == NLTYPE_FIXED &&              NLBLOCK->nltype == NLTYPE_FIXED &&
1259              NLBLOCK->nllen == 2 &&              NLBLOCK->nllen == 2 &&
1260              c == NLBLOCK->nl[0])              c == NLBLOCK->nl[0])
1261            {            {
1262            could_continue = partial_newline = TRUE;            could_continue = partial_newline = TRUE;
1263            }            }
1264          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1265              (c < 256 &&              (c < 256 &&
1266                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
# Line 1368  for (;;) Line 1368  for (;;)
1368        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1369        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1370        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1371        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1372          {          {
1373            int lgb, rgb;
1374          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1375          int ncount = 0;          int ncount = 0;
1376          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
# Line 1377  for (;;) Line 1378  for (;;)
1378            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1379            next_active_state--;            next_active_state--;
1380            }            }
1381            lgb = UCD_GRAPHBREAK(c);
1382          while (nptr < end_subject)          while (nptr < end_subject)
1383            {            {
1384            int nd;            dlen = 1;
1385            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1386            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1387            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1388            ncount++;            ncount++;
1389            nptr += ndlen;            lgb = rgb;
1390              nptr += dlen;
1391            }            }
1392          count++;          count++;
1393          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
# Line 1624  for (;;) Line 1627  for (;;)
1627        QS2:        QS2:
1628    
1629        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1630        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1631          {          {
1632            int lgb, rgb;
1633          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1634          int ncount = 0;          int ncount = 0;
1635          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
# Line 1634  for (;;) Line 1638  for (;;)
1638            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1639            next_active_state--;            next_active_state--;
1640            }            }
1641            lgb = UCD_GRAPHBREAK(c);
1642          while (nptr < end_subject)          while (nptr < end_subject)
1643            {            {
1644            int nd;            dlen = 1;
1645            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1646            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1647            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1648            ncount++;            ncount++;
1649            nptr += ndlen;            lgb = rgb;
1650              nptr += dlen;
1651            }            }
1652          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1653          }          }
# Line 1891  for (;;) Line 1897  for (;;)
1897        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1898          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1899        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1900        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1901          {          {
1902            int lgb, rgb;
1903          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1904          int ncount = 0;          int ncount = 0;
1905          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
# Line 1900  for (;;) Line 1907  for (;;)
1907            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1908            next_active_state--;            next_active_state--;
1909            }            }
1910            lgb = UCD_GRAPHBREAK(c);
1911          while (nptr < end_subject)          while (nptr < end_subject)
1912            {            {
1913            int nd;            dlen = 1;
1914            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1915            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1916            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1917            ncount++;            ncount++;
1918            nptr += ndlen;            lgb = rgb;
1919              nptr += dlen;
1920            }            }
1921          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1922              reset_could_continue = TRUE;              reset_could_continue = TRUE;
1923          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1924            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1925          else          else
# Line 2112  for (;;) Line 2121  for (;;)
2121        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
2122    
2123        case OP_EXTUNI:        case OP_EXTUNI:
2124        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
2125          {          {
2126            int lgb, rgb;
2127          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
2128          int ncount = 0;          int ncount = 0;
2129            lgb = UCD_GRAPHBREAK(c);
2130          while (nptr < end_subject)          while (nptr < end_subject)
2131            {            {
2132            int nclen = 1;            dlen = 1;
2133            GETCHARLEN(c, nptr, nclen);            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2134            if (UCD_CATEGORY(c) != ucp_M) break;            rgb = UCD_GRAPHBREAK(d);
2135              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2136            ncount++;            ncount++;
2137            nptr += nclen;            lgb = rgb;
2138              nptr += dlen;
2139            }            }
2140          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2141              reset_could_continue = TRUE;              reset_could_continue = TRUE;
2142          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2143          }          }
2144        break;        break;
# Line 2151  for (;;) Line 2164  for (;;)
2164          break;          break;
2165    
2166          case 0x000d:          case 0x000d:
2167          if (ptr + 1 >= end_subject)          if (ptr + 1 >= end_subject)
2168            {            {
2169            ADD_NEW(state_offset + 1, 0);            ADD_NEW(state_offset + 1, 0);
2170            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2171              reset_could_continue = TRUE;              reset_could_continue = TRUE;
2172            }            }
2173          else if (ptr[1] == 0x0a)          else if (ptr[1] == 0x0a)
2174            {            {
2175            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2176            }            }
2177          else          else
2178            {            {
2179            ADD_NEW(state_offset + 1, 0);            ADD_NEW(state_offset + 1, 0);
2180            }            }
2181          break;          break;
2182          }          }
2183        break;        break;
# Line 2266  for (;;) Line 2279  for (;;)
2279        break;        break;
2280    
2281        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2282        /* Match a negated single character casefully. This is only used for        /* Match a negated single character casefully. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2283    
2284        case OP_NOT:        case OP_NOT:
2285        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2286        break;        break;
2287    
2288        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2289        /* Match a negated single character caselessly. This is only used for        /* Match a negated single character caselessly. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2290    
2291        case OP_NOTI:        case OP_NOTI:
2292        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0)
2293          { ADD_NEW(state_offset + dlen + 1, 0); }          {
2294            unsigned int otherd;
2295    #ifdef SUPPORT_UTF
2296            if (utf && d >= 128)
2297              {
2298    #ifdef SUPPORT_UCP
2299              otherd = UCD_OTHERCASE(d);
2300    #endif  /* SUPPORT_UCP */
2301              }
2302            else
2303    #endif  /* SUPPORT_UTF */
2304            otherd = TABLE_GET(d, fcc, d);
2305            if (c != d && c != otherd)
2306              { ADD_NEW(state_offset + dlen + 1, 0); }
2307            }
2308        break;        break;
2309    
2310        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 2787  for (;;) Line 2810  for (;;)
2810              {              {
2811              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2812  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2813              const pcre_uchar *p = start_subject + local_offsets[rc];              if (utf)
2814              const pcre_uchar *pp = start_subject + local_offsets[rc+1];                {
2815              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;                const pcre_uchar *p = start_subject + local_offsets[rc];
2816                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2817                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2818                  }
2819  #endif  #endif
2820              if (charcount > 0)              if (charcount > 0)
2821                {                {
# Line 2888  for (;;) Line 2914  for (;;)
2914              const pcre_uchar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2915              charcount = (int)(pp - p);              charcount = (int)(pp - p);
2916  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2917              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;              if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2918  #endif  #endif
2919              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2920              }              }
# Line 2970  for (;;) Line 2996  for (;;)
2996            else            else
2997              {              {
2998  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2999              const pcre_uchar *p = start_subject + local_offsets[0];              if (utf)
3000              const pcre_uchar *pp = start_subject + local_offsets[1];                {
3001              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;                const pcre_uchar *p = start_subject + local_offsets[0];
3002                  const pcre_uchar *pp = start_subject + local_offsets[1];
3003                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
3004                  }
3005  #endif  #endif
3006              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
3007              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
# Line 3037  for (;;) Line 3066  for (;;)
3066    
3067    The "could_continue" variable is true if a state could have continued but    The "could_continue" variable is true if a state could have continued but
3068    for the fact that the end of the subject was reached. */    for the fact that the end of the subject was reached. */
3069    
3070    if (new_count <= 0)    if (new_count <= 0)
3071      {      {
3072      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
# Line 3054  for (;;) Line 3083  for (;;)
3083            (                                          /* or ... */            (                                          /* or ... */
3084            ptr >= end_subject &&                /* End of subject and */            ptr >= end_subject &&                /* End of subject and */
3085            ptr > md->start_used_ptr)            /* Inspected non-empty string */            ptr > md->start_used_ptr)            /* Inspected non-empty string */
3086            )            )
3087          )          )
3088        {        {
3089        if (offsetcount >= 2)        if (offsetcount >= 2)
3090          {          {
# Line 3152  if (offsetcount < 0) return PCRE_ERROR_B Line 3181  if (offsetcount < 0) return PCRE_ERROR_B
3181  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3182  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3183    
3184  /* We need to find the pointer to any study data before we test for byte  /* Check that the first field in the block is the magic number. If it is not,
3185  flipping, so we scan the extra_data block first. This may set two fields in the  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3186  match block, so we must initialize them beforehand. However, the other fields  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3187  in the match block must not be set until after the byte flipping. */  means that the pattern is likely compiled with different endianness. */
3188    
3189    if (re->magic_number != MAGIC_NUMBER)
3190      return re->magic_number == REVERSED_MAGIC_NUMBER?
3191        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3192    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3193    
3194    /* If restarting after a partial match, do some sanity checks on the contents
3195    of the workspace. */
3196    
3197    if ((options & PCRE_DFA_RESTART) != 0)
3198      {
3199      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3200        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3201          return PCRE_ERROR_DFA_BADRESTART;
3202      }
3203    
3204    /* Set up study, callout, and table data */
3205    
3206  md->tables = re->tables;  md->tables = re->tables;
3207  md->callout_data = NULL;  md->callout_data = NULL;
# Line 3174  if (extra_data != NULL) Line 3220  if (extra_data != NULL)
3220      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3221    }    }
3222    
 /* Check that the first field in the block is the magic number. If it is not,  
 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to  
 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which  
 means that the pattern is likely compiled with different endianness. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   return re->magic_number == REVERSED_MAGIC_NUMBER?  
     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;  
 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;  
   
3223  /* Set some local values */  /* Set some local values */
3224    
3225  current_subject = (const pcre_uchar *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;

Legend:
Removed from v.919  
changed lines
  Added in v.1015

  ViewVC Help
Powered by ViewVC 1.1.5