/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 850 by zherczeg, Wed Jan 4 17:29:11 2012 UTC revision 1033 by ph10, Mon Sep 10 11:02:48 2012 UTC
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40    
   
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
45    
46    
# Line 282  typedef struct stateblock { Line 281  typedef struct stateblock {
281    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
282  } stateblock;  } stateblock;
283    
284  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
285    
286    
287  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 382  for the current character, one for the f Line 381  for the current character, one for the f
381      next_new_state->count  = (y); \      next_new_state->count  = (y); \
382      next_new_state->data   = (z); \      next_new_state->data   = (z); \
383      next_new_state++; \      next_new_state++; \
384      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
385          (x), (y), (z), __LINE__)); \
386      } \      } \
387    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
388    
# Line 424  BOOL utf = (md->poptions & PCRE_UTF8) != Line 424  BOOL utf = (md->poptions & PCRE_UTF8) !=
424  BOOL utf = FALSE;  BOOL utf = FALSE;
425  #endif  #endif
426    
427    BOOL reset_could_continue = FALSE;
428    
429  rlevel++;  rlevel++;
430  offsetcount &= (-2);  offsetcount &= (-2);
431    
# Line 571  for (;;) Line 573  for (;;)
573    int clen, dlen;    int clen, dlen;
574    unsigned int c, d;    unsigned int c, d;
575    int forced_fail = 0;    int forced_fail = 0;
576    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
577      BOOL could_continue = reset_could_continue;
578      reset_could_continue = FALSE;
579    
580    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
581    new state list. */    new state list. */
# Line 607  for (;;) Line 611  for (;;)
611    
612    if (ptr < end_subject)    if (ptr < end_subject)
613      {      {
614      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
615  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
616      if (utf) { GETCHARLEN(c, ptr, clen); } else      if (utf) { GETCHARLEN(c, ptr, clen); } else
617  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
# Line 641  for (;;) Line 645  for (;;)
645    
646      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
647      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
648      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
649        state, arrange for it to be passed on. */
650    
651      if (state_offset < 0)      if (state_offset < 0)
652        {        {
# Line 650  for (;;) Line 655  for (;;)
655          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
656          ADD_NEW_DATA(state_offset, current_state->count,          ADD_NEW_DATA(state_offset, current_state->count,
657            current_state->data - 1);            current_state->data - 1);
658            if (could_continue) reset_could_continue = TRUE;
659          continue;          continue;
660          }          }
661        else        else
# Line 689  for (;;) Line 695  for (;;)
695      permitted.      permitted.
696    
697      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
698      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
699      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
700      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
701      */      these ones to new opcodes. */
702    
703      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
704        {        {
# Line 783  for (;;) Line 789  for (;;)
789              offsets[0] = (int)(current_subject - start_subject);              offsets[0] = (int)(current_subject - start_subject);
790              offsets[1] = (int)(ptr - start_subject);              offsets[1] = (int)(ptr - start_subject);
791              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
792                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
793              }              }
794            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
795              {              {
# Line 888  for (;;) Line 894  for (;;)
894        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
895        case OP_ANY:        case OP_ANY:
896        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
897          { ADD_NEW(state_offset + 1, 0); }          {
898            if (ptr + 1 >= md->end_subject &&
899                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
900                NLBLOCK->nltype == NLTYPE_FIXED &&
901                NLBLOCK->nllen == 2 &&
902                c == NLBLOCK->nl[0])
903              {
904              could_continue = partial_newline = TRUE;
905              }
906            else
907              {
908              ADD_NEW(state_offset + 1, 0);
909              }
910            }
911        break;        break;
912    
913        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 916  for (;;) Line 935  for (;;)
935                 (ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
936              ))              ))
937            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
938            else if (ptr + 1 >= md->end_subject &&
939                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
940                     NLBLOCK->nltype == NLTYPE_FIXED &&
941                     NLBLOCK->nllen == 2 &&
942                     c == NLBLOCK->nl[0])
943              {
944              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
945                {
946                reset_could_continue = TRUE;
947                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
948                }
949              else could_continue = partial_newline = TRUE;
950              }
951          }          }
952        break;        break;
953    
# Line 928  for (;;) Line 960  for (;;)
960          else if (clen == 0 ||          else if (clen == 0 ||
961              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
962            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
963            else if (ptr + 1 >= md->end_subject &&
964                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
965                     NLBLOCK->nltype == NLTYPE_FIXED &&
966                     NLBLOCK->nllen == 2 &&
967                     c == NLBLOCK->nl[0])
968              {
969              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
970                {
971                reset_could_continue = TRUE;
972                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
973                }
974              else could_continue = partial_newline = TRUE;
975              }
976          }          }
977        else if (IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
978          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 1090  for (;;) Line 1135  for (;;)
1135        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1136        if (clen > 0)        if (clen > 0)
1137          {          {
1138          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1139                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1140                NLBLOCK->nltype == NLTYPE_FIXED &&
1141                NLBLOCK->nllen == 2 &&
1142                c == NLBLOCK->nl[0])
1143              {
1144              could_continue = partial_newline = TRUE;
1145              }
1146            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1147              (c < 256 &&              (c < 256 &&
1148                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1149                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1113  for (;;) Line 1166  for (;;)
1166        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1167        if (clen > 0)        if (clen > 0)
1168          {          {
1169          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1170                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1171                NLBLOCK->nltype == NLTYPE_FIXED &&
1172                NLBLOCK->nllen == 2 &&
1173                c == NLBLOCK->nl[0])
1174              {
1175              could_continue = partial_newline = TRUE;
1176              }
1177            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1178              (c < 256 &&              (c < 256 &&
1179                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1180                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1135  for (;;) Line 1196  for (;;)
1196        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1197        if (clen > 0)        if (clen > 0)
1198          {          {
1199          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1200                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1201                NLBLOCK->nltype == NLTYPE_FIXED &&
1202                NLBLOCK->nllen == 2 &&
1203                c == NLBLOCK->nl[0])
1204              {
1205              could_continue = partial_newline = TRUE;
1206              }
1207            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1208              (c < 256 &&              (c < 256 &&
1209                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1210                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1155  for (;;) Line 1224  for (;;)
1224        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1225        if (clen > 0)        if (clen > 0)
1226          {          {
1227          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1228                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1229                NLBLOCK->nltype == NLTYPE_FIXED &&
1230                NLBLOCK->nllen == 2 &&
1231                c == NLBLOCK->nl[0])
1232              {
1233              could_continue = partial_newline = TRUE;
1234              }
1235            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1236              (c < 256 &&              (c < 256 &&
1237                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1238                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1176  for (;;) Line 1253  for (;;)
1253        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1254        if (clen > 0)        if (clen > 0)
1255          {          {
1256          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1257                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1258                NLBLOCK->nltype == NLTYPE_FIXED &&
1259                NLBLOCK->nllen == 2 &&
1260                c == NLBLOCK->nl[0])
1261              {
1262              could_continue = partial_newline = TRUE;
1263              }
1264            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1265              (c < 256 &&              (c < 256 &&
1266                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1267                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1283  for (;;) Line 1368  for (;;)
1368        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1369        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1370        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1371        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1372          {          {
1373            int lgb, rgb;
1374          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1375          int ncount = 0;          int ncount = 0;
1376          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
# Line 1292  for (;;) Line 1378  for (;;)
1378            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1379            next_active_state--;            next_active_state--;
1380            }            }
1381            lgb = UCD_GRAPHBREAK(c);
1382          while (nptr < end_subject)          while (nptr < end_subject)
1383            {            {
1384            int nd;            dlen = 1;
1385            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1386            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1387            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1388            ncount++;            ncount++;
1389            nptr += ndlen;            lgb = rgb;
1390              nptr += dlen;
1391            }            }
1392          count++;          count++;
1393          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
# Line 1318  for (;;) Line 1406  for (;;)
1406          int ncount = 0;          int ncount = 0;
1407          switch (c)          switch (c)
1408            {            {
1409            case 0x000b:            case CHAR_VT:
1410            case 0x000c:            case CHAR_FF:
1411            case 0x0085:            case CHAR_NEL:
1412    #ifndef EBCDIC
1413            case 0x2028:            case 0x2028:
1414            case 0x2029:            case 0x2029:
1415    #endif  /* Not EBCDIC */
1416            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1417            goto ANYNL01;            goto ANYNL01;
1418    
1419            case 0x000d:            case CHAR_CR:
1420            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
1421            /* Fall through */            /* Fall through */
1422    
1423            ANYNL01:            ANYNL01:
1424            case 0x000a:            case CHAR_LF:
1425            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1426              {              {
1427              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1358  for (;;) Line 1448  for (;;)
1448          BOOL OK;          BOOL OK;
1449          switch (c)          switch (c)
1450            {            {
1451            case 0x000a:            case CHAR_LF:
1452            case 0x000b:            case CHAR_VT:
1453            case 0x000c:            case CHAR_FF:
1454            case 0x000d:            case CHAR_CR:
1455            case 0x0085:            case CHAR_NEL:
1456    #ifndef EBCDIC
1457            case 0x2028:            case 0x2028:
1458            case 0x2029:            case 0x2029:
1459    #endif  /* Not EBCDIC */
1460            OK = TRUE;            OK = TRUE;
1461            break;            break;
1462    
# Line 1397  for (;;) Line 1489  for (;;)
1489          BOOL OK;          BOOL OK;
1490          switch (c)          switch (c)
1491            {            {
1492            case 0x09:      /* HT */            case CHAR_HT:
1493            case 0x20:      /* SPACE */            case CHAR_SPACE:
1494    #ifndef EBCDIC
1495            case 0xa0:      /* NBSP */            case 0xa0:      /* NBSP */
1496            case 0x1680:    /* OGHAM SPACE MARK */            case 0x1680:    /* OGHAM SPACE MARK */
1497            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
# Line 1416  for (;;) Line 1509  for (;;)
1509            case 0x202f:    /* NARROW NO-BREAK SPACE */            case 0x202f:    /* NARROW NO-BREAK SPACE */
1510            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1511            case 0x3000:    /* IDEOGRAPHIC SPACE */            case 0x3000:    /* IDEOGRAPHIC SPACE */
1512    #endif  /* Not EBCDIC */
1513            OK = TRUE;            OK = TRUE;
1514            break;            break;
1515    
# Line 1539  for (;;) Line 1633  for (;;)
1633        QS2:        QS2:
1634    
1635        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1636        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1637          {          {
1638            int lgb, rgb;
1639          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1640          int ncount = 0;          int ncount = 0;
1641          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
# Line 1549  for (;;) Line 1644  for (;;)
1644            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1645            next_active_state--;            next_active_state--;
1646            }            }
1647            lgb = UCD_GRAPHBREAK(c);
1648          while (nptr < end_subject)          while (nptr < end_subject)
1649            {            {
1650            int nd;            dlen = 1;
1651            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1652            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1653            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1654            ncount++;            ncount++;
1655            nptr += ndlen;            lgb = rgb;
1656              nptr += dlen;
1657            }            }
1658          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1659          }          }
# Line 1582  for (;;) Line 1679  for (;;)
1679          int ncount = 0;          int ncount = 0;
1680          switch (c)          switch (c)
1681            {            {
1682            case 0x000b:            case CHAR_VT:
1683            case 0x000c:            case CHAR_FF:
1684            case 0x0085:            case CHAR_NEL:
1685    #ifndef EBCDIC
1686            case 0x2028:            case 0x2028:
1687            case 0x2029:            case 0x2029:
1688    #endif  /* Not EBCDIC */
1689            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1690            goto ANYNL02;            goto ANYNL02;
1691    
1692            case 0x000d:            case CHAR_CR:
1693            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
1694            /* Fall through */            /* Fall through */
1695    
1696            ANYNL02:            ANYNL02:
1697            case 0x000a:            case CHAR_LF:
1698            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1699                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1700              {              {
# Line 1630  for (;;) Line 1729  for (;;)
1729          BOOL OK;          BOOL OK;
1730          switch (c)          switch (c)
1731            {            {
1732            case 0x000a:            case CHAR_LF:
1733            case 0x000b:            case CHAR_VT:
1734            case 0x000c:            case CHAR_FF:
1735            case 0x000d:            case CHAR_CR:
1736            case 0x0085:            case CHAR_NEL:
1737    #ifndef EBCDIC
1738            case 0x2028:            case 0x2028:
1739            case 0x2029:            case 0x2029:
1740    #endif  /* Not EBCDIC */
1741            OK = TRUE;            OK = TRUE;
1742            break;            break;
1743    
# Line 1676  for (;;) Line 1777  for (;;)
1777          BOOL OK;          BOOL OK;
1778          switch (c)          switch (c)
1779            {            {
1780            case 0x09:      /* HT */            case CHAR_HT:
1781            case 0x20:      /* SPACE */            case CHAR_SPACE:
1782    #ifndef EBCDIC
1783            case 0xa0:      /* NBSP */            case 0xa0:      /* NBSP */
1784            case 0x1680:    /* OGHAM SPACE MARK */            case 0x1680:    /* OGHAM SPACE MARK */
1785            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
# Line 1695  for (;;) Line 1797  for (;;)
1797            case 0x202f:    /* NARROW NO-BREAK SPACE */            case 0x202f:    /* NARROW NO-BREAK SPACE */
1798            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1799            case 0x3000:    /* IDEOGRAPHIC SPACE */            case 0x3000:    /* IDEOGRAPHIC SPACE */
1800    #endif  /* Not EBCDIC */
1801            OK = TRUE;            OK = TRUE;
1802            break;            break;
1803    
# Line 1806  for (;;) Line 1909  for (;;)
1909        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1910          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1911        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1912        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1913          {          {
1914            int lgb, rgb;
1915          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1916          int ncount = 0;          int ncount = 0;
1917          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
# Line 1815  for (;;) Line 1919  for (;;)
1919            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1920            next_active_state--;            next_active_state--;
1921            }            }
1922            lgb = UCD_GRAPHBREAK(c);
1923          while (nptr < end_subject)          while (nptr < end_subject)
1924            {            {
1925            int nd;            dlen = 1;
1926            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1927            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1928            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1929            ncount++;            ncount++;
1930            nptr += ndlen;            lgb = rgb;
1931              nptr += dlen;
1932            }            }
1933            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1934                reset_could_continue = TRUE;
1935          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1936            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1937          else          else
# Line 1845  for (;;) Line 1953  for (;;)
1953          int ncount = 0;          int ncount = 0;
1954          switch (c)          switch (c)
1955            {            {
1956            case 0x000b:            case CHAR_VT:
1957            case 0x000c:            case CHAR_FF:
1958            case 0x0085:            case CHAR_NEL:
1959    #ifndef EBCDIC
1960            case 0x2028:            case 0x2028:
1961            case 0x2029:            case 0x2029:
1962    #endif  /* Not EBCDIC */
1963            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1964            goto ANYNL03;            goto ANYNL03;
1965    
1966            case 0x000d:            case CHAR_CR:
1967            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
1968            /* Fall through */            /* Fall through */
1969    
1970            ANYNL03:            ANYNL03:
1971            case 0x000a:            case CHAR_LF:
1972            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1973              {              {
1974              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1889  for (;;) Line 1999  for (;;)
1999          BOOL OK;          BOOL OK;
2000          switch (c)          switch (c)
2001            {            {
2002            case 0x000a:            case CHAR_LF:
2003            case 0x000b:            case CHAR_VT:
2004            case 0x000c:            case CHAR_FF:
2005            case 0x000d:            case CHAR_CR:
2006            case 0x0085:            case CHAR_NEL:
2007    #ifndef EBCDIC
2008            case 0x2028:            case 0x2028:
2009            case 0x2029:            case 0x2029:
2010    #endif  /* Not EBCDIC */
2011            OK = TRUE;            OK = TRUE;
2012            break;            break;
2013    
# Line 1931  for (;;) Line 2043  for (;;)
2043          BOOL OK;          BOOL OK;
2044          switch (c)          switch (c)
2045            {            {
2046            case 0x09:      /* HT */            case CHAR_HT:
2047            case 0x20:      /* SPACE */            case CHAR_SPACE:
2048    #ifndef EBCDIC
2049            case 0xa0:      /* NBSP */            case 0xa0:      /* NBSP */
2050            case 0x1680:    /* OGHAM SPACE MARK */            case 0x1680:    /* OGHAM SPACE MARK */
2051            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
# Line 1950  for (;;) Line 2063  for (;;)
2063            case 0x202f:    /* NARROW NO-BREAK SPACE */            case 0x202f:    /* NARROW NO-BREAK SPACE */
2064            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2065            case 0x3000:    /* IDEOGRAPHIC SPACE */            case 0x3000:    /* IDEOGRAPHIC SPACE */
2066    #endif  /* Not EBCDIC */
2067            OK = TRUE;            OK = TRUE;
2068            break;            break;
2069    
# Line 2025  for (;;) Line 2139  for (;;)
2139        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
2140    
2141        case OP_EXTUNI:        case OP_EXTUNI:
2142        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
2143          {          {
2144            int lgb, rgb;
2145          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
2146          int ncount = 0;          int ncount = 0;
2147            lgb = UCD_GRAPHBREAK(c);
2148          while (nptr < end_subject)          while (nptr < end_subject)
2149            {            {
2150            int nclen = 1;            dlen = 1;
2151            GETCHARLEN(c, nptr, nclen);            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2152            if (UCD_CATEGORY(c) != ucp_M) break;            rgb = UCD_GRAPHBREAK(d);
2153              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2154            ncount++;            ncount++;
2155            nptr += nclen;            lgb = rgb;
2156              nptr += dlen;
2157            }            }
2158            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2159                reset_could_continue = TRUE;
2160          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2161          }          }
2162        break;        break;
# Line 2050  for (;;) Line 2170  for (;;)
2170        case OP_ANYNL:        case OP_ANYNL:
2171        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2172          {          {
2173          case 0x000b:          case CHAR_VT:
2174          case 0x000c:          case CHAR_FF:
2175          case 0x0085:          case CHAR_NEL:
2176    #ifndef EBCDIC
2177          case 0x2028:          case 0x2028:
2178          case 0x2029:          case 0x2029:
2179    #endif  /* Not EBCDIC */
2180          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2181    
2182          case 0x000a:          case CHAR_LF:
2183          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2184          break;          break;
2185    
2186          case 0x000d:          case CHAR_CR:
2187          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2188              {
2189              ADD_NEW(state_offset + 1, 0);
2190              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2191                reset_could_continue = TRUE;
2192              }
2193            else if (ptr[1] == CHAR_LF)
2194            {            {
2195            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2196            }            }
# Line 2078  for (;;) Line 2206  for (;;)
2206        case OP_NOT_VSPACE:        case OP_NOT_VSPACE:
2207        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2208          {          {
2209          case 0x000a:          case CHAR_LF:
2210          case 0x000b:          case CHAR_VT:
2211          case 0x000c:          case CHAR_FF:
2212          case 0x000d:          case CHAR_CR:
2213          case 0x0085:          case CHAR_NEL:
2214    #ifndef EBCDIC
2215          case 0x2028:          case 0x2028:
2216          case 0x2029:          case 0x2029:
2217    #endif  /* Not EBCDIC */
2218          break;          break;
2219    
2220          default:          default:
# Line 2097  for (;;) Line 2227  for (;;)
2227        case OP_VSPACE:        case OP_VSPACE:
2228        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2229          {          {
2230          case 0x000a:          case CHAR_LF:
2231          case 0x000b:          case CHAR_VT:
2232          case 0x000c:          case CHAR_FF:
2233          case 0x000d:          case CHAR_CR:
2234          case 0x0085:          case CHAR_NEL:
2235    #ifndef EBCDIC
2236          case 0x2028:          case 0x2028:
2237          case 0x2029:          case 0x2029:
2238    #endif  /* Not EBCDIC */
2239          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2240          break;          break;
2241    
# Line 2115  for (;;) Line 2247  for (;;)
2247        case OP_NOT_HSPACE:        case OP_NOT_HSPACE:
2248        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2249          {          {
2250          case 0x09:      /* HT */          case CHAR_HT:
2251          case 0x20:      /* SPACE */          case CHAR_SPACE:
2252    #ifndef EBCDIC
2253          case 0xa0:      /* NBSP */          case 0xa0:      /* NBSP */
2254          case 0x1680:    /* OGHAM SPACE MARK */          case 0x1680:    /* OGHAM SPACE MARK */
2255          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
# Line 2134  for (;;) Line 2267  for (;;)
2267          case 0x202f:    /* NARROW NO-BREAK SPACE */          case 0x202f:    /* NARROW NO-BREAK SPACE */
2268          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2269          case 0x3000:    /* IDEOGRAPHIC SPACE */          case 0x3000:    /* IDEOGRAPHIC SPACE */
2270    #endif  /* Not EBCDIC */
2271          break;          break;
2272    
2273          default:          default:
# Line 2146  for (;;) Line 2280  for (;;)
2280        case OP_HSPACE:        case OP_HSPACE:
2281        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2282          {          {
2283          case 0x09:      /* HT */          case CHAR_HT:
2284          case 0x20:      /* SPACE */          case CHAR_SPACE:
2285    #ifndef EBCDIC
2286          case 0xa0:      /* NBSP */          case 0xa0:      /* NBSP */
2287          case 0x1680:    /* OGHAM SPACE MARK */          case 0x1680:    /* OGHAM SPACE MARK */
2288          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
# Line 2165  for (;;) Line 2300  for (;;)
2300          case 0x202f:    /* NARROW NO-BREAK SPACE */          case 0x202f:    /* NARROW NO-BREAK SPACE */
2301          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2302          case 0x3000:    /* IDEOGRAPHIC SPACE */          case 0x3000:    /* IDEOGRAPHIC SPACE */
2303    #endif  /* Not EBCDIC */
2304          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2305          break;          break;
2306          }          }
2307        break;        break;
2308    
2309        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2310        /* Match a negated single character casefully. This is only used for        /* Match a negated single character casefully. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2311    
2312        case OP_NOT:        case OP_NOT:
2313        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2314        break;        break;
2315    
2316        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2317        /* Match a negated single character caselessly. This is only used for        /* Match a negated single character caselessly. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2318    
2319        case OP_NOTI:        case OP_NOTI:
2320        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0)
2321          { ADD_NEW(state_offset + dlen + 1, 0); }          {
2322            unsigned int otherd;
2323    #ifdef SUPPORT_UTF
2324            if (utf && d >= 128)
2325              {
2326    #ifdef SUPPORT_UCP
2327              otherd = UCD_OTHERCASE(d);
2328    #endif  /* SUPPORT_UCP */
2329              }
2330            else
2331    #endif  /* SUPPORT_UTF */
2332            otherd = TABLE_GET(d, fcc, d);
2333            if (c != d && c != otherd)
2334              { ADD_NEW(state_offset + dlen + 1, 0); }
2335            }
2336        break;        break;
2337    
2338        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 2557  for (;;) Line 2703  for (;;)
2703              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2704              cb.callout_number   = code[LINK_SIZE+2];              cb.callout_number   = code[LINK_SIZE+2];
2705              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2706    #ifdef COMPILE_PCRE8
2707              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2708    #else
2709                cb.subject          = (PCRE_SPTR16)start_subject;
2710    #endif
2711              cb.subject_length   = (int)(end_subject - start_subject);              cb.subject_length   = (int)(end_subject - start_subject);
2712              cb.start_match      = (int)(current_subject - start_subject);              cb.start_match      = (int)(current_subject - start_subject);
2713              cb.current_position = (int)(ptr - start_subject);              cb.current_position = (int)(ptr - start_subject);
# Line 2686  for (;;) Line 2836  for (;;)
2836            {            {
2837            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2838              {              {
             const pcre_uchar *p = start_subject + local_offsets[rc];  
             const pcre_uchar *pp = start_subject + local_offsets[rc+1];  
2839              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2840  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2841              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;              if (utf)
2842                  {
2843                  const pcre_uchar *p = start_subject + local_offsets[rc];
2844                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2845                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2846                  }
2847  #endif  #endif
2848              if (charcount > 0)              if (charcount > 0)
2849                {                {
# Line 2789  for (;;) Line 2942  for (;;)
2942              const pcre_uchar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2943              charcount = (int)(pp - p);              charcount = (int)(pp - p);
2944  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2945              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;              if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2946  #endif  #endif
2947              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2948              }              }
# Line 2871  for (;;) Line 3024  for (;;)
3024            else            else
3025              {              {
3026  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3027              const pcre_uchar *p = start_subject + local_offsets[0];              if (utf)
3028              const pcre_uchar *pp = start_subject + local_offsets[1];                {
3029              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;                const pcre_uchar *p = start_subject + local_offsets[0];
3030                  const pcre_uchar *pp = start_subject + local_offsets[1];
3031                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
3032                  }
3033  #endif  #endif
3034              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
3035              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
# Line 2896  for (;;) Line 3052  for (;;)
3052          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
3053          cb.callout_number   = code[1];          cb.callout_number   = code[1];
3054          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
3055    #ifdef COMPILE_PCRE8
3056          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
3057    #else
3058            cb.subject          = (PCRE_SPTR16)start_subject;
3059    #endif
3060          cb.subject_length   = (int)(end_subject - start_subject);          cb.subject_length   = (int)(end_subject - start_subject);
3061          cb.start_match      = (int)(current_subject - start_subject);          cb.start_match      = (int)(current_subject - start_subject);
3062          cb.current_position = (int)(ptr - start_subject);          cb.current_position = (int)(ptr - start_subject);
# Line 2938  for (;;) Line 3098  for (;;)
3098    if (new_count <= 0)    if (new_count <= 0)
3099      {      {
3100      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3101          could_continue &&                            /* Some could go on */          could_continue &&                            /* Some could go on, and */
3102          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3103          (                                            /* either... */          (                                            /* either... */
3104          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2946  for (;;) Line 3106  for (;;)
3106          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3107           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3108          ) &&                                         /* And... */          ) &&                                         /* And... */
3109          ptr >= end_subject &&                  /* Reached end of subject */          (
3110          ptr > md->start_used_ptr)              /* Inspected non-empty string */          partial_newline ||                           /* Either partial NL */
3111              (                                          /* or ... */
3112              ptr >= end_subject &&                /* End of subject and */
3113              ptr > md->start_used_ptr)            /* Inspected non-empty string */
3114              )
3115            )
3116        {        {
3117        if (offsetcount >= 2)        if (offsetcount >= 2)
3118          {          {
# Line 3013  pcre_dfa_exec(const pcre *argument_re, c Line 3178  pcre_dfa_exec(const pcre *argument_re, c
3178    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3179  #else  #else
3180  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3181  pcre16_dfa_exec(const pcre *argument_re, const pcre16_extra *extra_data,  pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3182    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3183    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3184  #endif  #endif
3185  {  {
3186  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3187  dfa_match_data match_block;  dfa_match_data match_block;
3188  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3189  BOOL utf, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3190  const pcre_uchar *current_subject, *end_subject;  const pcre_uchar *current_subject, *end_subject;
 const pcre_uint8 *lcc;  
   
3191  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
3192    
3193  const pcre_uchar *req_char_ptr;  const pcre_uchar *req_char_ptr;
# Line 3046  if (offsetcount < 0) return PCRE_ERROR_B Line 3209  if (offsetcount < 0) return PCRE_ERROR_B
3209  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3210  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3211    
3212  /* We need to find the pointer to any study data before we test for byte  /* Check that the first field in the block is the magic number. If it is not,
3213  flipping, so we scan the extra_data block first. This may set two fields in the  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3214  match block, so we must initialize them beforehand. However, the other fields  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3215  in the match block must not be set until after the byte flipping. */  means that the pattern is likely compiled with different endianness. */
3216    
3217    if (re->magic_number != MAGIC_NUMBER)
3218      return re->magic_number == REVERSED_MAGIC_NUMBER?
3219        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3220    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3221    
3222    /* If restarting after a partial match, do some sanity checks on the contents
3223    of the workspace. */
3224    
3225    if ((options & PCRE_DFA_RESTART) != 0)
3226      {
3227      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3228        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3229          return PCRE_ERROR_DFA_BADRESTART;
3230      }
3231    
3232    /* Set up study, callout, and table data */
3233    
3234  md->tables = re->tables;  md->tables = re->tables;
3235  md->callout_data = NULL;  md->callout_data = NULL;
# Line 3068  if (extra_data != NULL) Line 3248  if (extra_data != NULL)
3248      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3249    }    }
3250    
 /* Check that the first field in the block is the magic number. If it is not,  
 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to  
 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which  
 means that the pattern is likely compiled with different endianness. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   return re->magic_number == REVERSED_MAGIC_NUMBER?  
     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;  
 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;  
   
3251  /* Set some local values */  /* Set some local values */
3252    
3253  current_subject = (const pcre_uchar *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
# Line 3186  in other programs later. */ Line 3356  in other programs later. */
3356    
3357  if (md->tables == NULL) md->tables = PRIV(default_tables);  if (md->tables == NULL) md->tables = PRIV(default_tables);
3358    
3359  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3360  used in a loop when finding where to start. */  where to start. */
3361    
 lcc = md->tables + lcc_offset;  
3362  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3363  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3364    
# Line 3204  if (!anchored) Line 3373  if (!anchored)
3373    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3374      {      {
3375      has_first_char = TRUE;      has_first_char = TRUE;
3376      first_char = first_char2 = re->first_char;      first_char = first_char2 = (pcre_uchar)(re->first_char);
3377      if ((re->flags & PCRE_FCH_CASELESS) != 0)      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3378        {        {
3379        first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);        first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
# Line 3228  character" set. */ Line 3397  character" set. */
3397  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3398    {    {
3399    has_req_char = TRUE;    has_req_char = TRUE;
3400    req_char = req_char2 = re->req_char;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3401    if ((re->flags & PCRE_RCH_CASELESS) != 0)    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3402      {      {
3403      req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);      req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);

Legend:
Removed from v.850  
changed lines
  Added in v.1033

  ViewVC Help
Powered by ViewVC 1.1.5