/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 850 by zherczeg, Wed Jan 4 17:29:11 2012 UTC revision 1334 by ph10, Wed May 15 16:53:18 2013 UTC
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40    
   
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
45    
46    
# Line 282  typedef struct stateblock { Line 281  typedef struct stateblock {
281    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
282  } stateblock;  } stateblock;
283    
284  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
285    
286    
287  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 303  Returns:       nothing Line 302  Returns:       nothing
302  static void  static void
303  pchars(const pcre_uchar *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
304  {  {
305  int c;  pcre_uint32 c;
306  while (length-- > 0)  while (length-- > 0)
307    {    {
308    if (isprint(c = *(p++)))    if (isprint(c = *(p++)))
309      fprintf(f, "%c", c);      fprintf(f, "%c", c);
310    else    else
311      fprintf(f, "\\x%02x", c);      fprintf(f, "\\x{%02x}", c);
312    }    }
313  }  }
314  #endif  #endif
# Line 382  for the current character, one for the f Line 381  for the current character, one for the f
381      next_new_state->count  = (y); \      next_new_state->count  = (y); \
382      next_new_state->data   = (z); \      next_new_state->data   = (z); \
383      next_new_state++; \      next_new_state++; \
384      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
385          (x), (y), (z), __LINE__)); \
386      } \      } \
387    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
388    
# Line 424  BOOL utf = (md->poptions & PCRE_UTF8) != Line 424  BOOL utf = (md->poptions & PCRE_UTF8) !=
424  BOOL utf = FALSE;  BOOL utf = FALSE;
425  #endif  #endif
426    
427    BOOL reset_could_continue = FALSE;
428    
429  rlevel++;  rlevel++;
430  offsetcount &= (-2);  offsetcount &= (-2);
431    
# Line 569  for (;;) Line 571  for (;;)
571    {    {
572    int i, j;    int i, j;
573    int clen, dlen;    int clen, dlen;
574    unsigned int c, d;    pcre_uint32 c, d;
575    int forced_fail = 0;    int forced_fail = 0;
576    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
577      BOOL could_continue = reset_could_continue;
578      reset_could_continue = FALSE;
579    
580    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
581    new state list. */    new state list. */
# Line 607  for (;;) Line 611  for (;;)
611    
612    if (ptr < end_subject)    if (ptr < end_subject)
613      {      {
614      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
615  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
616      if (utf) { GETCHARLEN(c, ptr, clen); } else      GETCHARLENTEST(c, ptr, clen);
617  #endif  /* SUPPORT_UTF */  #else
618      c = *ptr;      c = *ptr;
619    #endif  /* SUPPORT_UTF */
620      }      }
621    else    else
622      {      {
# Line 630  for (;;) Line 635  for (;;)
635      BOOL caseless = FALSE;      BOOL caseless = FALSE;
636      const pcre_uchar *code;      const pcre_uchar *code;
637      int state_offset = current_state->offset;      int state_offset = current_state->offset;
638      int count, codevalue, rrc;      int codevalue, rrc;
639        int count;
640    
641  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
642      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 641  for (;;) Line 647  for (;;)
647    
648      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
649      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
650      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
651        state, arrange for it to be passed on. */
652    
653      if (state_offset < 0)      if (state_offset < 0)
654        {        {
# Line 650  for (;;) Line 657  for (;;)
657          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
658          ADD_NEW_DATA(state_offset, current_state->count,          ADD_NEW_DATA(state_offset, current_state->count,
659            current_state->data - 1);            current_state->data - 1);
660            if (could_continue) reset_could_continue = TRUE;
661          continue;          continue;
662          }          }
663        else        else
# Line 689  for (;;) Line 697  for (;;)
697      permitted.      permitted.
698    
699      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
700      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
701      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
702      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
703      */      these ones to new opcodes. */
704    
705      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
706        {        {
# Line 783  for (;;) Line 791  for (;;)
791              offsets[0] = (int)(current_subject - start_subject);              offsets[0] = (int)(current_subject - start_subject);
792              offsets[1] = (int)(ptr - start_subject);              offsets[1] = (int)(ptr - start_subject);
793              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
794                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
795              }              }
796            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
797              {              {
# Line 888  for (;;) Line 896  for (;;)
896        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
897        case OP_ANY:        case OP_ANY:
898        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
899          { ADD_NEW(state_offset + 1, 0); }          {
900            if (ptr + 1 >= md->end_subject &&
901                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
902                NLBLOCK->nltype == NLTYPE_FIXED &&
903                NLBLOCK->nllen == 2 &&
904                c == NLBLOCK->nl[0])
905              {
906              could_continue = partial_newline = TRUE;
907              }
908            else
909              {
910              ADD_NEW(state_offset + 1, 0);
911              }
912            }
913        break;        break;
914    
915        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 916  for (;;) Line 937  for (;;)
937                 (ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
938              ))              ))
939            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
940            else if (ptr + 1 >= md->end_subject &&
941                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
942                     NLBLOCK->nltype == NLTYPE_FIXED &&
943                     NLBLOCK->nllen == 2 &&
944                     c == NLBLOCK->nl[0])
945              {
946              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
947                {
948                reset_could_continue = TRUE;
949                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
950                }
951              else could_continue = partial_newline = TRUE;
952              }
953          }          }
954        break;        break;
955    
# Line 928  for (;;) Line 962  for (;;)
962          else if (clen == 0 ||          else if (clen == 0 ||
963              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
964            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
965            else if (ptr + 1 >= md->end_subject &&
966                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
967                     NLBLOCK->nltype == NLTYPE_FIXED &&
968                     NLBLOCK->nllen == 2 &&
969                     c == NLBLOCK->nl[0])
970              {
971              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
972                {
973                reset_could_continue = TRUE;
974                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
975                }
976              else could_continue = partial_newline = TRUE;
977              }
978          }          }
979        else if (IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
980          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 962  for (;;) Line 1009  for (;;)
1009            {            {
1010            const pcre_uchar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
1011            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1012  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1013            if (utf) { BACKCHAR(temp); }            if (utf) { BACKCHAR(temp); }
1014  #endif  #endif
1015            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
# Line 1015  for (;;) Line 1062  for (;;)
1062        if (clen > 0)        if (clen > 0)
1063          {          {
1064          BOOL OK;          BOOL OK;
1065            const pcre_uint32 *cp;
1066          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1067          switch(code[1])          switch(code[1])
1068            {            {
# Line 1063  for (;;) Line 1111  for (;;)
1111                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1112            break;            break;
1113    
1114              case PT_CLIST:
1115              cp = PRIV(ucd_caseless_sets) + code[2];
1116              for (;;)
1117                {
1118                if (c < *cp) { OK = FALSE; break; }
1119                if (c == *cp++) { OK = TRUE; break; }
1120                }
1121              break;
1122    
1123              case PT_UCNC:
1124              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1125                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1126                   c >= 0xe000;
1127              break;
1128    
1129            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1130    
1131            default:            default:
# Line 1090  for (;;) Line 1153  for (;;)
1153        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1154        if (clen > 0)        if (clen > 0)
1155          {          {
1156          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1157                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1158                NLBLOCK->nltype == NLTYPE_FIXED &&
1159                NLBLOCK->nllen == 2 &&
1160                c == NLBLOCK->nl[0])
1161              {
1162              could_continue = partial_newline = TRUE;
1163              }
1164            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1165              (c < 256 &&              (c < 256 &&
1166                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1167                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1113  for (;;) Line 1184  for (;;)
1184        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1185        if (clen > 0)        if (clen > 0)
1186          {          {
1187          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1188                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1189                NLBLOCK->nltype == NLTYPE_FIXED &&
1190                NLBLOCK->nllen == 2 &&
1191                c == NLBLOCK->nl[0])
1192              {
1193              could_continue = partial_newline = TRUE;
1194              }
1195            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1196              (c < 256 &&              (c < 256 &&
1197                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1198                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1135  for (;;) Line 1214  for (;;)
1214        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1215        if (clen > 0)        if (clen > 0)
1216          {          {
1217          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1218                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1219                NLBLOCK->nltype == NLTYPE_FIXED &&
1220                NLBLOCK->nllen == 2 &&
1221                c == NLBLOCK->nl[0])
1222              {
1223              could_continue = partial_newline = TRUE;
1224              }
1225            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1226              (c < 256 &&              (c < 256 &&
1227                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1228                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1155  for (;;) Line 1242  for (;;)
1242        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1243        if (clen > 0)        if (clen > 0)
1244          {          {
1245          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1246                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1247                NLBLOCK->nltype == NLTYPE_FIXED &&
1248                NLBLOCK->nllen == 2 &&
1249                c == NLBLOCK->nl[0])
1250              {
1251              could_continue = partial_newline = TRUE;
1252              }
1253            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1254              (c < 256 &&              (c < 256 &&
1255                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1256                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1257            {            {
1258            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1259              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1260            else            else
1261              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
# Line 1176  for (;;) Line 1271  for (;;)
1271        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1272        if (clen > 0)        if (clen > 0)
1273          {          {
1274          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1275                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1276                NLBLOCK->nltype == NLTYPE_FIXED &&
1277                NLBLOCK->nllen == 2 &&
1278                c == NLBLOCK->nl[0])
1279              {
1280              could_continue = partial_newline = TRUE;
1281              }
1282            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1283              (c < 256 &&              (c < 256 &&
1284                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1285                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1186  for (;;) Line 1289  for (;;)
1289              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1290              next_active_state--;              next_active_state--;
1291              }              }
1292            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1293              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1294            else            else
1295              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
# Line 1209  for (;;) Line 1312  for (;;)
1312        if (clen > 0)        if (clen > 0)
1313          {          {
1314          BOOL OK;          BOOL OK;
1315            const pcre_uint32 *cp;
1316          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1317          switch(code[2])          switch(code[2])
1318            {            {
# Line 1257  for (;;) Line 1361  for (;;)
1361                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1362            break;            break;
1363    
1364              case PT_CLIST:
1365              cp = PRIV(ucd_caseless_sets) + code[3];
1366              for (;;)
1367                {
1368                if (c < *cp) { OK = FALSE; break; }
1369                if (c == *cp++) { OK = TRUE; break; }
1370                }
1371              break;
1372    
1373              case PT_UCNC:
1374              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1375                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1376                   c >= 0xe000;
1377              break;
1378    
1379            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1380    
1381            default:            default:
# Line 1283  for (;;) Line 1402  for (;;)
1402        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1403        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1404        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1405        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1406          {          {
1407            int lgb, rgb;
1408          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1409          int ncount = 0;          int ncount = 0;
1410          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
# Line 1292  for (;;) Line 1412  for (;;)
1412            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1413            next_active_state--;            next_active_state--;
1414            }            }
1415            lgb = UCD_GRAPHBREAK(c);
1416          while (nptr < end_subject)          while (nptr < end_subject)
1417            {            {
1418            int nd;            dlen = 1;
1419            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1420            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1421            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1422            ncount++;            ncount++;
1423            nptr += ndlen;            lgb = rgb;
1424              nptr += dlen;
1425            }            }
1426          count++;          count++;
1427          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
# Line 1318  for (;;) Line 1440  for (;;)
1440          int ncount = 0;          int ncount = 0;
1441          switch (c)          switch (c)
1442            {            {
1443            case 0x000b:            case CHAR_VT:
1444            case 0x000c:            case CHAR_FF:
1445            case 0x0085:            case CHAR_NEL:
1446    #ifndef EBCDIC
1447            case 0x2028:            case 0x2028:
1448            case 0x2029:            case 0x2029:
1449    #endif  /* Not EBCDIC */
1450            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1451            goto ANYNL01;            goto ANYNL01;
1452    
1453            case 0x000d:            case CHAR_CR:
1454            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1455            /* Fall through */            /* Fall through */
1456    
1457            ANYNL01:            ANYNL01:
1458            case 0x000a:            case CHAR_LF:
1459            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1460              {              {
1461              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1358  for (;;) Line 1482  for (;;)
1482          BOOL OK;          BOOL OK;
1483          switch (c)          switch (c)
1484            {            {
1485            case 0x000a:            VSPACE_CASES:
           case 0x000b:  
           case 0x000c:  
           case 0x000d:  
           case 0x0085:  
           case 0x2028:  
           case 0x2029:  
1486            OK = TRUE;            OK = TRUE;
1487            break;            break;
1488    
# Line 1397  for (;;) Line 1515  for (;;)
1515          BOOL OK;          BOOL OK;
1516          switch (c)          switch (c)
1517            {            {
1518            case 0x09:      /* HT */            HSPACE_CASES:
           case 0x20:      /* SPACE */  
           case 0xa0:      /* NBSP */  
           case 0x1680:    /* OGHAM SPACE MARK */  
           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
           case 0x2000:    /* EN QUAD */  
           case 0x2001:    /* EM QUAD */  
           case 0x2002:    /* EN SPACE */  
           case 0x2003:    /* EM SPACE */  
           case 0x2004:    /* THREE-PER-EM SPACE */  
           case 0x2005:    /* FOUR-PER-EM SPACE */  
           case 0x2006:    /* SIX-PER-EM SPACE */  
           case 0x2007:    /* FIGURE SPACE */  
           case 0x2008:    /* PUNCTUATION SPACE */  
           case 0x2009:    /* THIN SPACE */  
           case 0x200A:    /* HAIR SPACE */  
           case 0x202f:    /* NARROW NO-BREAK SPACE */  
           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
           case 0x3000:    /* IDEOGRAPHIC SPACE */  
1519            OK = TRUE;            OK = TRUE;
1520            break;            break;
1521    
# Line 1456  for (;;) Line 1556  for (;;)
1556        if (clen > 0)        if (clen > 0)
1557          {          {
1558          BOOL OK;          BOOL OK;
1559            const pcre_uint32 *cp;
1560          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1561          switch(code[2])          switch(code[2])
1562            {            {
# Line 1504  for (;;) Line 1605  for (;;)
1605                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1606            break;            break;
1607    
1608              case PT_CLIST:
1609              cp = PRIV(ucd_caseless_sets) + code[3];
1610              for (;;)
1611                {
1612                if (c < *cp) { OK = FALSE; break; }
1613                if (c == *cp++) { OK = TRUE; break; }
1614                }
1615              break;
1616    
1617              case PT_UCNC:
1618              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1619                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1620                   c >= 0xe000;
1621              break;
1622    
1623            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1624    
1625            default:            default:
# Line 1539  for (;;) Line 1655  for (;;)
1655        QS2:        QS2:
1656    
1657        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1658        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1659          {          {
1660            int lgb, rgb;
1661          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1662          int ncount = 0;          int ncount = 0;
1663          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
# Line 1549  for (;;) Line 1666  for (;;)
1666            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1667            next_active_state--;            next_active_state--;
1668            }            }
1669            lgb = UCD_GRAPHBREAK(c);
1670          while (nptr < end_subject)          while (nptr < end_subject)
1671            {            {
1672            int nd;            dlen = 1;
1673            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1674            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1675            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1676            ncount++;            ncount++;
1677            nptr += ndlen;            lgb = rgb;
1678              nptr += dlen;
1679            }            }
1680          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1681          }          }
# Line 1582  for (;;) Line 1701  for (;;)
1701          int ncount = 0;          int ncount = 0;
1702          switch (c)          switch (c)
1703            {            {
1704            case 0x000b:            case CHAR_VT:
1705            case 0x000c:            case CHAR_FF:
1706            case 0x0085:            case CHAR_NEL:
1707    #ifndef EBCDIC
1708            case 0x2028:            case 0x2028:
1709            case 0x2029:            case 0x2029:
1710    #endif  /* Not EBCDIC */
1711            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1712            goto ANYNL02;            goto ANYNL02;
1713    
1714            case 0x000d:            case CHAR_CR:
1715            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1716            /* Fall through */            /* Fall through */
1717    
1718            ANYNL02:            ANYNL02:
1719            case 0x000a:            case CHAR_LF:
1720            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1721                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1722              {              {
1723              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1724              next_active_state--;              next_active_state--;
1725              }              }
1726            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
1727            break;            break;
1728    
1729            default:            default:
# Line 1630  for (;;) Line 1751  for (;;)
1751          BOOL OK;          BOOL OK;
1752          switch (c)          switch (c)
1753            {            {
1754            case 0x000a:            VSPACE_CASES:
           case 0x000b:  
           case 0x000c:  
           case 0x000d:  
           case 0x0085:  
           case 0x2028:  
           case 0x2029:  
1755            OK = TRUE;            OK = TRUE;
1756            break;            break;
1757    
# Line 1652  for (;;) Line 1767  for (;;)
1767              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1768              next_active_state--;              next_active_state--;
1769              }              }
1770            ADD_NEW_DATA(-(state_offset + count), 0, 0);            ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1771            }            }
1772          }          }
1773        break;        break;
# Line 1676  for (;;) Line 1791  for (;;)
1791          BOOL OK;          BOOL OK;
1792          switch (c)          switch (c)
1793            {            {
1794            case 0x09:      /* HT */            HSPACE_CASES:
           case 0x20:      /* SPACE */  
           case 0xa0:      /* NBSP */  
           case 0x1680:    /* OGHAM SPACE MARK */  
           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
           case 0x2000:    /* EN QUAD */  
           case 0x2001:    /* EM QUAD */  
           case 0x2002:    /* EN SPACE */  
           case 0x2003:    /* EM SPACE */  
           case 0x2004:    /* THREE-PER-EM SPACE */  
           case 0x2005:    /* FOUR-PER-EM SPACE */  
           case 0x2006:    /* SIX-PER-EM SPACE */  
           case 0x2007:    /* FIGURE SPACE */  
           case 0x2008:    /* PUNCTUATION SPACE */  
           case 0x2009:    /* THIN SPACE */  
           case 0x200A:    /* HAIR SPACE */  
           case 0x202f:    /* NARROW NO-BREAK SPACE */  
           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
           case 0x3000:    /* IDEOGRAPHIC SPACE */  
1795            OK = TRUE;            OK = TRUE;
1796            break;            break;
1797    
# Line 1711  for (;;) Line 1808  for (;;)
1808              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1809              next_active_state--;              next_active_state--;
1810              }              }
1811            ADD_NEW_DATA(-(state_offset + count), 0, 0);            ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1812            }            }
1813          }          }
1814        break;        break;
# Line 1728  for (;;) Line 1825  for (;;)
1825        if (clen > 0)        if (clen > 0)
1826          {          {
1827          BOOL OK;          BOOL OK;
1828            const pcre_uint32 *cp;
1829          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1830          switch(code[1 + IMM2_SIZE + 1])          switch(code[1 + IMM2_SIZE + 1])
1831            {            {
# Line 1776  for (;;) Line 1874  for (;;)
1874                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1875            break;            break;
1876    
1877              case PT_CLIST:
1878              cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
1879              for (;;)
1880                {
1881                if (c < *cp) { OK = FALSE; break; }
1882                if (c == *cp++) { OK = TRUE; break; }
1883                }
1884              break;
1885    
1886              case PT_UCNC:
1887              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1888                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1889                   c >= 0xe000;
1890              break;
1891    
1892            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1893    
1894            default:            default:
# Line 1790  for (;;) Line 1903  for (;;)
1903              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1904              next_active_state--;              next_active_state--;
1905              }              }
1906            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1907              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1908            else            else
1909              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
# Line 1806  for (;;) Line 1919  for (;;)
1919        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1920          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1921        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1922        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1923          {          {
1924            int lgb, rgb;
1925          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1926          int ncount = 0;          int ncount = 0;
1927          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
# Line 1815  for (;;) Line 1929  for (;;)
1929            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1930            next_active_state--;            next_active_state--;
1931            }            }
1932            lgb = UCD_GRAPHBREAK(c);
1933          while (nptr < end_subject)          while (nptr < end_subject)
1934            {            {
1935            int nd;            dlen = 1;
1936            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1937            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1938            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1939            ncount++;            ncount++;
1940            nptr += ndlen;            lgb = rgb;
1941              nptr += dlen;
1942            }            }
1943          if (++count >= GET2(code, 1))          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1944                reset_could_continue = TRUE;
1945            if (++count >= (int)GET2(code, 1))
1946            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1947          else          else
1948            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
# Line 1845  for (;;) Line 1963  for (;;)
1963          int ncount = 0;          int ncount = 0;
1964          switch (c)          switch (c)
1965            {            {
1966            case 0x000b:            case CHAR_VT:
1967            case 0x000c:            case CHAR_FF:
1968            case 0x0085:            case CHAR_NEL:
1969    #ifndef EBCDIC
1970            case 0x2028:            case 0x2028:
1971            case 0x2029:            case 0x2029:
1972    #endif  /* Not EBCDIC */
1973            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1974            goto ANYNL03;            goto ANYNL03;
1975    
1976            case 0x000d:            case CHAR_CR:
1977            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1978            /* Fall through */            /* Fall through */
1979    
1980            ANYNL03:            ANYNL03:
1981            case 0x000a:            case CHAR_LF:
1982            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1983              {              {
1984              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1985              next_active_state--;              next_active_state--;
1986              }              }
1987            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1988              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1989            else            else
1990              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
# Line 1889  for (;;) Line 2009  for (;;)
2009          BOOL OK;          BOOL OK;
2010          switch (c)          switch (c)
2011            {            {
2012            case 0x000a:            VSPACE_CASES:
           case 0x000b:  
           case 0x000c:  
           case 0x000d:  
           case 0x0085:  
           case 0x2028:  
           case 0x2029:  
2013            OK = TRUE;            OK = TRUE;
2014            break;            break;
2015    
# Line 1910  for (;;) Line 2024  for (;;)
2024              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2025              next_active_state--;              next_active_state--;
2026              }              }
2027            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2028              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2029            else            else
2030              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
# Line 1931  for (;;) Line 2045  for (;;)
2045          BOOL OK;          BOOL OK;
2046          switch (c)          switch (c)
2047            {            {
2048            case 0x09:      /* HT */            HSPACE_CASES:
           case 0x20:      /* SPACE */  
           case 0xa0:      /* NBSP */  
           case 0x1680:    /* OGHAM SPACE MARK */  
           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
           case 0x2000:    /* EN QUAD */  
           case 0x2001:    /* EM QUAD */  
           case 0x2002:    /* EN SPACE */  
           case 0x2003:    /* EM SPACE */  
           case 0x2004:    /* THREE-PER-EM SPACE */  
           case 0x2005:    /* FOUR-PER-EM SPACE */  
           case 0x2006:    /* SIX-PER-EM SPACE */  
           case 0x2007:    /* FIGURE SPACE */  
           case 0x2008:    /* PUNCTUATION SPACE */  
           case 0x2009:    /* THIN SPACE */  
           case 0x200A:    /* HAIR SPACE */  
           case 0x202f:    /* NARROW NO-BREAK SPACE */  
           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
           case 0x3000:    /* IDEOGRAPHIC SPACE */  
2049            OK = TRUE;            OK = TRUE;
2050            break;            break;
2051    
# Line 1965  for (;;) Line 2061  for (;;)
2061              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2062              next_active_state--;              next_active_state--;
2063              }              }
2064            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2065              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2066            else            else
2067              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
# Line 2025  for (;;) Line 2121  for (;;)
2121        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
2122    
2123        case OP_EXTUNI:        case OP_EXTUNI:
2124        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
2125          {          {
2126            int lgb, rgb;
2127          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
2128          int ncount = 0;          int ncount = 0;
2129            lgb = UCD_GRAPHBREAK(c);
2130          while (nptr < end_subject)          while (nptr < end_subject)
2131            {            {
2132            int nclen = 1;            dlen = 1;
2133            GETCHARLEN(c, nptr, nclen);            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2134            if (UCD_CATEGORY(c) != ucp_M) break;            rgb = UCD_GRAPHBREAK(d);
2135              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2136            ncount++;            ncount++;
2137            nptr += nclen;            lgb = rgb;
2138              nptr += dlen;
2139            }            }
2140            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2141                reset_could_continue = TRUE;
2142          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2143          }          }
2144        break;        break;
# Line 2050  for (;;) Line 2152  for (;;)
2152        case OP_ANYNL:        case OP_ANYNL:
2153        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2154          {          {
2155          case 0x000b:          case CHAR_VT:
2156          case 0x000c:          case CHAR_FF:
2157          case 0x0085:          case CHAR_NEL:
2158    #ifndef EBCDIC
2159          case 0x2028:          case 0x2028:
2160          case 0x2029:          case 0x2029:
2161    #endif  /* Not EBCDIC */
2162          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2163    
2164          case 0x000a:          case CHAR_LF:
2165          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2166          break;          break;
2167    
2168          case 0x000d:          case CHAR_CR:
2169          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2170              {
2171              ADD_NEW(state_offset + 1, 0);
2172              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2173                reset_could_continue = TRUE;
2174              }
2175            else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
2176            {            {
2177            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2178            }            }
# Line 2078  for (;;) Line 2188  for (;;)
2188        case OP_NOT_VSPACE:        case OP_NOT_VSPACE:
2189        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2190          {          {
2191          case 0x000a:          VSPACE_CASES:
         case 0x000b:  
         case 0x000c:  
         case 0x000d:  
         case 0x0085:  
         case 0x2028:  
         case 0x2029:  
2192          break;          break;
2193    
2194          default:          default:
# Line 2097  for (;;) Line 2201  for (;;)
2201        case OP_VSPACE:        case OP_VSPACE:
2202        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2203          {          {
2204          case 0x000a:          VSPACE_CASES:
         case 0x000b:  
         case 0x000c:  
         case 0x000d:  
         case 0x0085:  
         case 0x2028:  
         case 0x2029:  
2205          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2206          break;          break;
2207    
2208          default: break;          default:
2209            break;
2210          }          }
2211        break;        break;
2212    
# Line 2115  for (;;) Line 2214  for (;;)
2214        case OP_NOT_HSPACE:        case OP_NOT_HSPACE:
2215        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2216          {          {
2217          case 0x09:      /* HT */          HSPACE_CASES:
         case 0x20:      /* SPACE */  
         case 0xa0:      /* NBSP */  
         case 0x1680:    /* OGHAM SPACE MARK */  
         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
         case 0x2000:    /* EN QUAD */  
         case 0x2001:    /* EM QUAD */  
         case 0x2002:    /* EN SPACE */  
         case 0x2003:    /* EM SPACE */  
         case 0x2004:    /* THREE-PER-EM SPACE */  
         case 0x2005:    /* FOUR-PER-EM SPACE */  
         case 0x2006:    /* SIX-PER-EM SPACE */  
         case 0x2007:    /* FIGURE SPACE */  
         case 0x2008:    /* PUNCTUATION SPACE */  
         case 0x2009:    /* THIN SPACE */  
         case 0x200A:    /* HAIR SPACE */  
         case 0x202f:    /* NARROW NO-BREAK SPACE */  
         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
         case 0x3000:    /* IDEOGRAPHIC SPACE */  
2218          break;          break;
2219    
2220          default:          default:
# Line 2146  for (;;) Line 2227  for (;;)
2227        case OP_HSPACE:        case OP_HSPACE:
2228        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2229          {          {
2230          case 0x09:      /* HT */          HSPACE_CASES:
         case 0x20:      /* SPACE */  
         case 0xa0:      /* NBSP */  
         case 0x1680:    /* OGHAM SPACE MARK */  
         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
         case 0x2000:    /* EN QUAD */  
         case 0x2001:    /* EM QUAD */  
         case 0x2002:    /* EN SPACE */  
         case 0x2003:    /* EM SPACE */  
         case 0x2004:    /* THREE-PER-EM SPACE */  
         case 0x2005:    /* FOUR-PER-EM SPACE */  
         case 0x2006:    /* SIX-PER-EM SPACE */  
         case 0x2007:    /* FIGURE SPACE */  
         case 0x2008:    /* PUNCTUATION SPACE */  
         case 0x2009:    /* THIN SPACE */  
         case 0x200A:    /* HAIR SPACE */  
         case 0x202f:    /* NARROW NO-BREAK SPACE */  
         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
         case 0x3000:    /* IDEOGRAPHIC SPACE */  
2231          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2232          break;          break;
2233    
2234            default:
2235            break;
2236          }          }
2237        break;        break;
2238    
2239        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2240        /* Match a negated single character casefully. This is only used for        /* Match a negated single character casefully. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2241    
2242        case OP_NOT:        case OP_NOT:
2243        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2244        break;        break;
2245    
2246        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2247        /* Match a negated single character caselessly. This is only used for        /* Match a negated single character caselessly. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2248    
2249        case OP_NOTI:        case OP_NOTI:
2250        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0)
2251          { ADD_NEW(state_offset + dlen + 1, 0); }          {
2252            unsigned int otherd;
2253    #ifdef SUPPORT_UTF
2254            if (utf && d >= 128)
2255              {
2256    #ifdef SUPPORT_UCP
2257              otherd = UCD_OTHERCASE(d);
2258    #endif  /* SUPPORT_UCP */
2259              }
2260            else
2261    #endif  /* SUPPORT_UTF */
2262            otherd = TABLE_GET(d, fcc, d);
2263            if (c != d && c != otherd)
2264              { ADD_NEW(state_offset + dlen + 1, 0); }
2265            }
2266        break;        break;
2267    
2268        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 2210  for (;;) Line 2286  for (;;)
2286        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
2287        if (clen > 0)        if (clen > 0)
2288          {          {
2289          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2290          if (caseless)          if (caseless)
2291            {            {
2292  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2257  for (;;) Line 2333  for (;;)
2333        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2334        if (clen > 0)        if (clen > 0)
2335          {          {
2336          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2337          if (caseless)          if (caseless)
2338            {            {
2339  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2302  for (;;) Line 2378  for (;;)
2378        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2379        if (clen > 0)        if (clen > 0)
2380          {          {
2381          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2382          if (caseless)          if (caseless)
2383            {            {
2384  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2339  for (;;) Line 2415  for (;;)
2415        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2416        if (clen > 0)        if (clen > 0)
2417          {          {
2418          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2419          if (caseless)          if (caseless)
2420            {            {
2421  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2355  for (;;) Line 2431  for (;;)
2431            }            }
2432          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2433            {            {
2434            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2435              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2436            else            else
2437              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
# Line 2383  for (;;) Line 2459  for (;;)
2459        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2460        if (clen > 0)        if (clen > 0)
2461          {          {
2462          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2463          if (caseless)          if (caseless)
2464            {            {
2465  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2404  for (;;) Line 2480  for (;;)
2480              active_count--;             /* Remove non-match possibility */              active_count--;             /* Remove non-match possibility */
2481              next_active_state--;              next_active_state--;
2482              }              }
2483            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2484              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2485            else            else
2486              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
# Line 2477  for (;;) Line 2553  for (;;)
2553            case OP_CRRANGE:            case OP_CRRANGE:
2554            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2555            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2556            if (count >= GET2(ecode, 1))            if (count >= (int)GET2(ecode, 1))
2557              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2558            if (isinclass)            if (isinclass)
2559              {              {
2560              int max = GET2(ecode, 1 + IMM2_SIZE);              int max = (int)GET2(ecode, 1 + IMM2_SIZE);
2561              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2562                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2563              else              else
# Line 2557  for (;;) Line 2633  for (;;)
2633              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2634              cb.callout_number   = code[LINK_SIZE+2];              cb.callout_number   = code[LINK_SIZE+2];
2635              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2636    #if defined COMPILE_PCRE8
2637              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2638    #elif defined COMPILE_PCRE16
2639                cb.subject          = (PCRE_SPTR16)start_subject;
2640    #elif defined COMPILE_PCRE32
2641                cb.subject          = (PCRE_SPTR32)start_subject;
2642    #endif
2643              cb.subject_length   = (int)(end_subject - start_subject);              cb.subject_length   = (int)(end_subject - start_subject);
2644              cb.start_match      = (int)(current_subject - start_subject);              cb.start_match      = (int)(current_subject - start_subject);
2645              cb.current_position = (int)(ptr - start_subject);              cb.current_position = (int)(ptr - start_subject);
# Line 2686  for (;;) Line 2768  for (;;)
2768            {            {
2769            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2770              {              {
             const pcre_uchar *p = start_subject + local_offsets[rc];  
             const pcre_uchar *pp = start_subject + local_offsets[rc+1];  
2771              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2772  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2773              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;              if (utf)
2774                  {
2775                  const pcre_uchar *p = start_subject + local_offsets[rc];
2776                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2777                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2778                  }
2779  #endif  #endif
2780              if (charcount > 0)              if (charcount > 0)
2781                {                {
# Line 2788  for (;;) Line 2873  for (;;)
2873              const pcre_uchar *p = ptr;              const pcre_uchar *p = ptr;
2874              const pcre_uchar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2875              charcount = (int)(pp - p);              charcount = (int)(pp - p);
2876  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2877              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;              if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2878  #endif  #endif
2879              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2880              }              }
# Line 2870  for (;;) Line 2955  for (;;)
2955              }              }
2956            else            else
2957              {              {
2958  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2959              const pcre_uchar *p = start_subject + local_offsets[0];              if (utf)
2960              const pcre_uchar *pp = start_subject + local_offsets[1];                {
2961              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;                const pcre_uchar *p = start_subject + local_offsets[0];
2962                  const pcre_uchar *pp = start_subject + local_offsets[1];
2963                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2964                  }
2965  #endif  #endif
2966              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2967              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
# Line 2896  for (;;) Line 2984  for (;;)
2984          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2985          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2986          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
2987    #if defined COMPILE_PCRE8
2988          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
2989    #elif defined COMPILE_PCRE16
2990            cb.subject          = (PCRE_SPTR16)start_subject;
2991    #elif defined COMPILE_PCRE32
2992            cb.subject          = (PCRE_SPTR32)start_subject;
2993    #endif
2994          cb.subject_length   = (int)(end_subject - start_subject);          cb.subject_length   = (int)(end_subject - start_subject);
2995          cb.start_match      = (int)(current_subject - start_subject);          cb.start_match      = (int)(current_subject - start_subject);
2996          cb.current_position = (int)(ptr - start_subject);          cb.current_position = (int)(ptr - start_subject);
# Line 2938  for (;;) Line 3032  for (;;)
3032    if (new_count <= 0)    if (new_count <= 0)
3033      {      {
3034      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3035          could_continue &&                            /* Some could go on */          could_continue &&                            /* Some could go on, and */
3036          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3037          (                                            /* either... */          (                                            /* either... */
3038          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2946  for (;;) Line 3040  for (;;)
3040          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3041           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3042          ) &&                                         /* And... */          ) &&                                         /* And... */
3043          ptr >= end_subject &&                  /* Reached end of subject */          (
3044          ptr > md->start_used_ptr)              /* Inspected non-empty string */          partial_newline ||                           /* Either partial NL */
3045        {            (                                          /* or ... */
3046        if (offsetcount >= 2)            ptr >= end_subject &&                /* End of subject and */
3047          {            ptr > md->start_used_ptr)            /* Inspected non-empty string */
3048          offsets[0] = (int)(md->start_used_ptr - start_subject);            )
3049          offsets[1] = (int)(end_subject - start_subject);          )
         }  
3050        match_count = PCRE_ERROR_PARTIAL;        match_count = PCRE_ERROR_PARTIAL;
       }  
   
3051      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
3052        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
3053        rlevel*2-2, SP));        rlevel*2-2, SP));
# Line 3006  Returns:          > 0 => number of match Line 3097  Returns:          > 0 => number of match
3097                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3098  */  */
3099    
3100  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
3101  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3102  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3103    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3104    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3105  #else  #elif defined COMPILE_PCRE16
3106  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3107  pcre16_dfa_exec(const pcre *argument_re, const pcre16_extra *extra_data,  pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3108    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3109    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3110    #elif defined COMPILE_PCRE32
3111    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3112    pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
3113      PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
3114      int offsetcount, int *workspace, int wscount)
3115  #endif  #endif
3116  {  {
3117  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3118  dfa_match_data match_block;  dfa_match_data match_block;
3119  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3120  BOOL utf, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3121  const pcre_uchar *current_subject, *end_subject;  const pcre_uchar *current_subject, *end_subject;
 const pcre_uint8 *lcc;  
   
3122  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
3123    
3124  const pcre_uchar *req_char_ptr;  const pcre_uchar *req_char_ptr;
# Line 3044  if (re == NULL || subject == NULL || wor Line 3138  if (re == NULL || subject == NULL || wor
3138     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3139  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3140  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3141    if (length < 0) return PCRE_ERROR_BADLENGTH;
3142  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3143    
3144  /* We need to find the pointer to any study data before we test for byte  /* Check that the first field in the block is the magic number. If it is not,
3145  flipping, so we scan the extra_data block first. This may set two fields in the  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3146  match block, so we must initialize them beforehand. However, the other fields  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3147  in the match block must not be set until after the byte flipping. */  means that the pattern is likely compiled with different endianness. */
3148    
3149    if (re->magic_number != MAGIC_NUMBER)
3150      return re->magic_number == REVERSED_MAGIC_NUMBER?
3151        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3152    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3153    
3154    /* If restarting after a partial match, do some sanity checks on the contents
3155    of the workspace. */
3156    
3157    if ((options & PCRE_DFA_RESTART) != 0)
3158      {
3159      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3160        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3161          return PCRE_ERROR_DFA_BADRESTART;
3162      }
3163    
3164    /* Set up study, callout, and table data */
3165    
3166  md->tables = re->tables;  md->tables = re->tables;
3167  md->callout_data = NULL;  md->callout_data = NULL;
# Line 3068  if (extra_data != NULL) Line 3180  if (extra_data != NULL)
3180      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3181    }    }
3182    
 /* Check that the first field in the block is the magic number. If it is not,  
 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to  
 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which  
 means that the pattern is likely compiled with different endianness. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   return re->magic_number == REVERSED_MAGIC_NUMBER?  
     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;  
 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;  
   
3183  /* Set some local values */  /* Set some local values */
3184    
3185  current_subject = (const pcre_uchar *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
# Line 3085  end_subject = (const pcre_uchar *)subjec Line 3187  end_subject = (const pcre_uchar *)subjec
3187  req_char_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3188    
3189  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3190  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
3191  utf = (re->options & PCRE_UTF8) != 0;  utf = (re->options & PCRE_UTF8) != 0;
3192  #else  #else
3193  utf = FALSE;  utf = FALSE;
# Line 3171  if (utf && (options & PCRE_NO_UTF8_CHECK Line 3273  if (utf && (options & PCRE_NO_UTF8_CHECK
3273        offsets[0] = erroroffset;        offsets[0] = erroroffset;
3274        offsets[1] = errorcode;        offsets[1] = errorcode;
3275        }        }
3276      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?  #if defined COMPILE_PCRE8
3277        return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
3278        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3279    #elif defined COMPILE_PCRE16
3280        return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
3281          PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
3282    #elif defined COMPILE_PCRE32
3283        return PCRE_ERROR_BADUTF32;
3284    #endif
3285      }      }
3286    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3287    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
3288          NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))          NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3289      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
3290    #endif
3291    }    }
3292  #endif  #endif
3293    
# Line 3186  in other programs later. */ Line 3297  in other programs later. */
3297    
3298  if (md->tables == NULL) md->tables = PRIV(default_tables);  if (md->tables == NULL) md->tables = PRIV(default_tables);
3299    
3300  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3301  used in a loop when finding where to start. */  where to start. */
3302    
 lcc = md->tables + lcc_offset;  
3303  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3304  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3305    
# Line 3204  if (!anchored) Line 3314  if (!anchored)
3314    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3315      {      {
3316      has_first_char = TRUE;      has_first_char = TRUE;
3317      first_char = first_char2 = re->first_char;      first_char = first_char2 = (pcre_uchar)(re->first_char);
3318      if ((re->flags & PCRE_FCH_CASELESS) != 0)      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3319        {        {
3320        first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);        first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
# Line 3228  character" set. */ Line 3338  character" set. */
3338  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3339    {    {
3340    has_req_char = TRUE;    has_req_char = TRUE;
3341    req_char = req_char2 = re->req_char;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3342    if ((re->flags & PCRE_RCH_CASELESS) != 0)    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3343      {      {
3344      req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);      req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
# Line 3287  for (;;) Line 3397  for (;;)
3397        if (has_first_char)        if (has_first_char)
3398          {          {
3399          if (first_char != first_char2)          if (first_char != first_char2)
3400              {
3401              pcre_uchar csc;
3402            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3403                *current_subject != first_char && *current_subject != first_char2)                   (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
3404              current_subject++;              current_subject++;
3405              }
3406          else          else
3407            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3408                   *current_subject != first_char)                   RAWUCHARTEST(current_subject) != first_char)
3409              current_subject++;              current_subject++;
3410          }          }
3411    
# Line 3322  for (;;) Line 3435  for (;;)
3435            ANYCRLF, and we are now at a LF, advance the match position by one            ANYCRLF, and we are now at a LF, advance the match position by one
3436            more character. */            more character. */
3437    
3438            if (current_subject[-1] == CHAR_CR &&            if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3439                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
3440                 current_subject < end_subject &&                 current_subject < end_subject &&
3441                 *current_subject == CHAR_NL)                 RAWUCHARTEST(current_subject) == CHAR_NL)
3442              current_subject++;              current_subject++;
3443            }            }
3444          }          }
# Line 3336  for (;;) Line 3449  for (;;)
3449          {          {
3450          while (current_subject < end_subject)          while (current_subject < end_subject)
3451            {            {
3452            register unsigned int c = *current_subject;            register pcre_uint32 c = RAWUCHARTEST(current_subject);
3453  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3454            if (c > 255) c = 255;            if (c > 255) c = 255;
3455  #endif  #endif
# Line 3402  for (;;) Line 3515  for (;;)
3515              {              {
3516              while (p < end_subject)              while (p < end_subject)
3517                {                {
3518                register int pp = *p++;                register pcre_uint32 pp = RAWUCHARINCTEST(p);
3519                if (pp == req_char || pp == req_char2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3520                }                }
3521              }              }
# Line 3410  for (;;) Line 3523  for (;;)
3523              {              {
3524              while (p < end_subject)              while (p < end_subject)
3525                {                {
3526                if (*p++ == req_char) { p--; break; }                if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
3527                }                }
3528              }              }
3529    
# Line 3448  for (;;) Line 3561  for (;;)
3561    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3562    on only if not anchored. */    on only if not anchored. */
3563    
3564    if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;    if (rc != PCRE_ERROR_NOMATCH || anchored)
3565        {
3566        if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
3567          {
3568          offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
3569          offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
3570          if (offsetcount > 2)
3571            offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
3572          }
3573        return rc;
3574        }
3575    
3576    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
3577    and firstline is set. */    and firstline is set. */
# Line 3468  for (;;) Line 3591  for (;;)
3591    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
3592    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
3593    
3594    if (current_subject[-1] == CHAR_CR &&    if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3595        current_subject < end_subject &&        current_subject < end_subject &&
3596        *current_subject == CHAR_NL &&        RAWUCHARTEST(current_subject) == CHAR_NL &&
3597        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
3598          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
3599           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||

Legend:
Removed from v.850  
changed lines
  Added in v.1334

  ViewVC Help
Powered by ViewVC 1.1.5