# Diff of /code/trunk/pcre_dfa_exec.c

revision 850 by zherczeg, Wed Jan 4 17:29:11 2012 UTC revision 1364 by ph10, Sat Oct 5 15:45:11 2013 UTC
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
11
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40

41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
45
46
# Line 121  static const pcre_uint8 coptable[] = { Line 120  static const pcre_uint8 coptable[] = {
120    0, 0,                          /* \P, \p                                 */    0, 0,                          /* \P, \p                                 */
121    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
122    0,                             /* \X                                     */    0,                             /* \X                                     */
123    0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, \$, \$M                   */    0, 0, 0, 0, 0, 0,              /* \Z, \z, \$, \$M, ^, ^M                   */
124    1,                             /* Char                                   */    1,                             /* Char                                   */
125    1,                             /* Chari                                  */    1,                             /* Chari                                  */
126    1,                             /* not                                    */    1,                             /* not                                    */
# Line 157  static const pcre_uint8 coptable[] = { Line 156  static const pcre_uint8 coptable[] = {
156    0,                             /* XCLASS - variable length               */    0,                             /* XCLASS - variable length               */
157    0,                             /* REF                                    */    0,                             /* REF                                    */
158    0,                             /* REFI                                   */    0,                             /* REFI                                   */
159      0,                             /* DNREF                                  */
160      0,                             /* DNREFI                                 */
161    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
162    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
163    0,                             /* Alt                                    */    0,                             /* Alt                                    */
# Line 195  static const pcre_uint8 poptable[] = { Line 196  static const pcre_uint8 poptable[] = {
196    1, 1,                          /* \P, \p                                 */    1, 1,                          /* \P, \p                                 */
197    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
198    1,                             /* \X                                     */    1,                             /* \X                                     */
199    0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, \$, \$M                   */    0, 0, 0, 0, 0, 0,              /* \Z, \z, \$, \$M, ^, ^M                   */
200    1,                             /* Char                                   */    1,                             /* Char                                   */
201    1,                             /* Chari                                  */    1,                             /* Chari                                  */
202    1,                             /* not                                    */    1,                             /* not                                    */
# Line 226  static const pcre_uint8 poptable[] = { Line 227  static const pcre_uint8 poptable[] = {
227    1,                             /* XCLASS - variable length               */    1,                             /* XCLASS - variable length               */
228    0,                             /* REF                                    */    0,                             /* REF                                    */
229    0,                             /* REFI                                   */    0,                             /* REFI                                   */
230      0,                             /* DNREF                                  */
231      0,                             /* DNREFI                                 */
232    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
233    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
234    0,                             /* Alt                                    */    0,                             /* Alt                                    */
# Line 282  typedef struct stateblock { Line 285  typedef struct stateblock {
285    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
286  } stateblock;  } stateblock;
287
288  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
289
290
291  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 303  Returns:       nothing Line 306  Returns:       nothing
306  static void  static void
307  pchars(const pcre_uchar *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
308  {  {
309  int c;  pcre_uint32 c;
310  while (length-- > 0)  while (length-- > 0)
311    {    {
312    if (isprint(c = *(p++)))    if (isprint(c = *(p++)))
313      fprintf(f, "%c", c);      fprintf(f, "%c", c);
314    else    else
315      fprintf(f, "\\x%02x", c);      fprintf(f, "\\x{%02x}", c);
316    }    }
317  }  }
318  #endif  #endif
# Line 382  for the current character, one for the f Line 385  for the current character, one for the f
385      next_new_state->count  = (y); \      next_new_state->count  = (y); \
386      next_new_state->data   = (z); \      next_new_state->data   = (z); \
387      next_new_state++; \      next_new_state++; \
388      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
389          (x), (y), (z), __LINE__)); \
390      } \      } \
391    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
392
# Line 424  BOOL utf = (md->poptions & PCRE_UTF8) != Line 428  BOOL utf = (md->poptions & PCRE_UTF8) !=
428  BOOL utf = FALSE;  BOOL utf = FALSE;
429  #endif  #endif
430
431    BOOL reset_could_continue = FALSE;
432
433  rlevel++;  rlevel++;
434  offsetcount &= (-2);  offsetcount &= (-2);
435
# Line 569  for (;;) Line 575  for (;;)
575    {    {
576    int i, j;    int i, j;
577    int clen, dlen;    int clen, dlen;
578    unsigned int c, d;    pcre_uint32 c, d;
579    int forced_fail = 0;    int forced_fail = 0;
580    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
581      BOOL could_continue = reset_could_continue;
582      reset_could_continue = FALSE;
583
584    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
585    new state list. */    new state list. */
# Line 607  for (;;) Line 615  for (;;)
615
616    if (ptr < end_subject)    if (ptr < end_subject)
617      {      {
618      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
619  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
620      if (utf) { GETCHARLEN(c, ptr, clen); } else      GETCHARLENTEST(c, ptr, clen);
621  #endif  /* SUPPORT_UTF */  #else
622      c = *ptr;      c = *ptr;
623    #endif  /* SUPPORT_UTF */
624      }      }
625    else    else
626      {      {
# Line 630  for (;;) Line 639  for (;;)
639      BOOL caseless = FALSE;      BOOL caseless = FALSE;
640      const pcre_uchar *code;      const pcre_uchar *code;
641      int state_offset = current_state->offset;      int state_offset = current_state->offset;
642      int count, codevalue, rrc;      int codevalue, rrc;
643        int count;
644
645  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
646      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 641  for (;;) Line 651  for (;;)
651
652      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
653      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
654      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
655        state, arrange for it to be passed on. */
656
657      if (state_offset < 0)      if (state_offset < 0)
658        {        {
# Line 650  for (;;) Line 661  for (;;)
661          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
663            current_state->data - 1);            current_state->data - 1);
664            if (could_continue) reset_could_continue = TRUE;
665          continue;          continue;
666          }          }
667        else        else
# Line 689  for (;;) Line 701  for (;;)
701      permitted.      permitted.
702
703      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
704      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
705      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
706      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
707      */      these ones to new opcodes. */
708
709      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
710        {        {
# Line 783  for (;;) Line 795  for (;;)
795              offsets[0] = (int)(current_subject - start_subject);              offsets[0] = (int)(current_subject - start_subject);
796              offsets[1] = (int)(ptr - start_subject);              offsets[1] = (int)(ptr - start_subject);
797              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
798                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
799              }              }
800            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
801              {              {
# Line 888  for (;;) Line 900  for (;;)
900        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
901        case OP_ANY:        case OP_ANY:
902        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
903          { ADD_NEW(state_offset + 1, 0); }          {
904            if (ptr + 1 >= md->end_subject &&
905                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
906                NLBLOCK->nltype == NLTYPE_FIXED &&
907                NLBLOCK->nllen == 2 &&
908                c == NLBLOCK->nl[0])
909              {
910              could_continue = partial_newline = TRUE;
911              }
912            else
913              {
914              ADD_NEW(state_offset + 1, 0);
915              }
916            }
917        break;        break;
918
919        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 916  for (;;) Line 941  for (;;)
941                 (ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
942              ))              ))
943            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
944            else if (ptr + 1 >= md->end_subject &&
945                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
946                     NLBLOCK->nltype == NLTYPE_FIXED &&
947                     NLBLOCK->nllen == 2 &&
948                     c == NLBLOCK->nl[0])
949              {
950              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
951                {
952                reset_could_continue = TRUE;
953                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
954                }
955              else could_continue = partial_newline = TRUE;
956              }
957          }          }
958        break;        break;
959
# Line 928  for (;;) Line 966  for (;;)
966          else if (clen == 0 ||          else if (clen == 0 ||
967              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
968            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
969            else if (ptr + 1 >= md->end_subject &&
970                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
971                     NLBLOCK->nltype == NLTYPE_FIXED &&
972                     NLBLOCK->nllen == 2 &&
973                     c == NLBLOCK->nl[0])
974              {
975              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
976                {
977                reset_could_continue = TRUE;
978                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
979                }
980              else could_continue = partial_newline = TRUE;
981              }
982          }          }
983        else if (IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
984          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 962  for (;;) Line 1013  for (;;)
1013            {            {
1014            const pcre_uchar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
1015            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1016  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1017            if (utf) { BACKCHAR(temp); }            if (utf) { BACKCHAR(temp); }
1018  #endif  #endif
1019            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
# Line 1015  for (;;) Line 1066  for (;;)
1066        if (clen > 0)        if (clen > 0)
1067          {          {
1068          BOOL OK;          BOOL OK;
1069            const pcre_uint32 *cp;
1070          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1071          switch(code[1])          switch(code[1])
1072            {            {
# Line 1046  for (;;) Line 1098  for (;;)
1098                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1099            break;            break;
1100
1101              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1102              which means that Perl space and POSIX space are now identical. PCRE
1103              was changed at release 8.34. */
1104
1105            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;

1106            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1107            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1108                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
# Line 1063  for (;;) Line 1115  for (;;)
1115                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1116            break;            break;
1117
1118              case PT_CLIST:
1119              cp = PRIV(ucd_caseless_sets) + code[2];
1120              for (;;)
1121                {
1122                if (c < *cp) { OK = FALSE; break; }
1123                if (c == *cp++) { OK = TRUE; break; }
1124                }
1125              break;
1126
1127              case PT_UCNC:
1128              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1129                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1130                   c >= 0xe000;
1131              break;
1132
1133            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1134
1135            default:            default:
# Line 1090  for (;;) Line 1157  for (;;)
1157        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1158        if (clen > 0)        if (clen > 0)
1159          {          {
1160          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1161                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1162                NLBLOCK->nltype == NLTYPE_FIXED &&
1163                NLBLOCK->nllen == 2 &&
1164                c == NLBLOCK->nl[0])
1165              {
1166              could_continue = partial_newline = TRUE;
1167              }
1168            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1169              (c < 256 &&              (c < 256 &&
1170                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1171                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1113  for (;;) Line 1188  for (;;)
1188        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1189        if (clen > 0)        if (clen > 0)
1190          {          {
1191          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1192                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1193                NLBLOCK->nltype == NLTYPE_FIXED &&
1194                NLBLOCK->nllen == 2 &&
1195                c == NLBLOCK->nl[0])
1196              {
1197              could_continue = partial_newline = TRUE;
1198              }
1199            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1200              (c < 256 &&              (c < 256 &&
1201                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1202                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1135  for (;;) Line 1218  for (;;)
1218        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1219        if (clen > 0)        if (clen > 0)
1220          {          {
1221          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1222                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1223                NLBLOCK->nltype == NLTYPE_FIXED &&
1224                NLBLOCK->nllen == 2 &&
1225                c == NLBLOCK->nl[0])
1226              {
1227              could_continue = partial_newline = TRUE;
1228              }
1229            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1230              (c < 256 &&              (c < 256 &&
1231                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1232                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1155  for (;;) Line 1246  for (;;)
1246        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1247        if (clen > 0)        if (clen > 0)
1248          {          {
1249          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1250                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1251                NLBLOCK->nltype == NLTYPE_FIXED &&
1252                NLBLOCK->nllen == 2 &&
1253                c == NLBLOCK->nl[0])
1254              {
1255              could_continue = partial_newline = TRUE;
1256              }
1257            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1258              (c < 256 &&              (c < 256 &&
1259                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1260                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1261            {            {
1262            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1263              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1264            else            else
1265              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
# Line 1176  for (;;) Line 1275  for (;;)
1275        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1276        if (clen > 0)        if (clen > 0)
1277          {          {
1278          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1279                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1280                NLBLOCK->nltype == NLTYPE_FIXED &&
1281                NLBLOCK->nllen == 2 &&
1282                c == NLBLOCK->nl[0])
1283              {
1284              could_continue = partial_newline = TRUE;
1285              }
1286            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1287              (c < 256 &&              (c < 256 &&
1288                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1289                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1186  for (;;) Line 1293  for (;;)
1293              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1294              next_active_state--;              next_active_state--;
1295              }              }
1296            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1297              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1298            else            else
1299              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
# Line 1209  for (;;) Line 1316  for (;;)
1316        if (clen > 0)        if (clen > 0)
1317          {          {
1318          BOOL OK;          BOOL OK;
1319            const pcre_uint32 *cp;
1320          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1321          switch(code[2])          switch(code[2])
1322            {            {
# Line 1240  for (;;) Line 1348  for (;;)
1348                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1349            break;            break;
1350
1351              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1352              which means that Perl space and POSIX space are now identical. PCRE
1353              was changed at release 8.34. */
1354
1355            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;

1356            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1357            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1358                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
# Line 1257  for (;;) Line 1365  for (;;)
1365                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1366            break;            break;
1367
1368              case PT_CLIST:
1369              cp = PRIV(ucd_caseless_sets) + code[3];
1370              for (;;)
1371                {
1372                if (c < *cp) { OK = FALSE; break; }
1373                if (c == *cp++) { OK = TRUE; break; }
1374                }
1375              break;
1376
1377              case PT_UCNC:
1378              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1379                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1380                   c >= 0xe000;
1381              break;
1382
1383            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1384
1385            default:            default:
# Line 1283  for (;;) Line 1406  for (;;)
1406        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1407        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1408        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1409        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1410          {          {
1411            int lgb, rgb;
1412          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1413          int ncount = 0;          int ncount = 0;
1414          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
# Line 1292  for (;;) Line 1416  for (;;)
1416            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1417            next_active_state--;            next_active_state--;
1418            }            }
1419            lgb = UCD_GRAPHBREAK(c);
1420          while (nptr < end_subject)          while (nptr < end_subject)
1421            {            {
1422            int nd;            dlen = 1;
1423            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1424            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1425            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1426            ncount++;            ncount++;
1427            nptr += ndlen;            lgb = rgb;
1428              nptr += dlen;
1429            }            }
1430          count++;          count++;
1431          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
# Line 1318  for (;;) Line 1444  for (;;)
1444          int ncount = 0;          int ncount = 0;
1445          switch (c)          switch (c)
1446            {            {
1447            case 0x000b:            case CHAR_VT:
1448            case 0x000c:            case CHAR_FF:
1449            case 0x0085:            case CHAR_NEL:
1450    #ifndef EBCDIC
1451            case 0x2028:            case 0x2028:
1452            case 0x2029:            case 0x2029:
1453    #endif  /* Not EBCDIC */
1454            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1455            goto ANYNL01;            goto ANYNL01;
1456
1457            case 0x000d:            case CHAR_CR:
1458            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1459            /* Fall through */            /* Fall through */
1460
1461            ANYNL01:            ANYNL01:
1462            case 0x000a:            case CHAR_LF:
1463            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1464              {              {
1465              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1358  for (;;) Line 1486  for (;;)
1486          BOOL OK;          BOOL OK;
1487          switch (c)          switch (c)
1488            {            {
1489            case 0x000a:            VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
1490            OK = TRUE;            OK = TRUE;
1491            break;            break;
1492
# Line 1397  for (;;) Line 1519  for (;;)
1519          BOOL OK;          BOOL OK;
1520          switch (c)          switch (c)
1521            {            {
1522            case 0x09:      /* HT */            HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
1523            OK = TRUE;            OK = TRUE;
1524            break;            break;
1525
# Line 1456  for (;;) Line 1560  for (;;)
1560        if (clen > 0)        if (clen > 0)
1561          {          {
1562          BOOL OK;          BOOL OK;
1563            const pcre_uint32 *cp;
1564          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1565          switch(code[2])          switch(code[2])
1566            {            {
# Line 1487  for (;;) Line 1592  for (;;)
1592                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1593            break;            break;
1594
1595              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1596              which means that Perl space and POSIX space are now identical. PCRE
1597              was changed at release 8.34. */
1598
1599            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;

1600            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1601            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1602                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
# Line 1504  for (;;) Line 1609  for (;;)
1609                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1610            break;            break;
1611
1612              case PT_CLIST:
1613              cp = PRIV(ucd_caseless_sets) + code[3];
1614              for (;;)
1615                {
1616                if (c < *cp) { OK = FALSE; break; }
1617                if (c == *cp++) { OK = TRUE; break; }
1618                }
1619              break;
1620
1621              case PT_UCNC:
1622              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1623                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1624                   c >= 0xe000;
1625              break;
1626
1627            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1628
1629            default:            default:
# Line 1539  for (;;) Line 1659  for (;;)
1659        QS2:        QS2:
1660
1661        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1662        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1663          {          {
1664            int lgb, rgb;
1665          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1666          int ncount = 0;          int ncount = 0;
1667          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
# Line 1549  for (;;) Line 1670  for (;;)
1670            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1671            next_active_state--;            next_active_state--;
1672            }            }
1673            lgb = UCD_GRAPHBREAK(c);
1674          while (nptr < end_subject)          while (nptr < end_subject)
1675            {            {
1676            int nd;            dlen = 1;
1677            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1678            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1679            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1680            ncount++;            ncount++;
1681            nptr += ndlen;            lgb = rgb;
1682              nptr += dlen;
1683            }            }
1684          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1685          }          }
# Line 1582  for (;;) Line 1705  for (;;)
1705          int ncount = 0;          int ncount = 0;
1706          switch (c)          switch (c)
1707            {            {
1708            case 0x000b:            case CHAR_VT:
1709            case 0x000c:            case CHAR_FF:
1710            case 0x0085:            case CHAR_NEL:
1711    #ifndef EBCDIC
1712            case 0x2028:            case 0x2028:
1713            case 0x2029:            case 0x2029:
1714    #endif  /* Not EBCDIC */
1715            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1716            goto ANYNL02;            goto ANYNL02;
1717
1718            case 0x000d:            case CHAR_CR:
1719            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1720            /* Fall through */            /* Fall through */
1721
1722            ANYNL02:            ANYNL02:
1723            case 0x000a:            case CHAR_LF:
1724            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1725                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1726              {              {
1727              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1728              next_active_state--;              next_active_state--;
1729              }              }
1730            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
1731            break;            break;
1732
1733            default:            default:
# Line 1630  for (;;) Line 1755  for (;;)
1755          BOOL OK;          BOOL OK;
1756          switch (c)          switch (c)
1757            {            {
1758            case 0x000a:            VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
1759            OK = TRUE;            OK = TRUE;
1760            break;            break;
1761
# Line 1652  for (;;) Line 1771  for (;;)
1771              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1772              next_active_state--;              next_active_state--;
1773              }              }
1774            ADD_NEW_DATA(-(state_offset + count), 0, 0);            ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1775            }            }
1776          }          }
1777        break;        break;
# Line 1676  for (;;) Line 1795  for (;;)
1795          BOOL OK;          BOOL OK;
1796          switch (c)          switch (c)
1797            {            {
1798            case 0x09:      /* HT */            HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
1799            OK = TRUE;            OK = TRUE;
1800            break;            break;
1801
# Line 1711  for (;;) Line 1812  for (;;)
1812              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1813              next_active_state--;              next_active_state--;
1814              }              }
1815            ADD_NEW_DATA(-(state_offset + count), 0, 0);            ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1816            }            }
1817          }          }
1818        break;        break;
# Line 1728  for (;;) Line 1829  for (;;)
1829        if (clen > 0)        if (clen > 0)
1830          {          {
1831          BOOL OK;          BOOL OK;
1832            const pcre_uint32 *cp;
1833          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1834          switch(code[1 + IMM2_SIZE + 1])          switch(code[1 + IMM2_SIZE + 1])
1835            {            {
# Line 1759  for (;;) Line 1861  for (;;)
1861                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1862            break;            break;
1863
1864              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1865              which means that Perl space and POSIX space are now identical. PCRE
1866              was changed at release 8.34. */
1867
1868            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;

1869            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1870            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1871                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
# Line 1776  for (;;) Line 1878  for (;;)
1878                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1879            break;            break;
1880
1881              case PT_CLIST:
1882              cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
1883              for (;;)
1884                {
1885                if (c < *cp) { OK = FALSE; break; }
1886                if (c == *cp++) { OK = TRUE; break; }
1887                }
1888              break;
1889
1890              case PT_UCNC:
1891              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1892                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1893                   c >= 0xe000;
1894              break;
1895
1896            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1897
1898            default:            default:
# Line 1790  for (;;) Line 1907  for (;;)
1907              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1908              next_active_state--;              next_active_state--;
1909              }              }
1910            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1911              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1912            else            else
1913              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
# Line 1806  for (;;) Line 1923  for (;;)
1923        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1924          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1925        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1926        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1927          {          {
1928            int lgb, rgb;
1929          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1930          int ncount = 0;          int ncount = 0;
1931          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
# Line 1815  for (;;) Line 1933  for (;;)
1933            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1934            next_active_state--;            next_active_state--;
1935            }            }
1936            lgb = UCD_GRAPHBREAK(c);
1937          while (nptr < end_subject)          while (nptr < end_subject)
1938            {            {
1939            int nd;            dlen = 1;
1940            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1941            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1942            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1943            ncount++;            ncount++;
1944            nptr += ndlen;            lgb = rgb;
1945              nptr += dlen;
1946            }            }
1947          if (++count >= GET2(code, 1))          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1948                reset_could_continue = TRUE;
1949            if (++count >= (int)GET2(code, 1))
1950            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1951          else          else
1952            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
# Line 1845  for (;;) Line 1967  for (;;)
1967          int ncount = 0;          int ncount = 0;
1968          switch (c)          switch (c)
1969            {            {
1970            case 0x000b:            case CHAR_VT:
1971            case 0x000c:            case CHAR_FF:
1972            case 0x0085:            case CHAR_NEL:
1973    #ifndef EBCDIC
1974            case 0x2028:            case 0x2028:
1975            case 0x2029:            case 0x2029:
1976    #endif  /* Not EBCDIC */
1977            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1978            goto ANYNL03;            goto ANYNL03;
1979
1980            case 0x000d:            case CHAR_CR:
1981            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1982            /* Fall through */            /* Fall through */
1983
1984            ANYNL03:            ANYNL03:
1985            case 0x000a:            case CHAR_LF:
1986            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1987              {              {
1988              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1989              next_active_state--;              next_active_state--;
1990              }              }
1991            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1992              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1993            else            else
1994              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
# Line 1889  for (;;) Line 2013  for (;;)
2013          BOOL OK;          BOOL OK;
2014          switch (c)          switch (c)
2015            {            {
2016            case 0x000a:            VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
2017            OK = TRUE;            OK = TRUE;
2018            break;            break;
2019
# Line 1910  for (;;) Line 2028  for (;;)
2028              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2029              next_active_state--;              next_active_state--;
2030              }              }
2031            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2032              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2033            else            else
2034              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
# Line 1931  for (;;) Line 2049  for (;;)
2049          BOOL OK;          BOOL OK;
2050          switch (c)          switch (c)
2051            {            {
2052            case 0x09:      /* HT */            HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
2053            OK = TRUE;            OK = TRUE;
2054            break;            break;
2055
# Line 1965  for (;;) Line 2065  for (;;)
2065              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2066              next_active_state--;              next_active_state--;
2067              }              }
2068            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2069              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2070            else            else
2071              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
# Line 2025  for (;;) Line 2125  for (;;)
2125        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
2126
2127        case OP_EXTUNI:        case OP_EXTUNI:
2128        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
2129          {          {
2130            int lgb, rgb;
2131          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
2132          int ncount = 0;          int ncount = 0;
2133            lgb = UCD_GRAPHBREAK(c);
2134          while (nptr < end_subject)          while (nptr < end_subject)
2135            {            {
2136            int nclen = 1;            dlen = 1;
2137            GETCHARLEN(c, nptr, nclen);            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2138            if (UCD_CATEGORY(c) != ucp_M) break;            rgb = UCD_GRAPHBREAK(d);
2139              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2140            ncount++;            ncount++;
2141            nptr += nclen;            lgb = rgb;
2142              nptr += dlen;
2143            }            }
2144            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2145                reset_could_continue = TRUE;
2146          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2147          }          }
2148        break;        break;
# Line 2050  for (;;) Line 2156  for (;;)
2156        case OP_ANYNL:        case OP_ANYNL:
2157        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2158          {          {
2159          case 0x000b:          case CHAR_VT:
2160          case 0x000c:          case CHAR_FF:
2161          case 0x0085:          case CHAR_NEL:
2162    #ifndef EBCDIC
2163          case 0x2028:          case 0x2028:
2164          case 0x2029:          case 0x2029:
2165    #endif  /* Not EBCDIC */
2166          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2167
2168          case 0x000a:          case CHAR_LF:
2169          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2170          break;          break;
2171
2172          case 0x000d:          case CHAR_CR:
2173          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2174              {
2175              ADD_NEW(state_offset + 1, 0);
2176              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2177                reset_could_continue = TRUE;
2178              }
2179            else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
2180            {            {
2181            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2182            }            }
# Line 2078  for (;;) Line 2192  for (;;)
2192        case OP_NOT_VSPACE:        case OP_NOT_VSPACE:
2193        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2194          {          {
2195          case 0x000a:          VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
2196          break;          break;
2197
2198          default:          default:
# Line 2097  for (;;) Line 2205  for (;;)
2205        case OP_VSPACE:        case OP_VSPACE:
2206        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2207          {          {
2208          case 0x000a:          VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
2209          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2210          break;          break;
2211
2212          default: break;          default:
2213            break;
2214          }          }
2215        break;        break;
2216
# Line 2115  for (;;) Line 2218  for (;;)
2218        case OP_NOT_HSPACE:        case OP_NOT_HSPACE:
2219        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2220          {          {
2221          case 0x09:      /* HT */          HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
2222          break;          break;
2223
2224          default:          default:
# Line 2146  for (;;) Line 2231  for (;;)
2231        case OP_HSPACE:        case OP_HSPACE:
2232        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2233          {          {
2234          case 0x09:      /* HT */          HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
2235          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2236          break;          break;
2237
2238            default:
2239            break;
2240          }          }
2241        break;        break;
2242
2243        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2244        /* Match a negated single character casefully. This is only used for        /* Match a negated single character casefully. */
one-byte characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */
2245
2246        case OP_NOT:        case OP_NOT:
2247        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2248        break;        break;
2249
2250        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2251        /* Match a negated single character caselessly. This is only used for        /* Match a negated single character caselessly. */
one-byte characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */
2252
2253        case OP_NOTI:        case OP_NOTI:
2254        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0)
2255          { ADD_NEW(state_offset + dlen + 1, 0); }          {
2256            unsigned int otherd;
2257    #ifdef SUPPORT_UTF
2258            if (utf && d >= 128)
2259              {
2260    #ifdef SUPPORT_UCP
2261              otherd = UCD_OTHERCASE(d);
2262    #endif  /* SUPPORT_UCP */
2263              }
2264            else
2265    #endif  /* SUPPORT_UTF */
2266            otherd = TABLE_GET(d, fcc, d);
2267            if (c != d && c != otherd)
2268              { ADD_NEW(state_offset + dlen + 1, 0); }
2269            }
2270        break;        break;
2271
2272        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 2210  for (;;) Line 2290  for (;;)
2290        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
2291        if (clen > 0)        if (clen > 0)
2292          {          {
2293          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2294          if (caseless)          if (caseless)
2295            {            {
2296  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2257  for (;;) Line 2337  for (;;)
2337        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2338        if (clen > 0)        if (clen > 0)
2339          {          {
2340          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2341          if (caseless)          if (caseless)
2342            {            {
2343  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2302  for (;;) Line 2382  for (;;)
2382        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2383        if (clen > 0)        if (clen > 0)
2384          {          {
2385          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2386          if (caseless)          if (caseless)
2387            {            {
2388  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2339  for (;;) Line 2419  for (;;)
2419        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2420        if (clen > 0)        if (clen > 0)
2421          {          {
2422          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2423          if (caseless)          if (caseless)
2424            {            {
2425  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2355  for (;;) Line 2435  for (;;)
2435            }            }
2436          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2437            {            {
2438            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2439              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2440            else            else
2441              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
# Line 2383  for (;;) Line 2463  for (;;)
2463        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2464        if (clen > 0)        if (clen > 0)
2465          {          {
2466          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2467          if (caseless)          if (caseless)
2468            {            {
2469  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2404  for (;;) Line 2484  for (;;)
2484              active_count--;             /* Remove non-match possibility */              active_count--;             /* Remove non-match possibility */
2485              next_active_state--;              next_active_state--;
2486              }              }
2487            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2488              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2489            else            else
2490              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
# Line 2477  for (;;) Line 2557  for (;;)
2557            case OP_CRRANGE:            case OP_CRRANGE:
2558            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2559            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2560            if (count >= GET2(ecode, 1))            if (count >= (int)GET2(ecode, 1))
2561              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2562            if (isinclass)            if (isinclass)
2563              {              {
2564              int max = GET2(ecode, 1 + IMM2_SIZE);              int max = (int)GET2(ecode, 1 + IMM2_SIZE);
2565              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2566                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2567              else              else
# Line 2557  for (;;) Line 2637  for (;;)
2637              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2638              cb.callout_number   = code[LINK_SIZE+2];              cb.callout_number   = code[LINK_SIZE+2];
2639              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2640    #if defined COMPILE_PCRE8
2641              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2642    #elif defined COMPILE_PCRE16
2643                cb.subject          = (PCRE_SPTR16)start_subject;
2644    #elif defined COMPILE_PCRE32
2645                cb.subject          = (PCRE_SPTR32)start_subject;
2646    #endif
2647              cb.subject_length   = (int)(end_subject - start_subject);              cb.subject_length   = (int)(end_subject - start_subject);
2648              cb.start_match      = (int)(current_subject - start_subject);              cb.start_match      = (int)(current_subject - start_subject);
2649              cb.current_position = (int)(ptr - start_subject);              cb.current_position = (int)(ptr - start_subject);
# Line 2686  for (;;) Line 2772  for (;;)
2772            {            {
2773            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2774              {              {
const pcre_uchar *p = start_subject + local_offsets[rc];
const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2775              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2776  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2777              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;              if (utf)
2778                  {
2779                  const pcre_uchar *p = start_subject + local_offsets[rc];
2780                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2781                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2782                  }
2783  #endif  #endif
2784              if (charcount > 0)              if (charcount > 0)
2785                {                {
# Line 2788  for (;;) Line 2877  for (;;)
2877              const pcre_uchar *p = ptr;              const pcre_uchar *p = ptr;
2878              const pcre_uchar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2879              charcount = (int)(pp - p);              charcount = (int)(pp - p);
2880  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2881              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;              if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2882  #endif  #endif
2883              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2884              }              }
# Line 2870  for (;;) Line 2959  for (;;)
2959              }              }
2960            else            else
2961              {              {
2962  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2963              const pcre_uchar *p = start_subject + local_offsets[0];              if (utf)
2964              const pcre_uchar *pp = start_subject + local_offsets[1];                {
2965              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;                const pcre_uchar *p = start_subject + local_offsets[0];
2966                  const pcre_uchar *pp = start_subject + local_offsets[1];
2967                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2968                  }
2969  #endif  #endif
2970              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2971              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
# Line 2896  for (;;) Line 2988  for (;;)
2988          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2989          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2990          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
2991    #if defined COMPILE_PCRE8
2992          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
2993    #elif defined COMPILE_PCRE16
2994            cb.subject          = (PCRE_SPTR16)start_subject;
2995    #elif defined COMPILE_PCRE32
2996            cb.subject          = (PCRE_SPTR32)start_subject;
2997    #endif
2998          cb.subject_length   = (int)(end_subject - start_subject);          cb.subject_length   = (int)(end_subject - start_subject);
2999          cb.start_match      = (int)(current_subject - start_subject);          cb.start_match      = (int)(current_subject - start_subject);
3000          cb.current_position = (int)(ptr - start_subject);          cb.current_position = (int)(ptr - start_subject);
# Line 2938  for (;;) Line 3036  for (;;)
3036    if (new_count <= 0)    if (new_count <= 0)
3037      {      {
3038      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3039          could_continue &&                            /* Some could go on */          could_continue &&                            /* Some could go on, and */
3040          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3041          (                                            /* either... */          (                                            /* either... */
3042          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2946  for (;;) Line 3044  for (;;)
3044          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3045           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3046          ) &&                                         /* And... */          ) &&                                         /* And... */
3047          ptr >= end_subject &&                  /* Reached end of subject */          (
3048          ptr > md->start_used_ptr)              /* Inspected non-empty string */          partial_newline ||                           /* Either partial NL */
3049        {            (                                          /* or ... */
3050        if (offsetcount >= 2)            ptr >= end_subject &&                /* End of subject and */
3051          {            ptr > md->start_used_ptr)            /* Inspected non-empty string */
3052          offsets[0] = (int)(md->start_used_ptr - start_subject);            )
3053          offsets[1] = (int)(end_subject - start_subject);          )
}
3054        match_count = PCRE_ERROR_PARTIAL;        match_count = PCRE_ERROR_PARTIAL;
}

3055      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
3056        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
3057        rlevel*2-2, SP));        rlevel*2-2, SP));
# Line 3006  Returns:          > 0 => number of match Line 3101  Returns:          > 0 => number of match
3101                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3102  */  */
3103
3104  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
3105  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3106  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3107    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3108    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3109  #else  #elif defined COMPILE_PCRE16
3110  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3111  pcre16_dfa_exec(const pcre *argument_re, const pcre16_extra *extra_data,  pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3112    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3113    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3114    #elif defined COMPILE_PCRE32
3115    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3116    pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
3117      PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
3118      int offsetcount, int *workspace, int wscount)
3119  #endif  #endif
3120  {  {
3121  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3122  dfa_match_data match_block;  dfa_match_data match_block;
3123  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3124  BOOL utf, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3125  const pcre_uchar *current_subject, *end_subject;  const pcre_uchar *current_subject, *end_subject;
const pcre_uint8 *lcc;

3126  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
3127
3128  const pcre_uchar *req_char_ptr;  const pcre_uchar *req_char_ptr;
# Line 3044  if (re == NULL || subject == NULL || wor Line 3142  if (re == NULL || subject == NULL || wor
3142     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3143  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3144  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3145    if (length < 0) return PCRE_ERROR_BADLENGTH;
3146  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3147
3148  /* We need to find the pointer to any study data before we test for byte  /* Check that the first field in the block is the magic number. If it is not,
3149  flipping, so we scan the extra_data block first. This may set two fields in the  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3150  match block, so we must initialize them beforehand. However, the other fields  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3151  in the match block must not be set until after the byte flipping. */  means that the pattern is likely compiled with different endianness. */
3152
3153    if (re->magic_number != MAGIC_NUMBER)
3154      return re->magic_number == REVERSED_MAGIC_NUMBER?
3156    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3157
3158    /* If restarting after a partial match, do some sanity checks on the contents
3159    of the workspace. */
3160
3161    if ((options & PCRE_DFA_RESTART) != 0)
3162      {
3163      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3164        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3166      }
3167
3168    /* Set up study, callout, and table data */
3169
3170  md->tables = re->tables;  md->tables = re->tables;
3171  md->callout_data = NULL;  md->callout_data = NULL;
# Line 3068  if (extra_data != NULL) Line 3184  if (extra_data != NULL)
3184      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3185    }    }
3186
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
means that the pattern is likely compiled with different endianness. */

if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER?
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;

3187  /* Set some local values */  /* Set some local values */
3188
3189  current_subject = (const pcre_uchar *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
# Line 3085  end_subject = (const pcre_uchar *)subjec Line 3191  end_subject = (const pcre_uchar *)subjec
3191  req_char_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3192
3193  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3194  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
3195  utf = (re->options & PCRE_UTF8) != 0;  utf = (re->options & PCRE_UTF8) != 0;
3196  #else  #else
3197  utf = FALSE;  utf = FALSE;
# Line 3171  if (utf && (options & PCRE_NO_UTF8_CHECK Line 3277  if (utf && (options & PCRE_NO_UTF8_CHECK
3277        offsets[0] = erroroffset;        offsets[0] = erroroffset;
3278        offsets[1] = errorcode;        offsets[1] = errorcode;
3279        }        }
3280      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?  #if defined COMPILE_PCRE8
3281        return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
3282        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3283    #elif defined COMPILE_PCRE16
3284        return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
3285          PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
3286    #elif defined COMPILE_PCRE32
3288    #endif
3289      }      }
3290    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3291    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
3292          NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))          NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3294    #endif
3295    }    }
3296  #endif  #endif
3297
# Line 3186  in other programs later. */ Line 3301  in other programs later. */
3301
3302  if (md->tables == NULL) md->tables = PRIV(default_tables);  if (md->tables == NULL) md->tables = PRIV(default_tables);
3303
3304  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3305  used in a loop when finding where to start. */  where to start. */
3306
lcc = md->tables + lcc_offset;
3307  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3308  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3309
# Line 3204  if (!anchored) Line 3318  if (!anchored)
3318    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3319      {      {
3320      has_first_char = TRUE;      has_first_char = TRUE;
3321      first_char = first_char2 = re->first_char;      first_char = first_char2 = (pcre_uchar)(re->first_char);
3322      if ((re->flags & PCRE_FCH_CASELESS) != 0)      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3323        {        {
3324        first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);        first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
# Line 3228  character" set. */ Line 3342  character" set. */
3342  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3343    {    {
3344    has_req_char = TRUE;    has_req_char = TRUE;
3345    req_char = req_char2 = re->req_char;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3346    if ((re->flags & PCRE_RCH_CASELESS) != 0)    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3347      {      {
3348      req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);      req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
# Line 3287  for (;;) Line 3401  for (;;)
3401        if (has_first_char)        if (has_first_char)
3402          {          {
3403          if (first_char != first_char2)          if (first_char != first_char2)
3404              {
3405              pcre_uchar csc;
3406            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3407                *current_subject != first_char && *current_subject != first_char2)                   (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
3408              current_subject++;              current_subject++;
3409              }
3410          else          else
3411            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3412                   *current_subject != first_char)                   RAWUCHARTEST(current_subject) != first_char)
3413              current_subject++;              current_subject++;
3414          }          }
3415
# Line 3322  for (;;) Line 3439  for (;;)
3439            ANYCRLF, and we are now at a LF, advance the match position by one            ANYCRLF, and we are now at a LF, advance the match position by one
3440            more character. */            more character. */
3441
3442            if (current_subject[-1] == CHAR_CR &&            if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3443                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
3444                 current_subject < end_subject &&                 current_subject < end_subject &&
3445                 *current_subject == CHAR_NL)                 RAWUCHARTEST(current_subject) == CHAR_NL)
3446              current_subject++;              current_subject++;
3447            }            }
3448          }          }
# Line 3336  for (;;) Line 3453  for (;;)
3453          {          {
3454          while (current_subject < end_subject)          while (current_subject < end_subject)
3455            {            {
3456            register unsigned int c = *current_subject;            register pcre_uint32 c = RAWUCHARTEST(current_subject);
3457  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3458            if (c > 255) c = 255;            if (c > 255) c = 255;
3459  #endif  #endif
# Line 3402  for (;;) Line 3519  for (;;)
3519              {              {
3520              while (p < end_subject)              while (p < end_subject)
3521                {                {
3522                register int pp = *p++;                register pcre_uint32 pp = RAWUCHARINCTEST(p);
3523                if (pp == req_char || pp == req_char2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3524                }                }
3525              }              }
# Line 3410  for (;;) Line 3527  for (;;)
3527              {              {
3528              while (p < end_subject)              while (p < end_subject)
3529                {                {
3530                if (*p++ == req_char) { p--; break; }                if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
3531                }                }
3532              }              }
3533
# Line 3448  for (;;) Line 3565  for (;;)
3565    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3566    on only if not anchored. */    on only if not anchored. */
3567
3568    if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;    if (rc != PCRE_ERROR_NOMATCH || anchored)
3569        {
3570        if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
3571          {
3572          offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
3573          offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
3574          if (offsetcount > 2)
3575            offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
3576          }
3577        return rc;
3578        }
3579
3580    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
3581    and firstline is set. */    and firstline is set. */
# Line 3468  for (;;) Line 3595  for (;;)
3595    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
3596    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
3597
3598    if (current_subject[-1] == CHAR_CR &&    if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3599        current_subject < end_subject &&        current_subject < end_subject &&
3600        *current_subject == CHAR_NL &&        RAWUCHARTEST(current_subject) == CHAR_NL &&
3601        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
3602          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
3603           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||

Legend:
 Removed from v.850 | changed lines | Added in v.1364