/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 960 by ph10, Thu Apr 19 17:30:38 2012 UTC revision 1260 by ph10, Wed Feb 27 15:41:22 2013 UTC
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40    
   
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl-compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
# Line 303  Returns:       nothing Line 302  Returns:       nothing
302  static void  static void
303  pchars(const pcre_uchar *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
304  {  {
305  int c;  pcre_uint32 c;
306  while (length-- > 0)  while (length-- > 0)
307    {    {
308    if (isprint(c = *(p++)))    if (isprint(c = *(p++)))
309      fprintf(f, "%c", c);      fprintf(f, "%c", c);
310    else    else
311      fprintf(f, "\\x%02x", c);      fprintf(f, "\\x{%02x}", c);
312    }    }
313  }  }
314  #endif  #endif
# Line 382  for the current character, one for the f Line 381  for the current character, one for the f
381      next_new_state->count  = (y); \      next_new_state->count  = (y); \
382      next_new_state->data   = (z); \      next_new_state->data   = (z); \
383      next_new_state++; \      next_new_state++; \
384      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
385          (x), (y), (z), __LINE__)); \
386      } \      } \
387    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
388    
# Line 571  for (;;) Line 571  for (;;)
571    {    {
572    int i, j;    int i, j;
573    int clen, dlen;    int clen, dlen;
574    unsigned int c, d;    pcre_uint32 c, d;
575    int forced_fail = 0;    int forced_fail = 0;
576    BOOL partial_newline = FALSE;    BOOL partial_newline = FALSE;
577    BOOL could_continue = reset_could_continue;    BOOL could_continue = reset_could_continue;
578    reset_could_continue = FALSE;    reset_could_continue = FALSE;
579    
580    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
581    new state list. */    new state list. */
582    
# Line 611  for (;;) Line 611  for (;;)
611    
612    if (ptr < end_subject)    if (ptr < end_subject)
613      {      {
614      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
615  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
616      if (utf) { GETCHARLEN(c, ptr, clen); } else      GETCHARLENTEST(c, ptr, clen);
617  #endif  /* SUPPORT_UTF */  #else
618      c = *ptr;      c = *ptr;
619    #endif  /* SUPPORT_UTF */
620      }      }
621    else    else
622      {      {
# Line 634  for (;;) Line 635  for (;;)
635      BOOL caseless = FALSE;      BOOL caseless = FALSE;
636      const pcre_uchar *code;      const pcre_uchar *code;
637      int state_offset = current_state->offset;      int state_offset = current_state->offset;
638      int count, codevalue, rrc;      int codevalue, rrc;
639        unsigned int count;
640    
641  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
642      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 645  for (;;) Line 647  for (;;)
647    
648      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
649      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
650      been skipped". If the could_continue flag was passed over from a previous      been skipped". If the could_continue flag was passed over from a previous
651      state, arrange for it to be passed on. */      state, arrange for it to be passed on. */
652    
653      if (state_offset < 0)      if (state_offset < 0)
# Line 695  for (;;) Line 697  for (;;)
697      permitted.      permitted.
698    
699      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
700      argument that is not a data character - but is always one byte long because      argument that is not a data character - but is always one byte long because
701      the values are small. We have to take special action to deal with  \P, \p,      the values are small. We have to take special action to deal with  \P, \p,
702      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
703      these ones to new opcodes. */      these ones to new opcodes. */
# Line 789  for (;;) Line 791  for (;;)
791              offsets[0] = (int)(current_subject - start_subject);              offsets[0] = (int)(current_subject - start_subject);
792              offsets[1] = (int)(ptr - start_subject);              offsets[1] = (int)(ptr - start_subject);
793              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
794                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
795              }              }
796            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
797              {              {
# Line 894  for (;;) Line 896  for (;;)
896        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
897        case OP_ANY:        case OP_ANY:
898        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
899          {          {
900          if (ptr + 1 >= md->end_subject &&          if (ptr + 1 >= md->end_subject &&
901              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
902              NLBLOCK->nltype == NLTYPE_FIXED &&              NLBLOCK->nltype == NLTYPE_FIXED &&
903              NLBLOCK->nllen == 2 &&              NLBLOCK->nllen == 2 &&
904              c == NLBLOCK->nl[0])              c == NLBLOCK->nl[0])
905            {            {
906            could_continue = partial_newline = TRUE;            could_continue = partial_newline = TRUE;
907            }            }
908          else          else
909            {            {
910            ADD_NEW(state_offset + 1, 0);            ADD_NEW(state_offset + 1, 0);
911            }            }
912          }          }
913        break;        break;
914    
# Line 938  for (;;) Line 940  for (;;)
940          else if (ptr + 1 >= md->end_subject &&          else if (ptr + 1 >= md->end_subject &&
941                   (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&                   (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
942                   NLBLOCK->nltype == NLTYPE_FIXED &&                   NLBLOCK->nltype == NLTYPE_FIXED &&
943                   NLBLOCK->nllen == 2 &&                   NLBLOCK->nllen == 2 &&
944                   c == NLBLOCK->nl[0])                   c == NLBLOCK->nl[0])
945            {            {
946            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
947              {              {
948              reset_could_continue = TRUE;              reset_could_continue = TRUE;
949              ADD_NEW_DATA(-(state_offset + 1), 0, 1);              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
950              }              }
951            else could_continue = partial_newline = TRUE;            else could_continue = partial_newline = TRUE;
952            }            }
953          }          }
954        break;        break;
955    
# Line 963  for (;;) Line 965  for (;;)
965          else if (ptr + 1 >= md->end_subject &&          else if (ptr + 1 >= md->end_subject &&
966                   (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&                   (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
967                   NLBLOCK->nltype == NLTYPE_FIXED &&                   NLBLOCK->nltype == NLTYPE_FIXED &&
968                   NLBLOCK->nllen == 2 &&                   NLBLOCK->nllen == 2 &&
969                   c == NLBLOCK->nl[0])                   c == NLBLOCK->nl[0])
970            {            {
971            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
972              {              {
973              reset_could_continue = TRUE;              reset_could_continue = TRUE;
974              ADD_NEW_DATA(-(state_offset + 1), 0, 1);              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
975              }              }
976            else could_continue = partial_newline = TRUE;            else could_continue = partial_newline = TRUE;
977            }            }
978          }          }
979        else if (IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
980          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 1007  for (;;) Line 1009  for (;;)
1009            {            {
1010            const pcre_uchar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
1011            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1012  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1013            if (utf) { BACKCHAR(temp); }            if (utf) { BACKCHAR(temp); }
1014  #endif  #endif
1015            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
# Line 1060  for (;;) Line 1062  for (;;)
1062        if (clen > 0)        if (clen > 0)
1063          {          {
1064          BOOL OK;          BOOL OK;
1065            const pcre_uint32 *cp;
1066          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1067          switch(code[1])          switch(code[1])
1068            {            {
# Line 1108  for (;;) Line 1111  for (;;)
1111                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1112            break;            break;
1113    
1114              case PT_CLIST:
1115              cp = PRIV(ucd_caseless_sets) + code[2];
1116              for (;;)
1117                {
1118                if (c < *cp) { OK = FALSE; break; }
1119                if (c == *cp++) { OK = TRUE; break; }
1120                }
1121              break;
1122    
1123              case PT_UCNC:
1124              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1125                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1126                   c >= 0xe000;
1127              break;
1128    
1129            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1130    
1131            default:            default:
# Line 1138  for (;;) Line 1156  for (;;)
1156          if (d == OP_ANY && ptr + 1 >= md->end_subject &&          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1157              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1158              NLBLOCK->nltype == NLTYPE_FIXED &&              NLBLOCK->nltype == NLTYPE_FIXED &&
1159              NLBLOCK->nllen == 2 &&              NLBLOCK->nllen == 2 &&
1160              c == NLBLOCK->nl[0])              c == NLBLOCK->nl[0])
1161            {            {
1162            could_continue = partial_newline = TRUE;            could_continue = partial_newline = TRUE;
1163            }            }
1164          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1165              (c < 256 &&              (c < 256 &&
1166                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
# Line 1169  for (;;) Line 1187  for (;;)
1187          if (d == OP_ANY && ptr + 1 >= md->end_subject &&          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1188              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1189              NLBLOCK->nltype == NLTYPE_FIXED &&              NLBLOCK->nltype == NLTYPE_FIXED &&
1190              NLBLOCK->nllen == 2 &&              NLBLOCK->nllen == 2 &&
1191              c == NLBLOCK->nl[0])              c == NLBLOCK->nl[0])
1192            {            {
1193            could_continue = partial_newline = TRUE;            could_continue = partial_newline = TRUE;
1194            }            }
1195          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1196              (c < 256 &&              (c < 256 &&
1197                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
# Line 1199  for (;;) Line 1217  for (;;)
1217          if (d == OP_ANY && ptr + 1 >= md->end_subject &&          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1218              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1219              NLBLOCK->nltype == NLTYPE_FIXED &&              NLBLOCK->nltype == NLTYPE_FIXED &&
1220              NLBLOCK->nllen == 2 &&              NLBLOCK->nllen == 2 &&
1221              c == NLBLOCK->nl[0])              c == NLBLOCK->nl[0])
1222            {            {
1223            could_continue = partial_newline = TRUE;            could_continue = partial_newline = TRUE;
1224            }            }
1225          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1226              (c < 256 &&              (c < 256 &&
1227                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
# Line 1227  for (;;) Line 1245  for (;;)
1245          if (d == OP_ANY && ptr + 1 >= md->end_subject &&          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1246              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1247              NLBLOCK->nltype == NLTYPE_FIXED &&              NLBLOCK->nltype == NLTYPE_FIXED &&
1248              NLBLOCK->nllen == 2 &&              NLBLOCK->nllen == 2 &&
1249              c == NLBLOCK->nl[0])              c == NLBLOCK->nl[0])
1250            {            {
1251            could_continue = partial_newline = TRUE;            could_continue = partial_newline = TRUE;
1252            }            }
1253          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1254              (c < 256 &&              (c < 256 &&
1255                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
# Line 1256  for (;;) Line 1274  for (;;)
1274          if (d == OP_ANY && ptr + 1 >= md->end_subject &&          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1275              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&              (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1276              NLBLOCK->nltype == NLTYPE_FIXED &&              NLBLOCK->nltype == NLTYPE_FIXED &&
1277              NLBLOCK->nllen == 2 &&              NLBLOCK->nllen == 2 &&
1278              c == NLBLOCK->nl[0])              c == NLBLOCK->nl[0])
1279            {            {
1280            could_continue = partial_newline = TRUE;            could_continue = partial_newline = TRUE;
1281            }            }
1282          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1283              (c < 256 &&              (c < 256 &&
1284                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
# Line 1294  for (;;) Line 1312  for (;;)
1312        if (clen > 0)        if (clen > 0)
1313          {          {
1314          BOOL OK;          BOOL OK;
1315            const pcre_uint32 *cp;
1316          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1317          switch(code[2])          switch(code[2])
1318            {            {
# Line 1342  for (;;) Line 1361  for (;;)
1361                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1362            break;            break;
1363    
1364              case PT_CLIST:
1365              cp = PRIV(ucd_caseless_sets) + code[3];
1366              for (;;)
1367                {
1368                if (c < *cp) { OK = FALSE; break; }
1369                if (c == *cp++) { OK = TRUE; break; }
1370                }
1371              break;
1372    
1373              case PT_UCNC:
1374              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1375                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1376                   c >= 0xe000;
1377              break;
1378    
1379            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1380    
1381            default:            default:
# Line 1368  for (;;) Line 1402  for (;;)
1402        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1403        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1404        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1405        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1406          {          {
1407            int lgb, rgb;
1408          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1409          int ncount = 0;          int ncount = 0;
1410          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
# Line 1377  for (;;) Line 1412  for (;;)
1412            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1413            next_active_state--;            next_active_state--;
1414            }            }
1415            lgb = UCD_GRAPHBREAK(c);
1416          while (nptr < end_subject)          while (nptr < end_subject)
1417            {            {
1418            int nd;            dlen = 1;
1419            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1420            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1421            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1422            ncount++;            ncount++;
1423            nptr += ndlen;            lgb = rgb;
1424              nptr += dlen;
1425            }            }
1426          count++;          count++;
1427          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
# Line 1403  for (;;) Line 1440  for (;;)
1440          int ncount = 0;          int ncount = 0;
1441          switch (c)          switch (c)
1442            {            {
1443            case 0x000b:            case CHAR_VT:
1444            case 0x000c:            case CHAR_FF:
1445            case 0x0085:            case CHAR_NEL:
1446    #ifndef EBCDIC
1447            case 0x2028:            case 0x2028:
1448            case 0x2029:            case 0x2029:
1449    #endif  /* Not EBCDIC */
1450            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1451            goto ANYNL01;            goto ANYNL01;
1452    
1453            case 0x000d:            case CHAR_CR:
1454            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1455            /* Fall through */            /* Fall through */
1456    
1457            ANYNL01:            ANYNL01:
1458            case 0x000a:            case CHAR_LF:
1459            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1460              {              {
1461              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1443  for (;;) Line 1482  for (;;)
1482          BOOL OK;          BOOL OK;
1483          switch (c)          switch (c)
1484            {            {
1485            case 0x000a:            VSPACE_CASES:
           case 0x000b:  
           case 0x000c:  
           case 0x000d:  
           case 0x0085:  
           case 0x2028:  
           case 0x2029:  
1486            OK = TRUE;            OK = TRUE;
1487            break;            break;
1488    
# Line 1482  for (;;) Line 1515  for (;;)
1515          BOOL OK;          BOOL OK;
1516          switch (c)          switch (c)
1517            {            {
1518            case 0x09:      /* HT */            HSPACE_CASES:
           case 0x20:      /* SPACE */  
           case 0xa0:      /* NBSP */  
           case 0x1680:    /* OGHAM SPACE MARK */  
           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
           case 0x2000:    /* EN QUAD */  
           case 0x2001:    /* EM QUAD */  
           case 0x2002:    /* EN SPACE */  
           case 0x2003:    /* EM SPACE */  
           case 0x2004:    /* THREE-PER-EM SPACE */  
           case 0x2005:    /* FOUR-PER-EM SPACE */  
           case 0x2006:    /* SIX-PER-EM SPACE */  
           case 0x2007:    /* FIGURE SPACE */  
           case 0x2008:    /* PUNCTUATION SPACE */  
           case 0x2009:    /* THIN SPACE */  
           case 0x200A:    /* HAIR SPACE */  
           case 0x202f:    /* NARROW NO-BREAK SPACE */  
           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
           case 0x3000:    /* IDEOGRAPHIC SPACE */  
1519            OK = TRUE;            OK = TRUE;
1520            break;            break;
1521    
# Line 1541  for (;;) Line 1556  for (;;)
1556        if (clen > 0)        if (clen > 0)
1557          {          {
1558          BOOL OK;          BOOL OK;
1559            const pcre_uint32 *cp;
1560          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1561          switch(code[2])          switch(code[2])
1562            {            {
# Line 1589  for (;;) Line 1605  for (;;)
1605                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1606            break;            break;
1607    
1608              case PT_CLIST:
1609              cp = PRIV(ucd_caseless_sets) + code[3];
1610              for (;;)
1611                {
1612                if (c < *cp) { OK = FALSE; break; }
1613                if (c == *cp++) { OK = TRUE; break; }
1614                }
1615              break;
1616    
1617              case PT_UCNC:
1618              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1619                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1620                   c >= 0xe000;
1621              break;
1622    
1623            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1624    
1625            default:            default:
# Line 1624  for (;;) Line 1655  for (;;)
1655        QS2:        QS2:
1656    
1657        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1658        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1659          {          {
1660            int lgb, rgb;
1661          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1662          int ncount = 0;          int ncount = 0;
1663          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
# Line 1634  for (;;) Line 1666  for (;;)
1666            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1667            next_active_state--;            next_active_state--;
1668            }            }
1669            lgb = UCD_GRAPHBREAK(c);
1670          while (nptr < end_subject)          while (nptr < end_subject)
1671            {            {
1672            int nd;            dlen = 1;
1673            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1674            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1675            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1676            ncount++;            ncount++;
1677            nptr += ndlen;            lgb = rgb;
1678              nptr += dlen;
1679            }            }
1680          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1681          }          }
# Line 1667  for (;;) Line 1701  for (;;)
1701          int ncount = 0;          int ncount = 0;
1702          switch (c)          switch (c)
1703            {            {
1704            case 0x000b:            case CHAR_VT:
1705            case 0x000c:            case CHAR_FF:
1706            case 0x0085:            case CHAR_NEL:
1707    #ifndef EBCDIC
1708            case 0x2028:            case 0x2028:
1709            case 0x2029:            case 0x2029:
1710    #endif  /* Not EBCDIC */
1711            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1712            goto ANYNL02;            goto ANYNL02;
1713    
1714            case 0x000d:            case CHAR_CR:
1715            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1716            /* Fall through */            /* Fall through */
1717    
1718            ANYNL02:            ANYNL02:
1719            case 0x000a:            case CHAR_LF:
1720            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1721                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1722              {              {
1723              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1724              next_active_state--;              next_active_state--;
1725              }              }
1726            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
1727            break;            break;
1728    
1729            default:            default:
# Line 1715  for (;;) Line 1751  for (;;)
1751          BOOL OK;          BOOL OK;
1752          switch (c)          switch (c)
1753            {            {
1754            case 0x000a:            VSPACE_CASES:
           case 0x000b:  
           case 0x000c:  
           case 0x000d:  
           case 0x0085:  
           case 0x2028:  
           case 0x2029:  
1755            OK = TRUE;            OK = TRUE;
1756            break;            break;
1757    
# Line 1737  for (;;) Line 1767  for (;;)
1767              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1768              next_active_state--;              next_active_state--;
1769              }              }
1770            ADD_NEW_DATA(-(state_offset + count), 0, 0);            ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1771            }            }
1772          }          }
1773        break;        break;
# Line 1761  for (;;) Line 1791  for (;;)
1791          BOOL OK;          BOOL OK;
1792          switch (c)          switch (c)
1793            {            {
1794            case 0x09:      /* HT */            HSPACE_CASES:
           case 0x20:      /* SPACE */  
           case 0xa0:      /* NBSP */  
           case 0x1680:    /* OGHAM SPACE MARK */  
           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
           case 0x2000:    /* EN QUAD */  
           case 0x2001:    /* EM QUAD */  
           case 0x2002:    /* EN SPACE */  
           case 0x2003:    /* EM SPACE */  
           case 0x2004:    /* THREE-PER-EM SPACE */  
           case 0x2005:    /* FOUR-PER-EM SPACE */  
           case 0x2006:    /* SIX-PER-EM SPACE */  
           case 0x2007:    /* FIGURE SPACE */  
           case 0x2008:    /* PUNCTUATION SPACE */  
           case 0x2009:    /* THIN SPACE */  
           case 0x200A:    /* HAIR SPACE */  
           case 0x202f:    /* NARROW NO-BREAK SPACE */  
           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
           case 0x3000:    /* IDEOGRAPHIC SPACE */  
1795            OK = TRUE;            OK = TRUE;
1796            break;            break;
1797    
# Line 1796  for (;;) Line 1808  for (;;)
1808              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1809              next_active_state--;              next_active_state--;
1810              }              }
1811            ADD_NEW_DATA(-(state_offset + count), 0, 0);            ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1812            }            }
1813          }          }
1814        break;        break;
# Line 1813  for (;;) Line 1825  for (;;)
1825        if (clen > 0)        if (clen > 0)
1826          {          {
1827          BOOL OK;          BOOL OK;
1828            const pcre_uint32 *cp;
1829          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1830          switch(code[1 + IMM2_SIZE + 1])          switch(code[1 + IMM2_SIZE + 1])
1831            {            {
# Line 1861  for (;;) Line 1874  for (;;)
1874                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1875            break;            break;
1876    
1877              case PT_CLIST:
1878              cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
1879              for (;;)
1880                {
1881                if (c < *cp) { OK = FALSE; break; }
1882                if (c == *cp++) { OK = TRUE; break; }
1883                }
1884              break;
1885    
1886              case PT_UCNC:
1887              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1888                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1889                   c >= 0xe000;
1890              break;
1891    
1892            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1893    
1894            default:            default:
# Line 1891  for (;;) Line 1919  for (;;)
1919        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1920          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1921        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1922        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1923          {          {
1924            int lgb, rgb;
1925          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1926          int ncount = 0;          int ncount = 0;
1927          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
# Line 1900  for (;;) Line 1929  for (;;)
1929            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1930            next_active_state--;            next_active_state--;
1931            }            }
1932            lgb = UCD_GRAPHBREAK(c);
1933          while (nptr < end_subject)          while (nptr < end_subject)
1934            {            {
1935            int nd;            dlen = 1;
1936            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1937            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1938            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1939            ncount++;            ncount++;
1940            nptr += ndlen;            lgb = rgb;
1941              nptr += dlen;
1942            }            }
1943          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1944              reset_could_continue = TRUE;              reset_could_continue = TRUE;
1945          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1946            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1947          else          else
# Line 1932  for (;;) Line 1963  for (;;)
1963          int ncount = 0;          int ncount = 0;
1964          switch (c)          switch (c)
1965            {            {
1966            case 0x000b:            case CHAR_VT:
1967            case 0x000c:            case CHAR_FF:
1968            case 0x0085:            case CHAR_NEL:
1969    #ifndef EBCDIC
1970            case 0x2028:            case 0x2028:
1971            case 0x2029:            case 0x2029:
1972    #endif  /* Not EBCDIC */
1973            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1974            goto ANYNL03;            goto ANYNL03;
1975    
1976            case 0x000d:            case CHAR_CR:
1977            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1978            /* Fall through */            /* Fall through */
1979    
1980            ANYNL03:            ANYNL03:
1981            case 0x000a:            case CHAR_LF:
1982            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1983              {              {
1984              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1976  for (;;) Line 2009  for (;;)
2009          BOOL OK;          BOOL OK;
2010          switch (c)          switch (c)
2011            {            {
2012            case 0x000a:            VSPACE_CASES:
           case 0x000b:  
           case 0x000c:  
           case 0x000d:  
           case 0x0085:  
           case 0x2028:  
           case 0x2029:  
2013            OK = TRUE;            OK = TRUE;
2014            break;            break;
2015    
# Line 2018  for (;;) Line 2045  for (;;)
2045          BOOL OK;          BOOL OK;
2046          switch (c)          switch (c)
2047            {            {
2048            case 0x09:      /* HT */            HSPACE_CASES:
           case 0x20:      /* SPACE */  
           case 0xa0:      /* NBSP */  
           case 0x1680:    /* OGHAM SPACE MARK */  
           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
           case 0x2000:    /* EN QUAD */  
           case 0x2001:    /* EM QUAD */  
           case 0x2002:    /* EN SPACE */  
           case 0x2003:    /* EM SPACE */  
           case 0x2004:    /* THREE-PER-EM SPACE */  
           case 0x2005:    /* FOUR-PER-EM SPACE */  
           case 0x2006:    /* SIX-PER-EM SPACE */  
           case 0x2007:    /* FIGURE SPACE */  
           case 0x2008:    /* PUNCTUATION SPACE */  
           case 0x2009:    /* THIN SPACE */  
           case 0x200A:    /* HAIR SPACE */  
           case 0x202f:    /* NARROW NO-BREAK SPACE */  
           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
           case 0x3000:    /* IDEOGRAPHIC SPACE */  
2049            OK = TRUE;            OK = TRUE;
2050            break;            break;
2051    
# Line 2112  for (;;) Line 2121  for (;;)
2121        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
2122    
2123        case OP_EXTUNI:        case OP_EXTUNI:
2124        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
2125          {          {
2126            int lgb, rgb;
2127          const pcre_uchar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
2128          int ncount = 0;          int ncount = 0;
2129            lgb = UCD_GRAPHBREAK(c);
2130          while (nptr < end_subject)          while (nptr < end_subject)
2131            {            {
2132            int nclen = 1;            dlen = 1;
2133            GETCHARLEN(c, nptr, nclen);            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2134            if (UCD_CATEGORY(c) != ucp_M) break;            rgb = UCD_GRAPHBREAK(d);
2135              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2136            ncount++;            ncount++;
2137            nptr += nclen;            lgb = rgb;
2138              nptr += dlen;
2139            }            }
2140          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2141              reset_could_continue = TRUE;              reset_could_continue = TRUE;
2142          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2143          }          }
2144        break;        break;
# Line 2139  for (;;) Line 2152  for (;;)
2152        case OP_ANYNL:        case OP_ANYNL:
2153        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2154          {          {
2155          case 0x000b:          case CHAR_VT:
2156          case 0x000c:          case CHAR_FF:
2157          case 0x0085:          case CHAR_NEL:
2158    #ifndef EBCDIC
2159          case 0x2028:          case 0x2028:
2160          case 0x2029:          case 0x2029:
2161    #endif  /* Not EBCDIC */
2162          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2163    
2164          case 0x000a:          case CHAR_LF:
2165          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2166          break;          break;
2167    
2168          case 0x000d:          case CHAR_CR:
2169          if (ptr + 1 >= end_subject)          if (ptr + 1 >= end_subject)
2170            {            {
2171            ADD_NEW(state_offset + 1, 0);            ADD_NEW(state_offset + 1, 0);
2172            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2173              reset_could_continue = TRUE;              reset_could_continue = TRUE;
2174            }            }
2175          else if (ptr[1] == 0x0a)          else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
2176            {            {
2177            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2178            }            }
2179          else          else
2180            {            {
2181            ADD_NEW(state_offset + 1, 0);            ADD_NEW(state_offset + 1, 0);
2182            }            }
2183          break;          break;
2184          }          }
2185        break;        break;
# Line 2173  for (;;) Line 2188  for (;;)
2188        case OP_NOT_VSPACE:        case OP_NOT_VSPACE:
2189        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2190          {          {
2191          case 0x000a:          VSPACE_CASES:
         case 0x000b:  
         case 0x000c:  
         case 0x000d:  
         case 0x0085:  
         case 0x2028:  
         case 0x2029:  
2192          break;          break;
2193    
2194          default:          default:
# Line 2192  for (;;) Line 2201  for (;;)
2201        case OP_VSPACE:        case OP_VSPACE:
2202        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2203          {          {
2204          case 0x000a:          VSPACE_CASES:
         case 0x000b:  
         case 0x000c:  
         case 0x000d:  
         case 0x0085:  
         case 0x2028:  
         case 0x2029:  
2205          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2206          break;          break;
2207    
2208          default: break;          default:
2209            break;
2210          }          }
2211        break;        break;
2212    
# Line 2210  for (;;) Line 2214  for (;;)
2214        case OP_NOT_HSPACE:        case OP_NOT_HSPACE:
2215        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2216          {          {
2217          case 0x09:      /* HT */          HSPACE_CASES:
         case 0x20:      /* SPACE */  
         case 0xa0:      /* NBSP */  
         case 0x1680:    /* OGHAM SPACE MARK */  
         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
         case 0x2000:    /* EN QUAD */  
         case 0x2001:    /* EM QUAD */  
         case 0x2002:    /* EN SPACE */  
         case 0x2003:    /* EM SPACE */  
         case 0x2004:    /* THREE-PER-EM SPACE */  
         case 0x2005:    /* FOUR-PER-EM SPACE */  
         case 0x2006:    /* SIX-PER-EM SPACE */  
         case 0x2007:    /* FIGURE SPACE */  
         case 0x2008:    /* PUNCTUATION SPACE */  
         case 0x2009:    /* THIN SPACE */  
         case 0x200A:    /* HAIR SPACE */  
         case 0x202f:    /* NARROW NO-BREAK SPACE */  
         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
         case 0x3000:    /* IDEOGRAPHIC SPACE */  
2218          break;          break;
2219    
2220          default:          default:
# Line 2241  for (;;) Line 2227  for (;;)
2227        case OP_HSPACE:        case OP_HSPACE:
2228        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2229          {          {
2230          case 0x09:      /* HT */          HSPACE_CASES:
         case 0x20:      /* SPACE */  
         case 0xa0:      /* NBSP */  
         case 0x1680:    /* OGHAM SPACE MARK */  
         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
         case 0x2000:    /* EN QUAD */  
         case 0x2001:    /* EM QUAD */  
         case 0x2002:    /* EN SPACE */  
         case 0x2003:    /* EM SPACE */  
         case 0x2004:    /* THREE-PER-EM SPACE */  
         case 0x2005:    /* FOUR-PER-EM SPACE */  
         case 0x2006:    /* SIX-PER-EM SPACE */  
         case 0x2007:    /* FIGURE SPACE */  
         case 0x2008:    /* PUNCTUATION SPACE */  
         case 0x2009:    /* THIN SPACE */  
         case 0x200A:    /* HAIR SPACE */  
         case 0x202f:    /* NARROW NO-BREAK SPACE */  
         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
         case 0x3000:    /* IDEOGRAPHIC SPACE */  
2231          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2232          break;          break;
2233    
2234            default:
2235            break;
2236          }          }
2237        break;        break;
2238    
# Line 2277  for (;;) Line 2248  for (;;)
2248    
2249        case OP_NOTI:        case OP_NOTI:
2250        if (clen > 0)        if (clen > 0)
2251          {          {
2252          unsigned int otherd;          unsigned int otherd;
2253  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2254          if (utf && d >= 128)          if (utf && d >= 128)
# Line 2291  for (;;) Line 2262  for (;;)
2262          otherd = TABLE_GET(d, fcc, d);          otherd = TABLE_GET(d, fcc, d);
2263          if (c != d && c != otherd)          if (c != d && c != otherd)
2264            { ADD_NEW(state_offset + dlen + 1, 0); }            { ADD_NEW(state_offset + dlen + 1, 0); }
2265          }          }
2266        break;        break;
2267    
2268        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 2315  for (;;) Line 2286  for (;;)
2286        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
2287        if (clen > 0)        if (clen > 0)
2288          {          {
2289          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2290          if (caseless)          if (caseless)
2291            {            {
2292  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2362  for (;;) Line 2333  for (;;)
2333        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2334        if (clen > 0)        if (clen > 0)
2335          {          {
2336          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2337          if (caseless)          if (caseless)
2338            {            {
2339  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2407  for (;;) Line 2378  for (;;)
2378        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2379        if (clen > 0)        if (clen > 0)
2380          {          {
2381          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2382          if (caseless)          if (caseless)
2383            {            {
2384  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2444  for (;;) Line 2415  for (;;)
2415        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2416        if (clen > 0)        if (clen > 0)
2417          {          {
2418          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2419          if (caseless)          if (caseless)
2420            {            {
2421  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2488  for (;;) Line 2459  for (;;)
2459        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2460        if (clen > 0)        if (clen > 0)
2461          {          {
2462          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2463          if (caseless)          if (caseless)
2464            {            {
2465  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2586  for (;;) Line 2557  for (;;)
2557              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2558            if (isinclass)            if (isinclass)
2559              {              {
2560              int max = GET2(ecode, 1 + IMM2_SIZE);              unsigned int max = GET2(ecode, 1 + IMM2_SIZE);
2561              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2562                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2563              else              else
# Line 2662  for (;;) Line 2633  for (;;)
2633              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2634              cb.callout_number   = code[LINK_SIZE+2];              cb.callout_number   = code[LINK_SIZE+2];
2635              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2636  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
2637              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2638  #else  #elif defined COMPILE_PCRE16
2639              cb.subject          = (PCRE_SPTR16)start_subject;              cb.subject          = (PCRE_SPTR16)start_subject;
2640    #elif defined COMPILE_PCRE32
2641                cb.subject          = (PCRE_SPTR32)start_subject;
2642  #endif  #endif
2643              cb.subject_length   = (int)(end_subject - start_subject);              cb.subject_length   = (int)(end_subject - start_subject);
2644              cb.start_match      = (int)(current_subject - start_subject);              cb.start_match      = (int)(current_subject - start_subject);
# Line 2796  for (;;) Line 2769  for (;;)
2769            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2770              {              {
2771              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2772  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2773              const pcre_uchar *p = start_subject + local_offsets[rc];              if (utf)
2774              const pcre_uchar *pp = start_subject + local_offsets[rc+1];                {
2775              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;                const pcre_uchar *p = start_subject + local_offsets[rc];
2776                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2777                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2778                  }
2779  #endif  #endif
2780              if (charcount > 0)              if (charcount > 0)
2781                {                {
# Line 2897  for (;;) Line 2873  for (;;)
2873              const pcre_uchar *p = ptr;              const pcre_uchar *p = ptr;
2874              const pcre_uchar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2875              charcount = (int)(pp - p);              charcount = (int)(pp - p);
2876  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2877              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;              if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2878  #endif  #endif
2879              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2880              }              }
# Line 2979  for (;;) Line 2955  for (;;)
2955              }              }
2956            else            else
2957              {              {
2958  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2959              const pcre_uchar *p = start_subject + local_offsets[0];              if (utf)
2960              const pcre_uchar *pp = start_subject + local_offsets[1];                {
2961              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;                const pcre_uchar *p = start_subject + local_offsets[0];
2962                  const pcre_uchar *pp = start_subject + local_offsets[1];
2963                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2964                  }
2965  #endif  #endif
2966              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2967              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
# Line 3005  for (;;) Line 2984  for (;;)
2984          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2985          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2986          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
2987  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
2988          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
2989  #else  #elif defined COMPILE_PCRE16
2990          cb.subject          = (PCRE_SPTR16)start_subject;          cb.subject          = (PCRE_SPTR16)start_subject;
2991    #elif defined COMPILE_PCRE32
2992            cb.subject          = (PCRE_SPTR32)start_subject;
2993  #endif  #endif
2994          cb.subject_length   = (int)(end_subject - start_subject);          cb.subject_length   = (int)(end_subject - start_subject);
2995          cb.start_match      = (int)(current_subject - start_subject);          cb.start_match      = (int)(current_subject - start_subject);
# Line 3047  for (;;) Line 3028  for (;;)
3028    
3029    The "could_continue" variable is true if a state could have continued but    The "could_continue" variable is true if a state could have continued but
3030    for the fact that the end of the subject was reached. */    for the fact that the end of the subject was reached. */
3031    
3032    if (new_count <= 0)    if (new_count <= 0)
3033      {      {
3034      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
# Line 3064  for (;;) Line 3045  for (;;)
3045            (                                          /* or ... */            (                                          /* or ... */
3046            ptr >= end_subject &&                /* End of subject and */            ptr >= end_subject &&                /* End of subject and */
3047            ptr > md->start_used_ptr)            /* Inspected non-empty string */            ptr > md->start_used_ptr)            /* Inspected non-empty string */
3048            )            )
3049          )          )
       {  
       if (offsetcount >= 2)  
         {  
         offsets[0] = (int)(md->start_used_ptr - start_subject);  
         offsets[1] = (int)(end_subject - start_subject);  
         }  
3050        match_count = PCRE_ERROR_PARTIAL;        match_count = PCRE_ERROR_PARTIAL;
       }  
   
3051      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
3052        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
3053        rlevel*2-2, SP));        rlevel*2-2, SP));
# Line 3124  Returns:          > 0 => number of match Line 3097  Returns:          > 0 => number of match
3097                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3098  */  */
3099    
3100  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
3101  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3102  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3103    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3104    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3105  #else  #elif defined COMPILE_PCRE16
3106  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3107  pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,  pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3108    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3109    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3110    #elif defined COMPILE_PCRE32
3111    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3112    pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
3113      PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
3114      int offsetcount, int *workspace, int wscount)
3115  #endif  #endif
3116  {  {
3117  REAL_PCRE *re = (REAL_PCRE *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
# Line 3160  if (re == NULL || subject == NULL || wor Line 3138  if (re == NULL || subject == NULL || wor
3138     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3139  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3140  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3141    if (length < 0) return PCRE_ERROR_BADLENGTH;
3142  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3143    
3144  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 3172  if (re->magic_number != MAGIC_NUMBER) Line 3151  if (re->magic_number != MAGIC_NUMBER)
3151      PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;      PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3152  if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;  if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3153    
3154  /* If restarting after a partial match, do some sanity checks on the contents  /* If restarting after a partial match, do some sanity checks on the contents
3155  of the workspace. */  of the workspace. */
3156    
3157  if ((options & PCRE_DFA_RESTART) != 0)  if ((options & PCRE_DFA_RESTART) != 0)
3158    {    {
3159    if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||    if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3160      workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)      workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3161        return PCRE_ERROR_DFA_BADRESTART;        return PCRE_ERROR_DFA_BADRESTART;
3162    }    }
3163    
3164  /* Set up study, callout, and table data */  /* Set up study, callout, and table data */
3165    
# Line 3208  end_subject = (const pcre_uchar *)subjec Line 3187  end_subject = (const pcre_uchar *)subjec
3187  req_char_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3188    
3189  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3190  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
3191  utf = (re->options & PCRE_UTF8) != 0;  utf = (re->options & PCRE_UTF8) != 0;
3192  #else  #else
3193  utf = FALSE;  utf = FALSE;
# Line 3294  if (utf && (options & PCRE_NO_UTF8_CHECK Line 3273  if (utf && (options & PCRE_NO_UTF8_CHECK
3273        offsets[0] = erroroffset;        offsets[0] = erroroffset;
3274        offsets[1] = errorcode;        offsets[1] = errorcode;
3275        }        }
3276      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?  #if defined COMPILE_PCRE8
3277        return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
3278        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3279    #elif defined COMPILE_PCRE16
3280        return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
3281          PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
3282    #elif defined COMPILE_PCRE32
3283        return PCRE_ERROR_BADUTF32;
3284    #endif
3285      }      }
3286    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3287    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
3288          NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))          NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3289      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
3290    #endif
3291    }    }
3292  #endif  #endif
3293    
# Line 3409  for (;;) Line 3397  for (;;)
3397        if (has_first_char)        if (has_first_char)
3398          {          {
3399          if (first_char != first_char2)          if (first_char != first_char2)
3400              {
3401              pcre_uchar csc;
3402            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3403                *current_subject != first_char && *current_subject != first_char2)                   (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
3404              current_subject++;              current_subject++;
3405              }
3406          else          else
3407            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3408                   *current_subject != first_char)                   RAWUCHARTEST(current_subject) != first_char)
3409              current_subject++;              current_subject++;
3410          }          }
3411    
# Line 3444  for (;;) Line 3435  for (;;)
3435            ANYCRLF, and we are now at a LF, advance the match position by one            ANYCRLF, and we are now at a LF, advance the match position by one
3436            more character. */            more character. */
3437    
3438            if (current_subject[-1] == CHAR_CR &&            if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3439                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
3440                 current_subject < end_subject &&                 current_subject < end_subject &&
3441                 *current_subject == CHAR_NL)                 RAWUCHARTEST(current_subject) == CHAR_NL)
3442              current_subject++;              current_subject++;
3443            }            }
3444          }          }
# Line 3458  for (;;) Line 3449  for (;;)
3449          {          {
3450          while (current_subject < end_subject)          while (current_subject < end_subject)
3451            {            {
3452            register unsigned int c = *current_subject;            register pcre_uint32 c = RAWUCHARTEST(current_subject);
3453  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3454            if (c > 255) c = 255;            if (c > 255) c = 255;
3455  #endif  #endif
# Line 3524  for (;;) Line 3515  for (;;)
3515              {              {
3516              while (p < end_subject)              while (p < end_subject)
3517                {                {
3518                register int pp = *p++;                register pcre_uint32 pp = RAWUCHARINCTEST(p);
3519                if (pp == req_char || pp == req_char2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3520                }                }
3521              }              }
# Line 3532  for (;;) Line 3523  for (;;)
3523              {              {
3524              while (p < end_subject)              while (p < end_subject)
3525                {                {
3526                if (*p++ == req_char) { p--; break; }                if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
3527                }                }
3528              }              }
3529    
# Line 3570  for (;;) Line 3561  for (;;)
3561    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3562    on only if not anchored. */    on only if not anchored. */
3563    
3564    if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;    if (rc != PCRE_ERROR_NOMATCH || anchored)
3565        {
3566        if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
3567          {
3568          offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
3569          offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
3570          if (offsetcount > 2)
3571            offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
3572          }
3573        return rc;
3574        }
3575    
3576    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
3577    and firstline is set. */    and firstline is set. */
# Line 3590  for (;;) Line 3591  for (;;)
3591    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
3592    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
3593    
3594    if (current_subject[-1] == CHAR_CR &&    if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3595        current_subject < end_subject &&        current_subject < end_subject &&
3596        *current_subject == CHAR_NL &&        RAWUCHARTEST(current_subject) == CHAR_NL &&
3597        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
3598          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
3599           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||

Legend:
Removed from v.960  
changed lines
  Added in v.1260

  ViewVC Help
Powered by ViewVC 1.1.5