/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC | revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2006 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 43  alternative matching function that uses Line 43  alternative matching function that uses
43  compatible, but it has advantages in certain applications. */  compatible, but it has advantages in certain applications. */
44    
45    
46    #define NLBLOCK md           /* The block containing newline information */
47  #include "pcre_internal.h"  #include "pcre_internal.h"
48    
49    
# Line 288  const uschar *start_subject = md->start_ Line 289  const uschar *start_subject = md->start_
289  const uschar *end_subject = md->end_subject;  const uschar *end_subject = md->end_subject;
290  const uschar *start_code = md->start_code;  const uschar *start_code = md->start_code;
291    
292    #ifdef SUPPORT_UTF8
293  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
294    #endif
295    
296  rlevel++;  rlevel++;
297  offsetcount &= (-2);  offsetcount &= (-2);
# Line 421  ptr = current_subject; Line 424  ptr = current_subject;
424  for (;;)  for (;;)
425    {    {
426    int i, j;    int i, j;
427    int c, d, clen, dlen;    int clen, dlen;
428      unsigned int c, d;
429    
430    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
431    new state list. */    new state list. */
# Line 480  for (;;) Line 484  for (;;)
484      const uschar *code;      const uschar *code;
485      int state_offset = current_state->offset;      int state_offset = current_state->offset;
486      int count, codevalue;      int count, codevalue;
487      int chartype, othercase;      int chartype, script;
488    
489  #ifdef DEBUG  #ifdef DEBUG
490      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 645  for (;;) Line 649  for (;;)
649        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
650        case OP_CIRC:        case OP_CIRC:
651        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
652            ((ims & PCRE_MULTILINE) != 0 && ptr[-1] == NEWLINE))            ((ims & PCRE_MULTILINE) != 0 &&
653                ptr >= start_subject + md->nllen &&
654                ptr != end_subject &&
655                IS_NEWLINE(ptr - md->nllen)))
656          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
657        break;        break;
658    
# Line 679  for (;;) Line 686  for (;;)
686    
687        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
688        case OP_ANY:        case OP_ANY:
689        if (clen > 0 && (c != NEWLINE || (ims & PCRE_DOTALL) != 0))        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 ||
690                           ptr > end_subject - md->nllen ||
691                           !IS_NEWLINE(ptr)))
692          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
693        break;        break;
694    
695        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
696        case OP_EODN:        case OP_EODN:
697        if (clen == 0 || (c == NEWLINE && ptr + 1 == end_subject))        if (clen == 0 ||
698               (ptr == end_subject - md->nllen && IS_NEWLINE(ptr)))
699          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
700        break;        break;
701    
# Line 693  for (;;) Line 703  for (;;)
703        case OP_DOLL:        case OP_DOLL:
704        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
705          {          {
706          if (clen == 0 || (c == NEWLINE && (ptr + 1 == end_subject ||          if (clen == 0 ||
707                                  (ims & PCRE_MULTILINE) != 0)))              (ptr <= end_subject - md->nllen && IS_NEWLINE(ptr) &&
708                   ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
709                ))
710            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
711          }          }
712        else if (c == NEWLINE && (ims & PCRE_MULTILINE) != 0)        else if ((ims & PCRE_MULTILINE) != 0 &&
713                   ptr <= end_subject - md->nllen && IS_NEWLINE(ptr))
714          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
715        break;        break;
716    
# Line 757  for (;;) Line 770  for (;;)
770        case OP_NOTPROP:        case OP_NOTPROP:
771        if (clen > 0)        if (clen > 0)
772          {          {
773          int rqdtype, category;          BOOL OK;
774          category = _pcre_ucp_findchar(c, &chartype, &othercase);          int category = _pcre_ucp_findprop(c, &chartype, &script);
775          rqdtype = code[1];          switch(code[1])
         if (rqdtype >= 128)  
           {  
           if ((rqdtype - 128 == category) == (codevalue == OP_PROP))  
             { ADD_NEW(state_offset + 2, 0); }  
           }  
         else  
776            {            {
777            if ((rqdtype == chartype) == (codevalue == OP_PROP))            case PT_ANY:
778              { ADD_NEW(state_offset + 2, 0); }            OK = TRUE;
779              break;
780    
781              case PT_LAMP:
782              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
783              break;
784    
785              case PT_GC:
786              OK = category == code[2];
787              break;
788    
789              case PT_PC:
790              OK = chartype == code[2];
791              break;
792    
793              case PT_SC:
794              OK = script == code[2];
795              break;
796    
797              /* Should never occur, but keep compilers from grumbling. */
798    
799              default:
800              OK = codevalue != OP_PROP;
801              break;
802            }            }
803    
804            if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
805          }          }
806        break;        break;
807  #endif  #endif
# Line 790  for (;;) Line 822  for (;;)
822          {          {
823          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
824              (c < 256 &&              (c < 256 &&
825                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
826                   (ims & PCRE_DOTALL) != 0 ||
827                   ptr > end_subject - md->nllen ||
828                   !IS_NEWLINE(ptr)
829                  ) &&
830                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
831            {            {
832            count++;            count++;
# Line 807  for (;;) Line 843  for (;;)
843          {          {
844          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
845              (c < 256 &&              (c < 256 &&
846                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
847                   (ims & PCRE_DOTALL) != 0 ||
848                   ptr > end_subject - md->nllen ||
849                   !IS_NEWLINE(ptr)
850                  ) &&
851                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
852            {            {
853            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
# Line 823  for (;;) Line 863  for (;;)
863          {          {
864          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
865              (c < 256 &&              (c < 256 &&
866                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
867                   (ims & PCRE_DOTALL) != 0 ||
868                   ptr > end_subject - md->nllen ||
869                   !IS_NEWLINE(ptr)
870                  ) &&
871                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
872            {            {
873            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
# Line 842  for (;;) Line 886  for (;;)
886          {          {
887          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
888              (c < 256 &&              (c < 256 &&
889                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
890                   (ims & PCRE_DOTALL) != 0 ||
891                   ptr > end_subject - md->nllen ||
892                   !IS_NEWLINE(ptr)
893                  ) &&
894                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
895            {            {
896            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
# Line 862  for (;;) Line 910  for (;;)
910        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
911        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
912        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
913        if (count > 0) { ADD_ACTIVE(state_offset + 3, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
914        if (clen > 0)        if (clen > 0)
915          {          {
916          int category = _pcre_ucp_findchar(c, &chartype, &othercase);          BOOL OK;
917          int rqdtype = code[2];          int category = _pcre_ucp_findprop(c, &chartype, &script);
918          if ((d == OP_PROP) ==          switch(code[2])
919              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
920            { count++; ADD_NEW(state_offset, count); }            case PT_ANY:
921              OK = TRUE;
922              break;
923    
924              case PT_LAMP:
925              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
926              break;
927    
928              case PT_GC:
929              OK = category == code[3];
930              break;
931    
932              case PT_PC:
933              OK = chartype == code[3];
934              break;
935    
936              case PT_SC:
937              OK = script == code[3];
938              break;
939    
940              /* Should never occur, but keep compilers from grumbling. */
941    
942              default:
943              OK = codevalue != OP_PROP;
944              break;
945              }
946    
947            if (OK == (d == OP_PROP)) { count++; ADD_NEW(state_offset, count); }
948          }          }
949        break;        break;
950    
# Line 878  for (;;) Line 953  for (;;)
953        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
954        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
955        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
956        if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
957          {          {
958          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
959          int ncount = 0;          int ncount = 0;
# Line 887  for (;;) Line 962  for (;;)
962            int nd;            int nd;
963            int ndlen = 1;            int ndlen = 1;
964            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
965            if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
966            ncount++;            ncount++;
967            nptr += ndlen;            nptr += ndlen;
968            }            }
# Line 899  for (;;) Line 974  for (;;)
974        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
975        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
976        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
977        count = 3;        count = 4;
978        goto QS1;        goto QS1;
979    
980        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
# Line 908  for (;;) Line 983  for (;;)
983    
984        QS1:        QS1:
985    
986        ADD_ACTIVE(state_offset + 3, 0);        ADD_ACTIVE(state_offset + 4, 0);
987        if (clen > 0)        if (clen > 0)
988          {          {
989          int category = _pcre_ucp_findchar(c, &chartype, &othercase);          BOOL OK;
990          int rqdtype = code[2];          int category = _pcre_ucp_findprop(c, &chartype, &script);
991          if ((d == OP_PROP) ==          switch(code[2])
992              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
993            { ADD_NEW(state_offset + count, 0); }            case PT_ANY:
994              OK = TRUE;
995              break;
996    
997              case PT_LAMP:
998              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
999              break;
1000    
1001              case PT_GC:
1002              OK = category == code[3];
1003              break;
1004    
1005              case PT_PC:
1006              OK = chartype == code[3];
1007              break;
1008    
1009              case PT_SC:
1010              OK = script == code[3];
1011              break;
1012    
1013              /* Should never occur, but keep compilers from grumbling. */
1014    
1015              default:
1016              OK = codevalue != OP_PROP;
1017              break;
1018              }
1019    
1020            if (OK == (d == OP_PROP)) { ADD_NEW(state_offset + count, 0); }
1021          }          }
1022        break;        break;
1023    
# Line 932  for (;;) Line 1034  for (;;)
1034        QS2:        QS2:
1035    
1036        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1037        if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1038          {          {
1039          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1040          int ncount = 0;          int ncount = 0;
# Line 941  for (;;) Line 1043  for (;;)
1043            int nd;            int nd;
1044            int ndlen = 1;            int ndlen = 1;
1045            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1046            if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1047            ncount++;            ncount++;
1048            nptr += ndlen;            nptr += ndlen;
1049            }            }
# Line 954  for (;;) Line 1056  for (;;)
1056        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1057        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1058        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1059          { ADD_ACTIVE(state_offset + 5, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1060        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1061        if (clen > 0)        if (clen > 0)
1062          {          {
1063          int category = _pcre_ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1064          int rqdtype = code[4];          int category = _pcre_ucp_findprop(c, &chartype, &script);
1065          if ((d == OP_PROP) ==          switch(code[4])
1066              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
1067              case PT_ANY:
1068              OK = TRUE;
1069              break;
1070    
1071              case PT_LAMP:
1072              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1073              break;
1074    
1075              case PT_GC:
1076              OK = category == code[5];
1077              break;
1078    
1079              case PT_PC:
1080              OK = chartype == code[5];
1081              break;
1082    
1083              case PT_SC:
1084              OK = script == code[5];
1085              break;
1086    
1087              /* Should never occur, but keep compilers from grumbling. */
1088    
1089              default:
1090              OK = codevalue != OP_PROP;
1091              break;
1092              }
1093    
1094            if (OK == (d == OP_PROP))
1095            {            {
1096            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1097              { ADD_NEW(state_offset + 5, 0); }              { ADD_NEW(state_offset + 6, 0); }
1098            else            else
1099              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1100            }            }
# Line 978  for (;;) Line 1108  for (;;)
1108        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1109          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1110        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1111        if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1112          {          {
1113          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1114          int ncount = 0;          int ncount = 0;
# Line 987  for (;;) Line 1117  for (;;)
1117            int nd;            int nd;
1118            int ndlen = 1;            int ndlen = 1;
1119            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1120            if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1121            ncount++;            ncount++;
1122            nptr += ndlen;            nptr += ndlen;
1123            }            }
# Line 1018  for (;;) Line 1148  for (;;)
1148          {          {
1149          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1150            {            {
1151              int othercase;
1152            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1153    
1154            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
1155            other case of the character, if there is one. The result of            other case of the character. */
           _pcre_ucp_findchar() is < 0 if the char isn't found, and othercase is  
           returned as zero if there isn't another case. */  
1156    
1157  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1158            if (_pcre_ucp_findchar(c, &chartype, &othercase) < 0)            othercase = _pcre_ucp_othercase(c);
1159    #else
1160              othercase = -1;
1161  #endif  #endif
             othercase = -1;  
1162    
1163            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
1164            }            }
# Line 1050  for (;;) Line 1180  for (;;)
1180        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
1181    
1182        case OP_EXTUNI:        case OP_EXTUNI:
1183        if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1184          {          {
1185          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1186          int ncount = 0;          int ncount = 0;
# Line 1058  for (;;) Line 1188  for (;;)
1188            {            {
1189            int nclen = 1;            int nclen = 1;
1190            GETCHARLEN(c, nptr, nclen);            GETCHARLEN(c, nptr, nclen);
1191            if (_pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
1192            ncount++;            ncount++;
1193            nptr += nclen;            nptr += nclen;
1194            }            }
# Line 1093  for (;;) Line 1223  for (;;)
1223          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1224            {            {
1225  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1226            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1227              {              {
1228  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1229              if (_pcre_ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1230  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1231              }              }
1232            else            else
# Line 1117  for (;;) Line 1247  for (;;)
1247        if (clen > 0)        if (clen > 0)
1248          {          {
1249          int otherd = -1;          int otherd = -1;
1250          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1251            {            {
1252  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1253            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1254              {              {
1255  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1256              if (_pcre_ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1257  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1258              }              }
1259            else            else
# Line 1144  for (;;) Line 1274  for (;;)
1274        if (clen > 0)        if (clen > 0)
1275          {          {
1276          int otherd = -1;          int otherd = -1;
1277          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1278            {            {
1279  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1280            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1281              {              {
1282  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1283              if (_pcre_ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1284  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1285              }              }
1286            else            else
# Line 1178  for (;;) Line 1308  for (;;)
1308          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1309            {            {
1310  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1311            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1312              {              {
1313  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1314              if (_pcre_ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1315  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1316              }              }
1317            else            else
# Line 1267  for (;;) Line 1397  for (;;)
1397              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 5, 0); }
1398            if (isinclass)            if (isinclass)
1399              {              {
1400              if (++count >= GET2(ecode, 3))              int max = GET2(ecode, 3);
1401                if (++count >= max && max != 0)   /* Max 0 => no limit */
1402                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 5, 0); }
1403              else              else
1404                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
# Line 1519  for (;;) Line 1650  for (;;)
1650          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
1651          cb.callout_number   = code[1];          cb.callout_number   = code[1];
1652          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
1653          cb.subject          = (char *)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
1654          cb.subject_length   = end_subject - start_subject;          cb.subject_length   = end_subject - start_subject;
1655          cb.start_match      = current_subject - start_subject;          cb.start_match      = current_subject - start_subject;
1656          cb.current_position = ptr - start_subject;          cb.current_position = ptr - start_subject;
# Line 1567  for (;;) Line 1698  for (;;)
1698      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
1699        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
1700        rlevel*2-2, SP));        rlevel*2-2, SP));
1701      return match_count;      break;        /* In effect, "return", but see the comment below */
1702      }      }
1703    
1704    /* One or more states are active for the next character. */    /* One or more states are active for the next character. */
# Line 1575  for (;;) Line 1706  for (;;)
1706    ptr += clen;    /* Advance to next subject character */    ptr += clen;    /* Advance to next subject character */
1707    }               /* Loop to move along the subject string */    }               /* Loop to move along the subject string */
1708    
1709  /* Control never gets here, but we must keep the compiler happy. */  /* Control gets here from "break" a few lines above. We do it this way because
1710    if we use "return" above, we have compiler trouble. Some compilers warn if
1711    there's nothing here because they think the function doesn't return a value. On
1712    the other hand, if we put a dummy statement here, some more clever compilers
1713    complain that it can't be reached. Sigh. */
1714    
1715  DPRINTF(("%.*s+++ Unexpected end of internal_dfa_exec %d +++\n"  return match_count;
   "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, rlevel*2-2, SP));  
 return PCRE_ERROR_NOMATCH;  
1716  }  }
1717    
1718    
# Line 1611  Returns:          > 0 => number of match Line 1744  Returns:          > 0 => number of match
1744                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
1745  */  */
1746    
1747  PCRE_EXPORT int  PCRE_DATA_SCOPE int
1748  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
1749    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
1750    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
1751  {  {
1752  real_pcre *re = (real_pcre *)argument_re;  real_pcre *re = (real_pcre *)argument_re;
1753  dfa_match_data match_block;  dfa_match_data match_block;
1754    dfa_match_data *md = &match_block;
1755  BOOL utf8, anchored, startline, firstline;  BOOL utf8, anchored, startline, firstline;
1756  const uschar *current_subject, *end_subject, *lcc;  const uschar *current_subject, *end_subject, *lcc;
1757    
# Line 1632  BOOL req_byte_caseless = FALSE; Line 1766  BOOL req_byte_caseless = FALSE;
1766  int first_byte = -1;  int first_byte = -1;
1767  int req_byte = -1;  int req_byte = -1;
1768  int req_byte2 = -1;  int req_byte2 = -1;
1769    int newline;
1770    
1771  /* Plausibility checks */  /* Plausibility checks */
1772    
# Line 1646  flipping, so we scan the extra_data bloc Line 1781  flipping, so we scan the extra_data bloc
1781  match block, so we must initialize them beforehand. However, the other fields  match block, so we must initialize them beforehand. However, the other fields
1782  in the match block must not be set until after the byte flipping. */  in the match block must not be set until after the byte flipping. */
1783    
1784  match_block.tables = re->tables;  md->tables = re->tables;
1785  match_block.callout_data = NULL;  md->callout_data = NULL;
1786    
1787  if (extra_data != NULL)  if (extra_data != NULL)
1788    {    {
# Line 1655  if (extra_data != NULL) Line 1790  if (extra_data != NULL)
1790    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
1791      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
1792    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
1793      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
1794        return PCRE_ERROR_DFA_UMLIMIT;
1795    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
1796      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
1797    if ((flags & PCRE_EXTRA_TABLES) != 0)    if ((flags & PCRE_EXTRA_TABLES) != 0)
1798      match_block.tables = extra_data->tables;      md->tables = extra_data->tables;
1799    }    }
1800    
1801  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 1679  current_subject = (const unsigned char * Line 1816  current_subject = (const unsigned char *
1816  end_subject = (const unsigned char *)subject + length;  end_subject = (const unsigned char *)subject + length;
1817  req_byte_ptr = current_subject - 1;  req_byte_ptr = current_subject - 1;
1818    
1819    #ifdef SUPPORT_UTF8
1820  utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = (re->options & PCRE_UTF8) != 0;
1821  anchored = (options & PCRE_ANCHORED) != 0 || (re->options & PCRE_ANCHORED) != 0;  #else
1822    utf8 = FALSE;
1823    #endif
1824    
1825    anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
1826      (re->options & PCRE_ANCHORED) != 0;
1827    
1828  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
1829    
1830  match_block.start_code = (const uschar *)argument_re +  md->start_code = (const uschar *)argument_re +
1831      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
1832  match_block.start_subject = (const unsigned char *)subject;  md->start_subject = (const unsigned char *)subject;
1833  match_block.end_subject = end_subject;  md->end_subject = end_subject;
1834  match_block.moptions = options;  md->moptions = options;
1835  match_block.poptions = re->options;  md->poptions = re->options;
1836    
1837    /* Handle different types of newline. The two bits give four cases. If nothing
1838    is set at run time, whatever was used at compile time applies. */
1839    
1840    switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &
1841             PCRE_NEWLINE_CRLF)
1842      {
1843      default:              newline = NEWLINE; break;   /* Compile-time default */
1844      case PCRE_NEWLINE_CR: newline = '\r'; break;
1845      case PCRE_NEWLINE_LF: newline = '\n'; break;
1846      case PCRE_NEWLINE_CR+
1847           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
1848      }
1849    
1850    if (newline > 255)
1851      {
1852      md->nllen = 2;
1853      md->nl[0] = (newline >> 8) & 255;
1854      md->nl[1] = newline & 255;
1855      }
1856    else
1857      {
1858      md->nllen = 1;
1859      md->nl[0] = newline;
1860      }
1861    
1862  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
1863  back the character offset. */  back the character offset. */
# Line 1715  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 1883  if (utf8 && (options & PCRE_NO_UTF8_CHEC
1883  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
1884  in other programs later. */  in other programs later. */
1885    
1886  if (match_block.tables == NULL) match_block.tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = _pcre_default_tables;
1887    
1888  /* The lower casing table and the "must be at the start of a line" flag are  /* The lower casing table and the "must be at the start of a line" flag are
1889  used in a loop when finding where to start. */  used in a loop when finding where to start. */
1890    
1891  lcc = match_block.tables + lcc_offset;  lcc = md->tables + lcc_offset;
1892  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->options & PCRE_STARTLINE) != 0;
1893  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
1894    
# Line 1753  if ((re->options & PCRE_REQCHSET) != 0) Line 1921  if ((re->options & PCRE_REQCHSET) != 0)
1921    {    {
1922    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
1923    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
1924    req_byte2 = (match_block.tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */
1925    }    }
1926    
1927  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 1771  for (;;) Line 1939  for (;;)
1939    
1940      /* Advance to a unique first char if possible. If firstline is TRUE, the      /* Advance to a unique first char if possible. If firstline is TRUE, the
1941      start of the match is constrained to the first line of a multiline string.      start of the match is constrained to the first line of a multiline string.
1942      Implement this by temporarily adjusting end_subject so that we stop scanning      Implement this by temporarily adjusting end_subject so that we stop
1943      at a newline. If the match fails at the newline, later code breaks this loop.      scanning at a newline. If the match fails at the newline, later code breaks
1944      */      this loop. */
1945    
1946      if (firstline)      if (firstline)
1947        {        {
1948        const uschar *t = current_subject;        const uschar *t = current_subject;
1949        while (t < save_end_subject && *t != '\n') t++;        while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;
1950        end_subject = t;        end_subject = t;
1951        }        }
1952    
# Line 1793  for (;;) Line 1961  for (;;)
1961            current_subject++;            current_subject++;
1962        }        }
1963    
1964      /* Or to just after \n for a multiline match if possible */      /* Or to just after a linebreak for a multiline match if possible */
1965    
1966      else if (startline)      else if (startline)
1967        {        {
1968        if (current_subject > match_block.start_subject + start_offset)        if (current_subject > md->start_subject + md->nllen +
1969              start_offset)
1970          {          {
1971          while (current_subject < end_subject && current_subject[-1] != NEWLINE)          while (current_subject <= end_subject &&
1972                   !IS_NEWLINE(current_subject - md->nllen))
1973            current_subject++;            current_subject++;
1974          }          }
1975        }        }
# Line 1880  for (;;) Line 2050  for (;;)
2050    /* OK, now we can do the business */    /* OK, now we can do the business */
2051    
2052    rc = internal_dfa_exec(    rc = internal_dfa_exec(
2053      &match_block,                              /* fixed match data */      md,                                /* fixed match data */
2054      match_block.start_code,                    /* this subexpression's code */      md->start_code,                    /* this subexpression's code */
2055      current_subject,                           /* where we currently are */      current_subject,                   /* where we currently are */
2056      start_offset,                              /* start offset in subject */      start_offset,                      /* start offset in subject */
2057      offsets,                                   /* offset vector */      offsets,                           /* offset vector */
2058      offsetcount,                               /* size of same */      offsetcount,                       /* size of same */
2059      workspace,                                 /* workspace vector */      workspace,                         /* workspace vector */
2060      wscount,                                   /* size of same */      wscount,                           /* size of same */
2061      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
2062      0,                                         /* function recurse level */      0,                                 /* function recurse level */
2063      0);                                        /* regex recurse level */      0);                                /* regex recurse level */
2064    
2065    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
2066    on only if not anchored. */    on only if not anchored. */
# Line 1900  for (;;) Line 2070  for (;;)
2070    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2071    and firstline is set. */    and firstline is set. */
2072    
2073    if (firstline && *current_subject == NEWLINE) break;    if (firstline &&
2074          current_subject <= end_subject - md->nllen &&
2075          IS_NEWLINE(current_subject)) break;
2076    current_subject++;    current_subject++;
   
 #ifdef SUPPORT_UTF8  
2077    if (utf8)    if (utf8)
2078      {      {
2079      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
2080        current_subject++;        current_subject++;
2081      }      }
 #endif  
   
2082    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2083    }    }
2084    

Legend:
Removed from v.85  
changed lines
  Added in v.91

  ViewVC Help
Powered by ViewVC 1.1.5