/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 200 by ph10, Wed Aug 1 09:10:40 2007 UTC revision 383 by ph10, Sun Mar 8 15:26:59 2009 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* PCRE is a library of functions to support regular expressions whose syntax  /* PCRE is a library of functions to support regular expressions whose syntax
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language (but see
7    below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 45  applications. */ Line 46  applications. */
46    
47    
48  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
49  #include <config.h>  #include "config.h"
50  #endif  #endif
51    
52  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
# Line 84  centralize the loading of these characte Line 85  centralize the loading of these characte
85  small value. ***NOTE*** If the start of this table is modified, the two tables  small value. ***NOTE*** If the start of this table is modified, the two tables
86  that follow must also be modified. */  that follow must also be modified. */
87    
88  static uschar coptable[] = {  static const uschar coptable[] = {
89    0,                             /* End                                    */    0,                             /* End                                    */
90    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
91    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
92    0, 0,                          /* Any, Anybyte                           */    0, 0, 0,                       /* Any, AllAny, Anybyte                   */
93    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
94    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
95    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
# Line 130  static uschar coptable[] = { Line 131  static uschar coptable[] = {
131    0,                             /* CREF                                   */    0,                             /* CREF                                   */
132    0,                             /* RREF                                   */    0,                             /* RREF                                   */
133    0,                             /* DEF                                    */    0,                             /* DEF                                    */
134    0, 0                           /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
135      0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
136      0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
137  };  };
138    
139  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
140  and \w */  and \w */
141    
142  static uschar toptable1[] = {  static const uschar toptable1[] = {
143    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
144    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
145    ctype_space, ctype_space,    ctype_space, ctype_space,
146    ctype_word,  ctype_word,    ctype_word,  ctype_word,
147    0                               /* OP_ANY */    0, 0                            /* OP_ANY, OP_ALLANY */
148  };  };
149    
150  static uschar toptable2[] = {  static const uschar toptable2[] = {
151    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
152    ctype_digit, 0,    ctype_digit, 0,
153    ctype_space, 0,    ctype_space, 0,
154    ctype_word,  0,    ctype_word,  0,
155    1                               /* OP_ANY */    1, 1                            /* OP_ANY, OP_ALLANY */
156  };  };
157    
158    
# Line 221  Arguments: Line 224  Arguments:
224    rlevel            function call recursion level    rlevel            function call recursion level
225    recursing         regex recursive call level    recursing         regex recursive call level
226    
227  Returns:            > 0 =>  Returns:            > 0 => number of match offset pairs placed in offsets
228                      = 0 =>                      = 0 => offsets overflowed; longest matches are present
229                       -1 => failed to match                       -1 => failed to match
230                     < -1 => some kind of unexpected problem                     < -1 => some kind of unexpected problem
231    
# Line 510  for (;;) Line 513  for (;;)
513      const uschar *code;      const uschar *code;
514      int state_offset = current_state->offset;      int state_offset = current_state->offset;
515      int count, codevalue;      int count, codevalue;
 #ifdef SUPPORT_UCP  
     int chartype, script;  
 #endif  
516    
517  #ifdef DEBUG  #ifdef DEBUG
518      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 692  for (;;) Line 692  for (;;)
692        break;        break;
693    
694        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
695          case OP_SKIPZERO:
696          code += 1 + GET(code, 2);
697          while (*code == OP_ALT) code += GET(code, 1);
698          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
699          break;
700    
701          /*-----------------------------------------------------------------*/
702        case OP_CIRC:        case OP_CIRC:
703        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
704            ((ims & PCRE_MULTILINE) != 0 &&            ((ims & PCRE_MULTILINE) != 0 &&
# Line 730  for (;;) Line 737  for (;;)
737    
738        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
739        case OP_ANY:        case OP_ANY:
740        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))        if (clen > 0 && !IS_NEWLINE(ptr))
741            { ADD_NEW(state_offset + 1, 0); }
742          break;
743    
744          /*-----------------------------------------------------------------*/
745          case OP_ALLANY:
746          if (clen > 0)
747          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
748        break;        break;
749    
# Line 745  for (;;) Line 758  for (;;)
758        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
759          {          {
760          if (clen == 0 ||          if (clen == 0 ||
761              (IS_NEWLINE(ptr) &&              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
762                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
763              ))              ))
764            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
# Line 810  for (;;) Line 823  for (;;)
823        if (clen > 0)        if (clen > 0)
824          {          {
825          BOOL OK;          BOOL OK;
826          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
827          switch(code[1])          switch(code[1])
828            {            {
829            case PT_ANY:            case PT_ANY:
# Line 818  for (;;) Line 831  for (;;)
831            break;            break;
832    
833            case PT_LAMP:            case PT_LAMP:
834            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
835            break;            break;
836    
837            case PT_GC:            case PT_GC:
838            OK = category == code[2];            OK = _pcre_ucp_gentype[prop->chartype] == code[2];
839            break;            break;
840    
841            case PT_PC:            case PT_PC:
842            OK = chartype == code[2];            OK = prop->chartype == code[2];
843            break;            break;
844    
845            case PT_SC:            case PT_SC:
846            OK = script == code[2];            OK = prop->script == code[2];
847            break;            break;
848    
849            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 850  for (;;) Line 863  for (;;)
863  /* ========================================================================== */  /* ========================================================================== */
864        /* These opcodes likewise inspect the subject character, but have an        /* These opcodes likewise inspect the subject character, but have an
865        argument that is not a data character. It is one of these opcodes:        argument that is not a data character. It is one of these opcodes:
866        OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,        OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
867        OP_NOT_WORDCHAR. The value is loaded into d. */        OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
868    
869        case OP_TYPEPLUS:        case OP_TYPEPLUS:
870        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
# Line 862  for (;;) Line 875  for (;;)
875          {          {
876          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
877              (c < 256 &&              (c < 256 &&
878                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
879                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
880            {            {
881            if (count > 0 && codevalue == OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_TYPEPOSPLUS)
# Line 888  for (;;) Line 898  for (;;)
898          {          {
899          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
900              (c < 256 &&              (c < 256 &&
901                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
902                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
903            {            {
904            if (codevalue == OP_TYPEPOSQUERY)            if (codevalue == OP_TYPEPOSQUERY)
# Line 913  for (;;) Line 920  for (;;)
920          {          {
921          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
922              (c < 256 &&              (c < 256 &&
923                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
924                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
925            {            {
926            if (codevalue == OP_TYPEPOSSTAR)            if (codevalue == OP_TYPEPOSSTAR)
# Line 936  for (;;) Line 940  for (;;)
940          {          {
941          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
942              (c < 256 &&              (c < 256 &&
943                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
944                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
945            {            {
946            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
# Line 960  for (;;) Line 961  for (;;)
961          {          {
962          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
963              (c < 256 &&              (c < 256 &&
964                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
965                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
966            {            {
967            if (codevalue == OP_TYPEPOSUPTO)            if (codevalue == OP_TYPEPOSUPTO)
# Line 994  for (;;) Line 992  for (;;)
992        if (clen > 0)        if (clen > 0)
993          {          {
994          BOOL OK;          BOOL OK;
995          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
996          switch(code[2])          switch(code[2])
997            {            {
998            case PT_ANY:            case PT_ANY:
# Line 1002  for (;;) Line 1000  for (;;)
1000            break;            break;
1001    
1002            case PT_LAMP:            case PT_LAMP:
1003            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1004            break;            break;
1005    
1006            case PT_GC:            case PT_GC:
1007            OK = category == code[3];            OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1008            break;            break;
1009    
1010            case PT_PC:            case PT_PC:
1011            OK = chartype == code[3];            OK = prop->chartype == code[3];
1012            break;            break;
1013    
1014            case PT_SC:            case PT_SC:
1015            OK = script == code[3];            OK = prop->script == code[3];
1016            break;            break;
1017    
1018            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1043  for (;;) Line 1041  for (;;)
1041        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1042        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1043        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1044        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1045          {          {
1046          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1047          int ncount = 0;          int ncount = 0;
# Line 1057  for (;;) Line 1055  for (;;)
1055            int nd;            int nd;
1056            int ndlen = 1;            int ndlen = 1;
1057            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1058            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1059            ncount++;            ncount++;
1060            nptr += ndlen;            nptr += ndlen;
1061            }            }
# Line 1078  for (;;) Line 1076  for (;;)
1076          int ncount = 0;          int ncount = 0;
1077          switch (c)          switch (c)
1078            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1079            case 0x000b:            case 0x000b:
1080            case 0x000c:            case 0x000c:
1081            case 0x0085:            case 0x0085:
1082            case 0x2028:            case 0x2028:
1083            case 0x2029:            case 0x2029:
1084              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1085              goto ANYNL01;
1086    
1087              case 0x000d:
1088              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1089              /* Fall through */
1090    
1091              ANYNL01:
1092              case 0x000a:
1093            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1094              {              {
1095              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1095  for (;;) Line 1098  for (;;)
1098            count++;            count++;
1099            ADD_NEW_DATA(-state_offset, count, ncount);            ADD_NEW_DATA(-state_offset, count, ncount);
1100            break;            break;
1101    
1102            default:            default:
1103            break;            break;
1104            }            }
# Line 1210  for (;;) Line 1214  for (;;)
1214        if (clen > 0)        if (clen > 0)
1215          {          {
1216          BOOL OK;          BOOL OK;
1217          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1218          switch(code[2])          switch(code[2])
1219            {            {
1220            case PT_ANY:            case PT_ANY:
# Line 1218  for (;;) Line 1222  for (;;)
1222            break;            break;
1223    
1224            case PT_LAMP:            case PT_LAMP:
1225            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1226            break;            break;
1227    
1228            case PT_GC:            case PT_GC:
1229            OK = category == code[3];            OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1230            break;            break;
1231    
1232            case PT_PC:            case PT_PC:
1233            OK = chartype == code[3];            OK = prop->chartype == code[3];
1234            break;            break;
1235    
1236            case PT_SC:            case PT_SC:
1237            OK = script == code[3];            OK = prop->script == code[3];
1238            break;            break;
1239    
1240            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1268  for (;;) Line 1272  for (;;)
1272        QS2:        QS2:
1273    
1274        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1275        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1276          {          {
1277          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1278          int ncount = 0;          int ncount = 0;
# Line 1283  for (;;) Line 1287  for (;;)
1287            int nd;            int nd;
1288            int ndlen = 1;            int ndlen = 1;
1289            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1290            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1291            ncount++;            ncount++;
1292            nptr += ndlen;            nptr += ndlen;
1293            }            }
# Line 1311  for (;;) Line 1315  for (;;)
1315          int ncount = 0;          int ncount = 0;
1316          switch (c)          switch (c)
1317            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1318            case 0x000b:            case 0x000b:
1319            case 0x000c:            case 0x000c:
1320            case 0x0085:            case 0x0085:
1321            case 0x2028:            case 0x2028:
1322            case 0x2029:            case 0x2029:
1323              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1324              goto ANYNL02;
1325    
1326              case 0x000d:
1327              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1328              /* Fall through */
1329    
1330              ANYNL02:
1331              case 0x000a:
1332            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1333                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1334              {              {
# Line 1328  for (;;) Line 1337  for (;;)
1337              }              }
1338            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1339            break;            break;
1340    
1341            default:            default:
1342            break;            break;
1343            }            }
# Line 1451  for (;;) Line 1461  for (;;)
1461        if (clen > 0)        if (clen > 0)
1462          {          {
1463          BOOL OK;          BOOL OK;
1464          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1465          switch(code[4])          switch(code[4])
1466            {            {
1467            case PT_ANY:            case PT_ANY:
# Line 1459  for (;;) Line 1469  for (;;)
1469            break;            break;
1470    
1471            case PT_LAMP:            case PT_LAMP:
1472            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1473            break;            break;
1474    
1475            case PT_GC:            case PT_GC:
1476            OK = category == code[5];            OK = _pcre_ucp_gentype[prop->chartype] == code[5];
1477            break;            break;
1478    
1479            case PT_PC:            case PT_PC:
1480            OK = chartype == code[5];            OK = prop->chartype == code[5];
1481            break;            break;
1482    
1483            case PT_SC:            case PT_SC:
1484            OK = script == code[5];            OK = prop->script == code[5];
1485            break;            break;
1486    
1487            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1504  for (;;) Line 1514  for (;;)
1514        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1515          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1516        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1517        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1518          {          {
1519          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1520          int ncount = 0;          int ncount = 0;
# Line 1518  for (;;) Line 1528  for (;;)
1528            int nd;            int nd;
1529            int ndlen = 1;            int ndlen = 1;
1530            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1531            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1532            ncount++;            ncount++;
1533            nptr += ndlen;            nptr += ndlen;
1534            }            }
# Line 1543  for (;;) Line 1553  for (;;)
1553          int ncount = 0;          int ncount = 0;
1554          switch (c)          switch (c)
1555            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1556            case 0x000b:            case 0x000b:
1557            case 0x000c:            case 0x000c:
1558            case 0x0085:            case 0x0085:
1559            case 0x2028:            case 0x2028:
1560            case 0x2029:            case 0x2029:
1561              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1562              goto ANYNL03;
1563    
1564              case 0x000d:
1565              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1566              /* Fall through */
1567    
1568              ANYNL03:
1569              case 0x000a:
1570            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1571              {              {
1572              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1562  for (;;) Line 1577  for (;;)
1577            else            else
1578              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
1579            break;            break;
1580    
1581            default:            default:
1582            break;            break;
1583            }            }
# Line 1692  for (;;) Line 1708  for (;;)
1708            other case of the character. */            other case of the character. */
1709    
1710  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1711            othercase = _pcre_ucp_othercase(c);            othercase = UCD_OTHERCASE(c);
1712  #else  #else
1713            othercase = NOTACHAR;            othercase = NOTACHAR;
1714  #endif  #endif
# Line 1717  for (;;) Line 1733  for (;;)
1733        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
1734    
1735        case OP_EXTUNI:        case OP_EXTUNI:
1736        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1737          {          {
1738          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1739          int ncount = 0;          int ncount = 0;
# Line 1725  for (;;) Line 1741  for (;;)
1741            {            {
1742            int nclen = 1;            int nclen = 1;
1743            GETCHARLEN(c, nptr, nclen);            GETCHARLEN(c, nptr, nclen);
1744            if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(c) != ucp_M) break;
1745            ncount++;            ncount++;
1746            nptr += nclen;            nptr += nclen;
1747            }            }
# Line 1742  for (;;) Line 1758  for (;;)
1758        case OP_ANYNL:        case OP_ANYNL:
1759        if (clen > 0) switch(c)        if (clen > 0) switch(c)
1760          {          {
         case 0x000a:  
1761          case 0x000b:          case 0x000b:
1762          case 0x000c:          case 0x000c:
1763          case 0x0085:          case 0x0085:
1764          case 0x2028:          case 0x2028:
1765          case 0x2029:          case 0x2029:
1766            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1767    
1768            case 0x000a:
1769          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1770          break;          break;
1771    
1772          case 0x000d:          case 0x000d:
1773          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1774            {            {
# Line 1890  for (;;) Line 1909  for (;;)
1909            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1910              {              {
1911  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1912              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1913  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1914              }              }
1915            else            else
# Line 1928  for (;;) Line 1947  for (;;)
1947            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1948              {              {
1949  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1950              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1951  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1952              }              }
1953            else            else
# Line 1964  for (;;) Line 1983  for (;;)
1983            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1984              {              {
1985  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1986              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1987  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1988              }              }
1989            else            else
# Line 1996  for (;;) Line 2015  for (;;)
2015            if (utf8 && d >= 128)            if (utf8 && d >= 128)
2016              {              {
2017  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2018              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2019  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2020              }              }
2021            else            else
# Line 2031  for (;;) Line 2050  for (;;)
2050            if (utf8 && d >= 128)            if (utf8 && d >= 128)
2051              {              {
2052  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2053              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2054  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2055              }              }
2056            else            else
# Line 2139  for (;;) Line 2158  for (;;)
2158    
2159  /* ========================================================================== */  /* ========================================================================== */
2160        /* These are the opcodes for fancy brackets of various kinds. We have        /* These are the opcodes for fancy brackets of various kinds. We have
2161        to use recursion in order to handle them. */        to use recursion in order to handle them. The "always failing" assertion
2162          (?!) is optimised when compiling to OP_FAIL, so we have to support that,
2163          though the other "backtracking verbs" are not supported. */
2164    
2165          case OP_FAIL:
2166          break;
2167    
2168        case OP_ASSERT:        case OP_ASSERT:
2169        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
# Line 2482  Returns:          > 0 => number of match Line 2506  Returns:          > 0 => number of match
2506                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2507  */  */
2508    
2509  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
2510  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2511    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2512    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 2572  md->end_subject = end_subject; Line 2596  md->end_subject = end_subject;
2596  md->moptions = options;  md->moptions = options;
2597  md->poptions = re->options;  md->poptions = re->options;
2598    
2599    /* If the BSR option is not set at match time, copy what was set
2600    at compile time. */
2601    
2602    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2603      {
2604      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2605        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2606    #ifdef BSR_ANYCRLF
2607      else md->moptions |= PCRE_BSR_ANYCRLF;
2608    #endif
2609      }
2610    
2611  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
2612  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
2613    
# Line 2642  if (md->tables == NULL) md->tables = _pc Line 2678  if (md->tables == NULL) md->tables = _pc
2678  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2679    
2680  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
2681  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2682  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2683    
2684  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 2653  studied, there may be a bitmap of possib Line 2689  studied, there may be a bitmap of possib
2689    
2690  if (!anchored)  if (!anchored)
2691    {    {
2692    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2693      {      {
2694      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2695      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 2670  if (!anchored) Line 2706  if (!anchored)
2706  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2707  character" set. */  character" set. */
2708    
2709  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2710    {    {
2711    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2712    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 2698  for (;;) Line 2734  for (;;)
2734    
2735      if (firstline)      if (firstline)
2736        {        {
2737        const uschar *t = current_subject;        USPTR t = current_subject;
2738    #ifdef SUPPORT_UTF8
2739          if (utf8)
2740            {
2741            while (t < md->end_subject && !IS_NEWLINE(t))
2742              {
2743              t++;
2744              while (t < end_subject && (*t & 0xc0) == 0x80) t++;
2745              }
2746            }
2747          else
2748    #endif
2749        while (t < md->end_subject && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2750        end_subject = t;        end_subject = t;
2751        }        }
# Line 2720  for (;;) Line 2767  for (;;)
2767        {        {
2768        if (current_subject > md->start_subject + start_offset)        if (current_subject > md->start_subject + start_offset)
2769          {          {
2770          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))  #ifdef SUPPORT_UTF8
2771            if (utf8)
2772              {
2773              while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
2774                {
2775                current_subject++;
2776                while(current_subject < end_subject &&
2777                      (*current_subject & 0xc0) == 0x80)
2778                  current_subject++;
2779                }
2780              }
2781            else
2782    #endif
2783            while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
2784            current_subject++;            current_subject++;
2785    
2786          /* If we have just passed a CR and the newline option is ANY or          /* If we have just passed a CR and the newline option is ANY or
# Line 2840  for (;;) Line 2900  for (;;)
2900      }      }
2901    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2902    
2903    /* If we have just passed a CR and the newline option is CRLF or ANY or    /* If we have just passed a CR and we are now at a LF, and the pattern does
2904    ANYCRLF, and we are now at a LF, advance the match position by one more    not contain any explicit matches for \r or \n, and the newline option is CRLF
2905    character. */    or ANY or ANYCRLF, advance the match position by one more character. */
2906    
2907    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == '\r' &&
2908         (md->nltype == NLTYPE_ANY ||        current_subject < end_subject &&
2909          md->nltype == NLTYPE_ANYCRLF ||        *current_subject == '\n' &&
2910          md->nllen == 2) &&        (re->flags & PCRE_HASCRORLF) == 0 &&
2911         current_subject < end_subject &&          (md->nltype == NLTYPE_ANY ||
2912         *current_subject == '\n')           md->nltype == NLTYPE_ANYCRLF ||
2913             md->nllen == 2))
2914      current_subject++;      current_subject++;
2915    
2916    }   /* "Bumpalong" loop */    }   /* "Bumpalong" loop */

Legend:
Removed from v.200  
changed lines
  Added in v.383

  ViewVC Help
Powered by ViewVC 1.1.5