/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 227 by ph10, Tue Aug 21 15:00:15 2007 UTC | revision 428 by ph10, Mon Aug 31 17:10:26 2009 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* PCRE is a library of functions to support regular expressions whose syntax  /* PCRE is a library of functions to support regular expressions whose syntax
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language (but see
7    below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 45  applications. */ Line 46  applications. */
46    
47    
48  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
49  #include <config.h>  #include "config.h"
50  #endif  #endif
51    
52  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
# Line 60  applications. */ Line 61  applications. */
61  #define SP "                   "  #define SP "                   "
62    
63    
   
64  /*************************************************  /*************************************************
65  *      Code parameters and static tables         *  *      Code parameters and static tables         *
66  *************************************************/  *************************************************/
# Line 84  centralize the loading of these characte Line 84  centralize the loading of these characte
84  small value. ***NOTE*** If the start of this table is modified, the two tables  small value. ***NOTE*** If the start of this table is modified, the two tables
85  that follow must also be modified. */  that follow must also be modified. */
86    
87  static uschar coptable[] = {  static const uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91    0, 0,                          /* Any, Anybyte                           */    0, 0, 0,                       /* Any, AllAny, Anybyte                   */
92    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
# Line 132  static uschar coptable[] = { Line 132  static uschar coptable[] = {
132    0,                             /* DEF                                    */    0,                             /* DEF                                    */
133    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135    0, 0                           /* FAIL, ACCEPT                           */    0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static const uschar toptable1[] = {
142    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
146    0                               /* OP_ANY */    0, 0                            /* OP_ANY, OP_ALLANY */
147  };  };
148    
149  static uschar toptable2[] = {  static const uschar toptable2[] = {
150    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
154    1                               /* OP_ANY */    1, 1                            /* OP_ANY, OP_ALLANY */
155  };  };
156    
157    
# Line 223  Arguments: Line 223  Arguments:
223    rlevel            function call recursion level    rlevel            function call recursion level
224    recursing         regex recursive call level    recursing         regex recursive call level
225    
226  Returns:            > 0 =>  Returns:            > 0 => number of match offset pairs placed in offsets
227                      = 0 =>                      = 0 => offsets overflowed; longest matches are present
228                       -1 => failed to match                       -1 => failed to match
229                     < -1 => some kind of unexpected problem                     < -1 => some kind of unexpected problem
230    
# Line 454  for (;;) Line 454  for (;;)
454    int i, j;    int i, j;
455    int clen, dlen;    int clen, dlen;
456    unsigned int c, d;    unsigned int c, d;
457      int forced_fail = 0;
458      int reached_end = 0;
459    
460    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
461    new state list. */    new state list. */
# Line 511  for (;;) Line 513  for (;;)
513      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
514      const uschar *code;      const uschar *code;
515      int state_offset = current_state->offset;      int state_offset = current_state->offset;
516      int count, codevalue;      int count, codevalue, rrc;
 #ifdef SUPPORT_UCP  
     int chartype, script;  
 #endif  
517    
518  #ifdef DEBUG  #ifdef DEBUG
519      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 627  for (;;) Line 626  for (;;)
626            ADD_ACTIVE(state_offset - GET(code, 1), 0);            ADD_ACTIVE(state_offset - GET(code, 1), 0);
627            }            }
628          }          }
629        else if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)        else
630          {          {
631          if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;          reached_end++;    /* Count branches that reach the end */
632            else if (match_count > 0 && ++match_count * 2 >= offsetcount)          if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)
633              match_count = 0;            {
634          count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
635          if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));              else if (match_count > 0 && ++match_count * 2 >= offsetcount)
636          if (offsetcount >= 2)                match_count = 0;
637            {            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
638            offsets[0] = current_subject - start_subject;            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
639            offsets[1] = ptr - start_subject;            if (offsetcount >= 2)
640            DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              {
641              offsets[1] - offsets[0], current_subject));              offsets[0] = current_subject - start_subject;
642            }              offsets[1] = ptr - start_subject;
643          if ((md->moptions & PCRE_DFA_SHORTEST) != 0)              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
644            {                offsets[1] - offsets[0], current_subject));
645            DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"              }
646              "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
647              match_count, rlevel*2-2, SP));              {
648            return match_count;              DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
649            }                "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
650                  match_count, rlevel*2-2, SP));
651                return match_count;
652                }
653              }
654          }          }
655        break;        break;
656    
# Line 694  for (;;) Line 697  for (;;)
697        break;        break;
698    
699        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
700          case OP_SKIPZERO:
701          code += 1 + GET(code, 2);
702          while (*code == OP_ALT) code += GET(code, 1);
703          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
704          break;
705    
706          /*-----------------------------------------------------------------*/
707        case OP_CIRC:        case OP_CIRC:
708        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
709            ((ims & PCRE_MULTILINE) != 0 &&            ((ims & PCRE_MULTILINE) != 0 &&
# Line 732  for (;;) Line 742  for (;;)
742    
743        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
744        case OP_ANY:        case OP_ANY:
745        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))        if (clen > 0 && !IS_NEWLINE(ptr))
746            { ADD_NEW(state_offset + 1, 0); }
747          break;
748    
749          /*-----------------------------------------------------------------*/
750          case OP_ALLANY:
751          if (clen > 0)
752          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
753        break;        break;
754    
# Line 747  for (;;) Line 763  for (;;)
763        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
764          {          {
765          if (clen == 0 ||          if (clen == 0 ||
766              (IS_NEWLINE(ptr) &&              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
767                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
768              ))              ))
769            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
# Line 792  for (;;) Line 808  for (;;)
808            }            }
809          else left_word = 0;          else left_word = 0;
810    
811          if (clen > 0) right_word = c < 256 && (ctypes[c] & ctype_word) != 0;          if (clen > 0)
812            else right_word = 0;            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
813            else              /* This is a fudge to ensure that if this is the */
814              {               /* last item in the pattern, we don't count it as */
815              reached_end--;  /* reached, thus disabling a partial match. */
816              right_word = 0;
817              }
818    
819          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
820            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
# Line 812  for (;;) Line 833  for (;;)
833        if (clen > 0)        if (clen > 0)
834          {          {
835          BOOL OK;          BOOL OK;
836          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
837          switch(code[1])          switch(code[1])
838            {            {
839            case PT_ANY:            case PT_ANY:
# Line 820  for (;;) Line 841  for (;;)
841            break;            break;
842    
843            case PT_LAMP:            case PT_LAMP:
844            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
845            break;            break;
846    
847            case PT_GC:            case PT_GC:
848            OK = category == code[2];            OK = _pcre_ucp_gentype[prop->chartype] == code[2];
849            break;            break;
850    
851            case PT_PC:            case PT_PC:
852            OK = chartype == code[2];            OK = prop->chartype == code[2];
853            break;            break;
854    
855            case PT_SC:            case PT_SC:
856            OK = script == code[2];            OK = prop->script == code[2];
857            break;            break;
858    
859            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 852  for (;;) Line 873  for (;;)
873  /* ========================================================================== */  /* ========================================================================== */
874        /* These opcodes likewise inspect the subject character, but have an        /* These opcodes likewise inspect the subject character, but have an
875        argument that is not a data character. It is one of these opcodes:        argument that is not a data character. It is one of these opcodes:
876        OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,        OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
877        OP_NOT_WORDCHAR. The value is loaded into d. */        OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
878    
879        case OP_TYPEPLUS:        case OP_TYPEPLUS:
880        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
# Line 864  for (;;) Line 885  for (;;)
885          {          {
886          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
887              (c < 256 &&              (c < 256 &&
888                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
889                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
890            {            {
891            if (count > 0 && codevalue == OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_TYPEPOSPLUS)
# Line 890  for (;;) Line 908  for (;;)
908          {          {
909          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
910              (c < 256 &&              (c < 256 &&
911                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
912                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
913            {            {
914            if (codevalue == OP_TYPEPOSQUERY)            if (codevalue == OP_TYPEPOSQUERY)
# Line 915  for (;;) Line 930  for (;;)
930          {          {
931          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
932              (c < 256 &&              (c < 256 &&
933                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
934                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
935            {            {
936            if (codevalue == OP_TYPEPOSSTAR)            if (codevalue == OP_TYPEPOSSTAR)
# Line 938  for (;;) Line 950  for (;;)
950          {          {
951          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
952              (c < 256 &&              (c < 256 &&
953                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
954                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
955            {            {
956            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
# Line 962  for (;;) Line 971  for (;;)
971          {          {
972          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
973              (c < 256 &&              (c < 256 &&
974                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
975                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
976            {            {
977            if (codevalue == OP_TYPEPOSUPTO)            if (codevalue == OP_TYPEPOSUPTO)
# Line 996  for (;;) Line 1002  for (;;)
1002        if (clen > 0)        if (clen > 0)
1003          {          {
1004          BOOL OK;          BOOL OK;
1005          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1006          switch(code[2])          switch(code[2])
1007            {            {
1008            case PT_ANY:            case PT_ANY:
# Line 1004  for (;;) Line 1010  for (;;)
1010            break;            break;
1011    
1012            case PT_LAMP:            case PT_LAMP:
1013            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1014            break;            break;
1015    
1016            case PT_GC:            case PT_GC:
1017            OK = category == code[3];            OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1018            break;            break;
1019    
1020            case PT_PC:            case PT_PC:
1021            OK = chartype == code[3];            OK = prop->chartype == code[3];
1022            break;            break;
1023    
1024            case PT_SC:            case PT_SC:
1025            OK = script == code[3];            OK = prop->script == code[3];
1026            break;            break;
1027    
1028            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1045  for (;;) Line 1051  for (;;)
1051        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1052        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1053        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1054        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1055          {          {
1056          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1057          int ncount = 0;          int ncount = 0;
# Line 1059  for (;;) Line 1065  for (;;)
1065            int nd;            int nd;
1066            int ndlen = 1;            int ndlen = 1;
1067            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1068            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1069            ncount++;            ncount++;
1070            nptr += ndlen;            nptr += ndlen;
1071            }            }
# Line 1080  for (;;) Line 1086  for (;;)
1086          int ncount = 0;          int ncount = 0;
1087          switch (c)          switch (c)
1088            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1089            case 0x000b:            case 0x000b:
1090            case 0x000c:            case 0x000c:
1091            case 0x0085:            case 0x0085:
1092            case 0x2028:            case 0x2028:
1093            case 0x2029:            case 0x2029:
1094              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1095              goto ANYNL01;
1096    
1097              case 0x000d:
1098              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1099              /* Fall through */
1100    
1101              ANYNL01:
1102              case 0x000a:
1103            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1104              {              {
1105              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1097  for (;;) Line 1108  for (;;)
1108            count++;            count++;
1109            ADD_NEW_DATA(-state_offset, count, ncount);            ADD_NEW_DATA(-state_offset, count, ncount);
1110            break;            break;
1111    
1112            default:            default:
1113            break;            break;
1114            }            }
# Line 1212  for (;;) Line 1224  for (;;)
1224        if (clen > 0)        if (clen > 0)
1225          {          {
1226          BOOL OK;          BOOL OK;
1227          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1228          switch(code[2])          switch(code[2])
1229            {            {
1230            case PT_ANY:            case PT_ANY:
# Line 1220  for (;;) Line 1232  for (;;)
1232            break;            break;
1233    
1234            case PT_LAMP:            case PT_LAMP:
1235            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1236            break;            break;
1237    
1238            case PT_GC:            case PT_GC:
1239            OK = category == code[3];            OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1240            break;            break;
1241    
1242            case PT_PC:            case PT_PC:
1243            OK = chartype == code[3];            OK = prop->chartype == code[3];
1244            break;            break;
1245    
1246            case PT_SC:            case PT_SC:
1247            OK = script == code[3];            OK = prop->script == code[3];
1248            break;            break;
1249    
1250            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1270  for (;;) Line 1282  for (;;)
1282        QS2:        QS2:
1283    
1284        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1285        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1286          {          {
1287          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1288          int ncount = 0;          int ncount = 0;
# Line 1285  for (;;) Line 1297  for (;;)
1297            int nd;            int nd;
1298            int ndlen = 1;            int ndlen = 1;
1299            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1300            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1301            ncount++;            ncount++;
1302            nptr += ndlen;            nptr += ndlen;
1303            }            }
# Line 1313  for (;;) Line 1325  for (;;)
1325          int ncount = 0;          int ncount = 0;
1326          switch (c)          switch (c)
1327            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1328            case 0x000b:            case 0x000b:
1329            case 0x000c:            case 0x000c:
1330            case 0x0085:            case 0x0085:
1331            case 0x2028:            case 0x2028:
1332            case 0x2029:            case 0x2029:
1333              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1334              goto ANYNL02;
1335    
1336              case 0x000d:
1337              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1338              /* Fall through */
1339    
1340              ANYNL02:
1341              case 0x000a:
1342            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1343                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1344              {              {
# Line 1330  for (;;) Line 1347  for (;;)
1347              }              }
1348            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1349            break;            break;
1350    
1351            default:            default:
1352            break;            break;
1353            }            }
# Line 1453  for (;;) Line 1471  for (;;)
1471        if (clen > 0)        if (clen > 0)
1472          {          {
1473          BOOL OK;          BOOL OK;
1474          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1475          switch(code[4])          switch(code[4])
1476            {            {
1477            case PT_ANY:            case PT_ANY:
# Line 1461  for (;;) Line 1479  for (;;)
1479            break;            break;
1480    
1481            case PT_LAMP:            case PT_LAMP:
1482            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1483            break;            break;
1484    
1485            case PT_GC:            case PT_GC:
1486            OK = category == code[5];            OK = _pcre_ucp_gentype[prop->chartype] == code[5];
1487            break;            break;
1488    
1489            case PT_PC:            case PT_PC:
1490            OK = chartype == code[5];            OK = prop->chartype == code[5];
1491            break;            break;
1492    
1493            case PT_SC:            case PT_SC:
1494            OK = script == code[5];            OK = prop->script == code[5];
1495            break;            break;
1496    
1497            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1506  for (;;) Line 1524  for (;;)
1524        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1525          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1526        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1527        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1528          {          {
1529          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1530          int ncount = 0;          int ncount = 0;
# Line 1520  for (;;) Line 1538  for (;;)
1538            int nd;            int nd;
1539            int ndlen = 1;            int ndlen = 1;
1540            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1541            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1542            ncount++;            ncount++;
1543            nptr += ndlen;            nptr += ndlen;
1544            }            }
# Line 1545  for (;;) Line 1563  for (;;)
1563          int ncount = 0;          int ncount = 0;
1564          switch (c)          switch (c)
1565            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1566            case 0x000b:            case 0x000b:
1567            case 0x000c:            case 0x000c:
1568            case 0x0085:            case 0x0085:
1569            case 0x2028:            case 0x2028:
1570            case 0x2029:            case 0x2029:
1571              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1572              goto ANYNL03;
1573    
1574              case 0x000d:
1575              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1576              /* Fall through */
1577    
1578              ANYNL03:
1579              case 0x000a:
1580            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1581              {              {
1582              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1564  for (;;) Line 1587  for (;;)
1587            else            else
1588              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
1589            break;            break;
1590    
1591            default:            default:
1592            break;            break;
1593            }            }
# Line 1694  for (;;) Line 1718  for (;;)
1718            other case of the character. */            other case of the character. */
1719    
1720  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1721            othercase = _pcre_ucp_othercase(c);            othercase = UCD_OTHERCASE(c);
1722  #else  #else
1723            othercase = NOTACHAR;            othercase = NOTACHAR;
1724  #endif  #endif
# Line 1719  for (;;) Line 1743  for (;;)
1743        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
1744    
1745        case OP_EXTUNI:        case OP_EXTUNI:
1746        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1747          {          {
1748          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1749          int ncount = 0;          int ncount = 0;
# Line 1727  for (;;) Line 1751  for (;;)
1751            {            {
1752            int nclen = 1;            int nclen = 1;
1753            GETCHARLEN(c, nptr, nclen);            GETCHARLEN(c, nptr, nclen);
1754            if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(c) != ucp_M) break;
1755            ncount++;            ncount++;
1756            nptr += nclen;            nptr += nclen;
1757            }            }
# Line 1744  for (;;) Line 1768  for (;;)
1768        case OP_ANYNL:        case OP_ANYNL:
1769        if (clen > 0) switch(c)        if (clen > 0) switch(c)
1770          {          {
         case 0x000a:  
1771          case 0x000b:          case 0x000b:
1772          case 0x000c:          case 0x000c:
1773          case 0x0085:          case 0x0085:
1774          case 0x2028:          case 0x2028:
1775          case 0x2029:          case 0x2029:
1776            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1777    
1778            case 0x000a:
1779          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1780          break;          break;
1781    
1782          case 0x000d:          case 0x000d:
1783          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1784            {            {
# Line 1892  for (;;) Line 1919  for (;;)
1919            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1920              {              {
1921  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1922              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1923  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1924              }              }
1925            else            else
# Line 1930  for (;;) Line 1957  for (;;)
1957            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1958              {              {
1959  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1960              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1961  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1962              }              }
1963            else            else
# Line 1966  for (;;) Line 1993  for (;;)
1993            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1994              {              {
1995  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1996              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1997  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1998              }              }
1999            else            else
# Line 1998  for (;;) Line 2025  for (;;)
2025            if (utf8 && d >= 128)            if (utf8 && d >= 128)
2026              {              {
2027  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2028              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2029  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2030              }              }
2031            else            else
# Line 2033  for (;;) Line 2060  for (;;)
2060            if (utf8 && d >= 128)            if (utf8 && d >= 128)
2061              {              {
2062  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2063              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2064  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2065              }              }
2066            else            else
# Line 2141  for (;;) Line 2168  for (;;)
2168    
2169  /* ========================================================================== */  /* ========================================================================== */
2170        /* These are the opcodes for fancy brackets of various kinds. We have        /* These are the opcodes for fancy brackets of various kinds. We have
2171        to use recursion in order to handle them. */        to use recursion in order to handle them. The "always failing" assertion
2172          (?!) is optimised to OP_FAIL when compiling, so we have to support that,
2173          though the other "backtracking verbs" are not supported. */
2174    
2175          case OP_FAIL:
2176          forced_fail++;    /* Count FAILs for multiple states */
2177          break;
2178    
2179        case OP_ASSERT:        case OP_ASSERT:
2180        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
# Line 2179  for (;;) Line 2212  for (;;)
2212          {          {
2213          int local_offsets[1000];          int local_offsets[1000];
2214          int local_workspace[1000];          int local_workspace[1000];
2215          int condcode = code[LINK_SIZE+1];          int codelink = GET(code, 1);
2216            int condcode;
2217    
2218            /* Because of the way auto-callout works during compile, a callout item
2219            is inserted between OP_COND and an assertion condition. This does not
2220            happen for the other conditions. */
2221    
2222            if (code[LINK_SIZE+1] == OP_CALLOUT)
2223              {
2224              rrc = 0;
2225              if (pcre_callout != NULL)
2226                {
2227                pcre_callout_block cb;
2228                cb.version          = 1;   /* Version 1 of the callout block */
2229                cb.callout_number   = code[LINK_SIZE+2];
2230                cb.offset_vector    = offsets;
2231                cb.subject          = (PCRE_SPTR)start_subject;
2232                cb.subject_length   = end_subject - start_subject;
2233                cb.start_match      = current_subject - start_subject;
2234                cb.current_position = ptr - start_subject;
2235                cb.pattern_position = GET(code, LINK_SIZE + 3);
2236                cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2237                cb.capture_top      = 1;
2238                cb.capture_last     = -1;
2239                cb.callout_data     = md->callout_data;
2240                if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2241                }
2242              if (rrc > 0) break;                      /* Fail this thread */
2243              code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */
2244              }
2245    
2246            condcode = code[LINK_SIZE+1];
2247    
2248          /* Back reference conditions are not supported */          /* Back reference conditions are not supported */
2249    
# Line 2188  for (;;) Line 2252  for (;;)
2252          /* The DEFINE condition is always false */          /* The DEFINE condition is always false */
2253    
2254          if (condcode == OP_DEF)          if (condcode == OP_DEF)
2255            {            { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
           ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);  
           }  
2256    
2257          /* The only supported version of OP_RREF is for the value RREF_ANY,          /* The only supported version of OP_RREF is for the value RREF_ANY,
2258          which means "test if in any recursion". We can't test for specifically          which means "test if in any recursion". We can't test for specifically
# Line 2200  for (;;) Line 2262  for (;;)
2262            {            {
2263            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2264            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2265            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0)
2266              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2267              else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2268            }            }
2269    
2270          /* Otherwise, the condition is an assertion */          /* Otherwise, the condition is an assertion */
# Line 2231  for (;;) Line 2294  for (;;)
2294                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2295              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
2296            else            else
2297              { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2298            }            }
2299          }          }
2300        break;        break;
# Line 2383  for (;;) Line 2446  for (;;)
2446        /* Handle callouts */        /* Handle callouts */
2447    
2448        case OP_CALLOUT:        case OP_CALLOUT:
2449          rrc = 0;
2450        if (pcre_callout != NULL)        if (pcre_callout != NULL)
2451          {          {
         int rrc;  
2452          pcre_callout_block cb;          pcre_callout_block cb;
2453          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2454          cb.callout_number   = code[1];          cb.callout_number   = code[1];
# Line 2400  for (;;) Line 2463  for (;;)
2463          cb.capture_last     = -1;          cb.capture_last     = -1;
2464          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2465          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
         if (rrc == 0) { ADD_ACTIVE(state_offset + 2 + 2*LINK_SIZE, 0); }  
2466          }          }
2467          if (rrc == 0)
2468            { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }
2469        break;        break;
2470    
2471    
# Line 2417  for (;;) Line 2481  for (;;)
2481    /* We have finished the processing at the current subject character. If no    /* We have finished the processing at the current subject character. If no
2482    new states have been set for the next character, we have found all the    new states have been set for the next character, we have found all the
2483    matches that we are going to find. If we are at the top level and partial    matches that we are going to find. If we are at the top level and partial
2484    matching has been requested, check for appropriate conditions. */    matching has been requested, check for appropriate conditions. The
2485      "forced_fail" variable counts the number of (*F) encountered for the character. If it
2486      is equal to the original active_count (saved in workspace[1]) it means that
2487      (*F) was found on every active state. In this case we don't want to give a
2488      partial match. */
2489    
2490    if (new_count <= 0)    if (new_count <= 0)
2491      {      {
2492      if (match_count < 0 &&                     /* No matches found */      if (rlevel == 1 &&                               /* Top level, and */
2493          rlevel == 1 &&                         /* Top level match function */          reached_end != workspace[1] &&               /* Not all reached end */
2494          (md->moptions & PCRE_PARTIAL) != 0 &&  /* Want partial matching */          forced_fail != workspace[1] &&               /* Not all forced fail & */
2495          ptr >= end_subject &&                  /* Reached end of subject */          (                                            /* either... */
2496          ptr > current_subject)                 /* Matched non-empty string */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
2497            ||                                           /* or... */
2498            ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
2499             match_count < 0)                            /* no matches */
2500            ) &&                                         /* And... */
2501            ptr >= end_subject &&                     /* Reached end of subject */
2502            ptr > current_subject)                    /* Matched non-empty string */
2503        {        {
2504        if (offsetcount >= 2)        if (offsetcount >= 2)
2505          {          {
# Line 2484  Returns:          > 0 => number of match Line 2558  Returns:          > 0 => number of match
2558                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2559  */  */
2560    
2561  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
2562  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2563    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2564    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 2574  md->end_subject = end_subject; Line 2648  md->end_subject = end_subject;
2648  md->moptions = options;  md->moptions = options;
2649  md->poptions = re->options;  md->poptions = re->options;
2650    
2651    /* If the BSR option is not set at match time, copy what was set
2652    at compile time. */
2653    
2654    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2655      {
2656      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2657        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2658    #ifdef BSR_ANYCRLF
2659      else md->moptions |= PCRE_BSR_ANYCRLF;
2660    #endif
2661      }
2662    
2663  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
2664  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
2665    
# Line 2581  switch ((((options & PCRE_NEWLINE_BITS) Line 2667  switch ((((options & PCRE_NEWLINE_BITS)
2667           PCRE_NEWLINE_BITS)           PCRE_NEWLINE_BITS)
2668    {    {
2669    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
2670    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
2671    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
2672    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
2673         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
2674    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
2675    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2676    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
# Line 2644  if (md->tables == NULL) md->tables = _pc Line 2730  if (md->tables == NULL) md->tables = _pc
2730  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2731    
2732  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
2733  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2734  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2735    
2736  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 2655  studied, there may be a bitmap of possib Line 2741  studied, there may be a bitmap of possib
2741    
2742  if (!anchored)  if (!anchored)
2743    {    {
2744    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2745      {      {
2746      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2747      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 2672  if (!anchored) Line 2758  if (!anchored)
2758  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2759  character" set. */  character" set. */
2760    
2761  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2762    {    {
2763    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2764    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 2680  if ((re->options & PCRE_REQCHSET) != 0) Line 2766  if ((re->options & PCRE_REQCHSET) != 0)
2766    }    }
2767    
2768  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
2769  failed match. Unless restarting, optimize by moving to the first match  failed match. If not restarting, perform certain optimizations at the start of
2770  character if possible, when not anchored. Then unless wanting a partial match,  a match. */
 check for a required later character. */  
2771    
2772  for (;;)  for (;;)
2773    {    {
# Line 2692  for (;;) Line 2777  for (;;)
2777      {      {
2778      const uschar *save_end_subject = end_subject;      const uschar *save_end_subject = end_subject;
2779    
2780      /* Advance to a unique first char if possible. If firstline is TRUE, the      /* If firstline is TRUE, the start of the match is constrained to the first
2781      start of the match is constrained to the first line of a multiline string.      line of a multiline string. Implement this by temporarily adjusting
2782      Implement this by temporarily adjusting end_subject so that we stop      end_subject so that we stop scanning at a newline. If the match fails at
2783      scanning at a newline. If the match fails at the newline, later code breaks      the newline, later code breaks this loop. */
     this loop. */  
2784    
2785      if (firstline)      if (firstline)
2786        {        {
2787        const uschar *t = current_subject;        USPTR t = current_subject;
2788    #ifdef SUPPORT_UTF8
2789          if (utf8)
2790            {
2791            while (t < md->end_subject && !IS_NEWLINE(t))
2792              {
2793              t++;
2794              while (t < end_subject && (*t & 0xc0) == 0x80) t++;
2795              }
2796            }
2797          else
2798    #endif
2799        while (t < md->end_subject && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2800        end_subject = t;        end_subject = t;
2801        }        }
2802    
2803      if (first_byte >= 0)      /* There are some optimizations that avoid running the match if a known
2804        starting point is not found, or if a known later character is not present.
2805        However, there is an option that disables these, for testing and for
2806        ensuring that all callouts do actually occur. */
2807    
2808        if ((options & PCRE_NO_START_OPTIMIZE) == 0)
2809        {        {
       if (first_byte_caseless)  
         while (current_subject < end_subject &&  
                lcc[*current_subject] != first_byte)  
           current_subject++;  
       else  
         while (current_subject < end_subject && *current_subject != first_byte)  
           current_subject++;  
       }  
2810    
2811      /* Or to just after a linebreak for a multiline match if possible */        /* Advance to a known first byte. */
2812    
2813      else if (startline)        if (first_byte >= 0)
2814        {          {
2815        if (current_subject > md->start_subject + start_offset)          if (first_byte_caseless)
2816              while (current_subject < end_subject &&
2817                     lcc[*current_subject] != first_byte)
2818                current_subject++;
2819            else
2820              while (current_subject < end_subject &&
2821                     *current_subject != first_byte)
2822                current_subject++;
2823            }
2824    
2825          /* Or to just after a linebreak for a multiline match if possible */
2826    
2827          else if (startline)
2828          {          {
2829          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))          if (current_subject > md->start_subject + start_offset)
2830            current_subject++;            {
2831    #ifdef SUPPORT_UTF8
2832              if (utf8)
2833                {
2834                while (current_subject < end_subject &&
2835                       !WAS_NEWLINE(current_subject))
2836                  {
2837                  current_subject++;
2838                  while(current_subject < end_subject &&
2839                        (*current_subject & 0xc0) == 0x80)
2840                    current_subject++;
2841                  }
2842                }
2843              else
2844    #endif
2845              while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
2846                current_subject++;
2847    
2848          /* If we have just passed a CR and the newline option is ANY or            /* If we have just passed a CR and the newline option is ANY or
2849          ANYCRLF, and we are now at a LF, advance the match position by one more            ANYCRLF, and we are now at a LF, advance the match position by one
2850          character. */            more character. */
2851    
2852          if (current_subject[-1] == '\r' &&            if (current_subject[-1] == CHAR_CR &&
2853               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2854               current_subject < end_subject &&                 current_subject < end_subject &&
2855               *current_subject == '\n')                 *current_subject == CHAR_NL)
2856            current_subject++;              current_subject++;
2857              }
2858          }          }
       }  
2859    
2860      /* Or to a non-unique first char after study */        /* Or to a non-unique first char after study */
2861    
2862      else if (start_bits != NULL)        else if (start_bits != NULL)
       {  
       while (current_subject < end_subject)  
2863          {          {
2864          register unsigned int c = *current_subject;          while (current_subject < end_subject)
2865          if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;            {
2866            else break;            register unsigned int c = *current_subject;
2867              if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
2868                else break;
2869              }
2870          }          }
2871        }        }
2872    
# Line 2767  for (;;) Line 2888  for (;;)
2888    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2889    don't do this when the string is sufficiently long.    don't do this when the string is sufficiently long.
2890    
2891    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested, and can
2892    */    also be explicitly deactivated. Furthermore, we have to disable it when
2893      restarting after a partial match, because the required character may have
2894      already been matched. */
2895    
2896    if (req_byte >= 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
2897          req_byte >= 0 &&
2898        end_subject - current_subject < REQ_BYTE_MAX &&        end_subject - current_subject < REQ_BYTE_MAX &&
2899        (options & PCRE_PARTIAL) == 0)        (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_RESTART)) == 0)
2900      {      {
2901      register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);      register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
2902    
# Line 2846  for (;;) Line 2970  for (;;)
2970    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
2971    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
2972    
2973    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == CHAR_CR &&
2974        current_subject < end_subject &&        current_subject < end_subject &&
2975        *current_subject == '\n' &&        *current_subject == CHAR_NL &&
2976        (re->options & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
2977          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
2978           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||
2979           md->nllen == 2))           md->nllen == 2))

Legend:
Removed from v.227  
changed lines
  Added in v.428

  ViewVC Help
Powered by ViewVC 1.1.5