/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 341 by ph10, Sat Apr 19 16:41:04 2008 UTC | revision 427 by ph10, Fri Aug 28 09:55:54 2009 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* PCRE is a library of functions to support regular expressions whose syntax  /* PCRE is a library of functions to support regular expressions whose syntax
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language (but see
7    below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 60  applications. */ Line 61  applications. */
61  #define SP "                   "  #define SP "                   "
62    
63    
   
64  /*************************************************  /*************************************************
65  *      Code parameters and static tables         *  *      Code parameters and static tables         *
66  *************************************************/  *************************************************/
# Line 223  Arguments: Line 223  Arguments:
223    rlevel            function call recursion level    rlevel            function call recursion level
224    recursing         regex recursive call level    recursing         regex recursive call level
225    
226  Returns:            > 0 => number of match offset pairs placed in offsets  Returns:            > 0 => number of match offset pairs placed in offsets
227                      = 0 => offsets overflowed; longest matches are present                      = 0 => offsets overflowed; longest matches are present
228                       -1 => failed to match                       -1 => failed to match
229                     < -1 => some kind of unexpected problem                     < -1 => some kind of unexpected problem
# Line 511  for (;;) Line 511  for (;;)
511      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
512      const uschar *code;      const uschar *code;
513      int state_offset = current_state->offset;      int state_offset = current_state->offset;
514      int count, codevalue;      int count, codevalue, rrc;
 #ifdef SUPPORT_UCP  
     int chartype, script;  
 #endif  
515    
516  #ifdef DEBUG  #ifdef DEBUG
517      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 739  for (;;) Line 736  for (;;)
736    
737        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
738        case OP_ANY:        case OP_ANY:
739        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))        if (clen > 0 && !IS_NEWLINE(ptr))
740          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
741        break;        break;
742    
# Line 760  for (;;) Line 757  for (;;)
757        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
758          {          {
759          if (clen == 0 ||          if (clen == 0 ||
760              (IS_NEWLINE(ptr) &&              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
761                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
762              ))              ))
763            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
# Line 825  for (;;) Line 822  for (;;)
822        if (clen > 0)        if (clen > 0)
823          {          {
824          BOOL OK;          BOOL OK;
825          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
826          switch(code[1])          switch(code[1])
827            {            {
828            case PT_ANY:            case PT_ANY:
# Line 833  for (;;) Line 830  for (;;)
830            break;            break;
831    
832            case PT_LAMP:            case PT_LAMP:
833            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
834            break;            break;
835    
836            case PT_GC:            case PT_GC:
837            OK = category == code[2];            OK = _pcre_ucp_gentype[prop->chartype] == code[2];
838            break;            break;
839    
840            case PT_PC:            case PT_PC:
841            OK = chartype == code[2];            OK = prop->chartype == code[2];
842            break;            break;
843    
844            case PT_SC:            case PT_SC:
845            OK = script == code[2];            OK = prop->script == code[2];
846            break;            break;
847    
848            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 877  for (;;) Line 874  for (;;)
874          {          {
875          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
876              (c < 256 &&              (c < 256 &&
877                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
878                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
879            {            {
880            if (count > 0 && codevalue == OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_TYPEPOSPLUS)
# Line 903  for (;;) Line 897  for (;;)
897          {          {
898          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
899              (c < 256 &&              (c < 256 &&
900                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
901                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
902            {            {
903            if (codevalue == OP_TYPEPOSQUERY)            if (codevalue == OP_TYPEPOSQUERY)
# Line 928  for (;;) Line 919  for (;;)
919          {          {
920          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
921              (c < 256 &&              (c < 256 &&
922                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
923                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
924            {            {
925            if (codevalue == OP_TYPEPOSSTAR)            if (codevalue == OP_TYPEPOSSTAR)
# Line 951  for (;;) Line 939  for (;;)
939          {          {
940          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
941              (c < 256 &&              (c < 256 &&
942                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
943                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
944            {            {
945            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
# Line 975  for (;;) Line 960  for (;;)
960          {          {
961          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
962              (c < 256 &&              (c < 256 &&
963                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
964                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
965            {            {
966            if (codevalue == OP_TYPEPOSUPTO)            if (codevalue == OP_TYPEPOSUPTO)
# Line 1009  for (;;) Line 991  for (;;)
991        if (clen > 0)        if (clen > 0)
992          {          {
993          BOOL OK;          BOOL OK;
994          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
995          switch(code[2])          switch(code[2])
996            {            {
997            case PT_ANY:            case PT_ANY:
# Line 1017  for (;;) Line 999  for (;;)
999            break;            break;
1000    
1001            case PT_LAMP:            case PT_LAMP:
1002            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1003            break;            break;
1004    
1005            case PT_GC:            case PT_GC:
1006            OK = category == code[3];            OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1007            break;            break;
1008    
1009            case PT_PC:            case PT_PC:
1010            OK = chartype == code[3];            OK = prop->chartype == code[3];
1011            break;            break;
1012    
1013            case PT_SC:            case PT_SC:
1014            OK = script == code[3];            OK = prop->script == code[3];
1015            break;            break;
1016    
1017            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1058  for (;;) Line 1040  for (;;)
1040        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1041        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1042        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1043        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1044          {          {
1045          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1046          int ncount = 0;          int ncount = 0;
# Line 1072  for (;;) Line 1054  for (;;)
1054            int nd;            int nd;
1055            int ndlen = 1;            int ndlen = 1;
1056            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1057            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1058            ncount++;            ncount++;
1059            nptr += ndlen;            nptr += ndlen;
1060            }            }
# Line 1231  for (;;) Line 1213  for (;;)
1213        if (clen > 0)        if (clen > 0)
1214          {          {
1215          BOOL OK;          BOOL OK;
1216          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1217          switch(code[2])          switch(code[2])
1218            {            {
1219            case PT_ANY:            case PT_ANY:
# Line 1239  for (;;) Line 1221  for (;;)
1221            break;            break;
1222    
1223            case PT_LAMP:            case PT_LAMP:
1224            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1225            break;            break;
1226    
1227            case PT_GC:            case PT_GC:
1228            OK = category == code[3];            OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1229            break;            break;
1230    
1231            case PT_PC:            case PT_PC:
1232            OK = chartype == code[3];            OK = prop->chartype == code[3];
1233            break;            break;
1234    
1235            case PT_SC:            case PT_SC:
1236            OK = script == code[3];            OK = prop->script == code[3];
1237            break;            break;
1238    
1239            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1289  for (;;) Line 1271  for (;;)
1271        QS2:        QS2:
1272    
1273        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1274        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1275          {          {
1276          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1277          int ncount = 0;          int ncount = 0;
# Line 1304  for (;;) Line 1286  for (;;)
1286            int nd;            int nd;
1287            int ndlen = 1;            int ndlen = 1;
1288            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1289            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1290            ncount++;            ncount++;
1291            nptr += ndlen;            nptr += ndlen;
1292            }            }
# Line 1478  for (;;) Line 1460  for (;;)
1460        if (clen > 0)        if (clen > 0)
1461          {          {
1462          BOOL OK;          BOOL OK;
1463          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1464          switch(code[4])          switch(code[4])
1465            {            {
1466            case PT_ANY:            case PT_ANY:
# Line 1486  for (;;) Line 1468  for (;;)
1468            break;            break;
1469    
1470            case PT_LAMP:            case PT_LAMP:
1471            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1472            break;            break;
1473    
1474            case PT_GC:            case PT_GC:
1475            OK = category == code[5];            OK = _pcre_ucp_gentype[prop->chartype] == code[5];
1476            break;            break;
1477    
1478            case PT_PC:            case PT_PC:
1479            OK = chartype == code[5];            OK = prop->chartype == code[5];
1480            break;            break;
1481    
1482            case PT_SC:            case PT_SC:
1483            OK = script == code[5];            OK = prop->script == code[5];
1484            break;            break;
1485    
1486            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1531  for (;;) Line 1513  for (;;)
1513        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1514          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1515        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1516        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1517          {          {
1518          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1519          int ncount = 0;          int ncount = 0;
# Line 1545  for (;;) Line 1527  for (;;)
1527            int nd;            int nd;
1528            int ndlen = 1;            int ndlen = 1;
1529            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1530            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1531            ncount++;            ncount++;
1532            nptr += ndlen;            nptr += ndlen;
1533            }            }
# Line 1725  for (;;) Line 1707  for (;;)
1707            other case of the character. */            other case of the character. */
1708    
1709  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1710            othercase = _pcre_ucp_othercase(c);            othercase = UCD_OTHERCASE(c);
1711  #else  #else
1712            othercase = NOTACHAR;            othercase = NOTACHAR;
1713  #endif  #endif
# Line 1750  for (;;) Line 1732  for (;;)
1732        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
1733    
1734        case OP_EXTUNI:        case OP_EXTUNI:
1735        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1736          {          {
1737          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1738          int ncount = 0;          int ncount = 0;
# Line 1758  for (;;) Line 1740  for (;;)
1740            {            {
1741            int nclen = 1;            int nclen = 1;
1742            GETCHARLEN(c, nptr, nclen);            GETCHARLEN(c, nptr, nclen);
1743            if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(c) != ucp_M) break;
1744            ncount++;            ncount++;
1745            nptr += nclen;            nptr += nclen;
1746            }            }
# Line 1926  for (;;) Line 1908  for (;;)
1908            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1909              {              {
1910  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1911              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1912  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1913              }              }
1914            else            else
# Line 1964  for (;;) Line 1946  for (;;)
1946            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1947              {              {
1948  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1949              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1950  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1951              }              }
1952            else            else
# Line 2000  for (;;) Line 1982  for (;;)
1982            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1983              {              {
1984  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1985              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1986  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1987              }              }
1988            else            else
# Line 2032  for (;;) Line 2014  for (;;)
2014            if (utf8 && d >= 128)            if (utf8 && d >= 128)
2015              {              {
2016  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2017              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2018  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2019              }              }
2020            else            else
# Line 2067  for (;;) Line 2049  for (;;)
2049            if (utf8 && d >= 128)            if (utf8 && d >= 128)
2050              {              {
2051  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2052              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2053  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2054              }              }
2055            else            else
# Line 2175  for (;;) Line 2157  for (;;)
2157    
2158  /* ========================================================================== */  /* ========================================================================== */
2159        /* These are the opcodes for fancy brackets of various kinds. We have        /* These are the opcodes for fancy brackets of various kinds. We have
2160        to use recursion in order to handle them. The "always failing" assersion        to use recursion in order to handle them. The "always failing" assertion
2161        (?!) is optimised when compiling to OP_FAIL, so we have to support that,        (?!) is optimised to OP_FAIL when compiling, so we have to support that,
2162        though the other "backtracking verbs" are not supported. */        though the other "backtracking verbs" are not supported. */
2163    
2164        case OP_FAIL:        case OP_FAIL:
2165        break;        break;
2166    
2167        case OP_ASSERT:        case OP_ASSERT:
2168        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
# Line 2218  for (;;) Line 2200  for (;;)
2200          {          {
2201          int local_offsets[1000];          int local_offsets[1000];
2202          int local_workspace[1000];          int local_workspace[1000];
2203          int condcode = code[LINK_SIZE+1];          int codelink = GET(code, 1);
2204            int condcode;
2205    
2206            /* Because of the way auto-callout works during compile, a callout item
2207            is inserted between OP_COND and an assertion condition. This does not
2208            happen for the other conditions. */
2209    
2210            if (code[LINK_SIZE+1] == OP_CALLOUT)
2211              {
2212              rrc = 0;
2213              if (pcre_callout != NULL)
2214                {
2215                pcre_callout_block cb;
2216                cb.version          = 1;   /* Version 1 of the callout block */
2217                cb.callout_number   = code[LINK_SIZE+2];
2218                cb.offset_vector    = offsets;
2219                cb.subject          = (PCRE_SPTR)start_subject;
2220                cb.subject_length   = end_subject - start_subject;
2221                cb.start_match      = current_subject - start_subject;
2222                cb.current_position = ptr - start_subject;
2223                cb.pattern_position = GET(code, LINK_SIZE + 3);
2224                cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2225                cb.capture_top      = 1;
2226                cb.capture_last     = -1;
2227                cb.callout_data     = md->callout_data;
2228                if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2229                }
2230              if (rrc > 0) break;                      /* Fail this thread */
2231              code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */
2232              }
2233    
2234            condcode = code[LINK_SIZE+1];
2235    
2236          /* Back reference conditions are not supported */          /* Back reference conditions are not supported */
2237    
# Line 2227  for (;;) Line 2240  for (;;)
2240          /* The DEFINE condition is always false */          /* The DEFINE condition is always false */
2241    
2242          if (condcode == OP_DEF)          if (condcode == OP_DEF)
2243            {            { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
           ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);  
           }  
2244    
2245          /* The only supported version of OP_RREF is for the value RREF_ANY,          /* The only supported version of OP_RREF is for the value RREF_ANY,
2246          which means "test if in any recursion". We can't test for specifically          which means "test if in any recursion". We can't test for specifically
# Line 2239  for (;;) Line 2250  for (;;)
2250            {            {
2251            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2252            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2253            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0)
2254              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2255              else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2256            }            }
2257    
2258          /* Otherwise, the condition is an assertion */          /* Otherwise, the condition is an assertion */
# Line 2270  for (;;) Line 2282  for (;;)
2282                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2283              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
2284            else            else
2285              { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2286            }            }
2287          }          }
2288        break;        break;
# Line 2422  for (;;) Line 2434  for (;;)
2434        /* Handle callouts */        /* Handle callouts */
2435    
2436        case OP_CALLOUT:        case OP_CALLOUT:
2437          rrc = 0;
2438        if (pcre_callout != NULL)        if (pcre_callout != NULL)
2439          {          {
         int rrc;  
2440          pcre_callout_block cb;          pcre_callout_block cb;
2441          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2442          cb.callout_number   = code[1];          cb.callout_number   = code[1];
# Line 2439  for (;;) Line 2451  for (;;)
2451          cb.capture_last     = -1;          cb.capture_last     = -1;
2452          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2453          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
         if (rrc == 0) { ADD_ACTIVE(state_offset + 2 + 2*LINK_SIZE, 0); }  
2454          }          }
2455          if (rrc == 0)
2456            { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }
2457        break;        break;
2458    
2459    
# Line 2460  for (;;) Line 2473  for (;;)
2473    
2474    if (new_count <= 0)    if (new_count <= 0)
2475      {      {
2476      if (match_count < 0 &&                     /* No matches found */      if (rlevel == 1 &&                               /* Top level, and */
2477          rlevel == 1 &&                         /* Top level match function */          (                                            /* either... */
2478          (md->moptions & PCRE_PARTIAL) != 0 &&  /* Want partial matching */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
2479          ptr >= end_subject &&                  /* Reached end of subject */          ||                                           /* or... */
2480          ptr > current_subject)                 /* Matched non-empty string */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
2481             match_count < 0)                            /* no matches */
2482            ) &&                                         /* And... */
2483            ptr >= end_subject &&                     /* Reached end of subject */
2484            ptr > current_subject)                    /* Matched non-empty string */
2485        {        {
2486        if (offsetcount >= 2)        if (offsetcount >= 2)
2487          {          {
# Line 2523  Returns:          > 0 => number of match Line 2540  Returns:          > 0 => number of match
2540                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2541  */  */
2542    
2543  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
2544  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2545    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2546    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 2632  switch ((((options & PCRE_NEWLINE_BITS) Line 2649  switch ((((options & PCRE_NEWLINE_BITS)
2649           PCRE_NEWLINE_BITS)           PCRE_NEWLINE_BITS)
2650    {    {
2651    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
2652    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
2653    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
2654    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
2655         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
2656    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
2657    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2658    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
# Line 2731  if ((re->flags & PCRE_REQCHSET) != 0) Line 2748  if ((re->flags & PCRE_REQCHSET) != 0)
2748    }    }
2749    
2750  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
2751  failed match. Unless restarting, optimize by moving to the first match  failed match. If not restarting, perform certain optimizations at the start of
2752  character if possible, when not anchored. Then unless wanting a partial match,  a match. */
 check for a required later character. */  
2753    
2754  for (;;)  for (;;)
2755    {    {
# Line 2743  for (;;) Line 2759  for (;;)
2759      {      {
2760      const uschar *save_end_subject = end_subject;      const uschar *save_end_subject = end_subject;
2761    
2762      /* Advance to a unique first char if possible. If firstline is TRUE, the      /* If firstline is TRUE, the start of the match is constrained to the first
2763      start of the match is constrained to the first line of a multiline string.      line of a multiline string. Implement this by temporarily adjusting
2764      Implement this by temporarily adjusting end_subject so that we stop      end_subject so that we stop scanning at a newline. If the match fails at
2765      scanning at a newline. If the match fails at the newline, later code breaks      the newline, later code breaks this loop. */
     this loop. */  
2766    
2767      if (firstline)      if (firstline)
2768        {        {
2769        const uschar *t = current_subject;        USPTR t = current_subject;
2770    #ifdef SUPPORT_UTF8
2771          if (utf8)
2772            {
2773            while (t < md->end_subject && !IS_NEWLINE(t))
2774              {
2775              t++;
2776              while (t < end_subject && (*t & 0xc0) == 0x80) t++;
2777              }
2778            }
2779          else
2780    #endif
2781        while (t < md->end_subject && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2782        end_subject = t;        end_subject = t;
2783        }        }
2784    
2785      if (first_byte >= 0)      /* There are some optimizations that avoid running the match if a known
2786        starting point is not found, or if a known later character is not present.
2787        However, there is an option that disables these, for testing and for
2788        ensuring that all callouts do actually occur. */
2789    
2790        if ((options & PCRE_NO_START_OPTIMIZE) == 0)
2791        {        {
       if (first_byte_caseless)  
         while (current_subject < end_subject &&  
                lcc[*current_subject] != first_byte)  
           current_subject++;  
       else  
         while (current_subject < end_subject && *current_subject != first_byte)  
           current_subject++;  
       }  
2792    
2793      /* Or to just after a linebreak for a multiline match if possible */        /* Advance to a known first byte. */
2794    
2795      else if (startline)        if (first_byte >= 0)
       {  
       if (current_subject > md->start_subject + start_offset)  
2796          {          {
2797          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))          if (first_byte_caseless)
2798            current_subject++;            while (current_subject < end_subject &&
2799                     lcc[*current_subject] != first_byte)
2800                current_subject++;
2801            else
2802              while (current_subject < end_subject &&
2803                     *current_subject != first_byte)
2804                current_subject++;
2805            }
2806    
2807          /* Or to just after a linebreak for a multiline match if possible */
2808    
2809          /* If we have just passed a CR and the newline option is ANY or        else if (startline)
2810          ANYCRLF, and we are now at a LF, advance the match position by one more          {
2811          character. */          if (current_subject > md->start_subject + start_offset)
2812              {
2813          if (current_subject[-1] == '\r' &&  #ifdef SUPPORT_UTF8
2814               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&            if (utf8)
2815               current_subject < end_subject &&              {
2816               *current_subject == '\n')              while (current_subject < end_subject &&
2817            current_subject++;                     !WAS_NEWLINE(current_subject))
2818                  {
2819                  current_subject++;
2820                  while(current_subject < end_subject &&
2821                        (*current_subject & 0xc0) == 0x80)
2822                    current_subject++;
2823                  }
2824                }
2825              else
2826    #endif
2827              while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
2828                current_subject++;
2829    
2830              /* If we have just passed a CR and the newline option is ANY or
2831              ANYCRLF, and we are now at a LF, advance the match position by one
2832              more character. */
2833    
2834              if (current_subject[-1] == CHAR_CR &&
2835                   (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2836                   current_subject < end_subject &&
2837                   *current_subject == CHAR_NL)
2838                current_subject++;
2839              }
2840          }          }
       }  
2841    
2842      /* Or to a non-unique first char after study */        /* Or to a non-unique first char after study */
2843    
2844      else if (start_bits != NULL)        else if (start_bits != NULL)
       {  
       while (current_subject < end_subject)  
2845          {          {
2846          register unsigned int c = *current_subject;          while (current_subject < end_subject)
2847          if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;            {
2848            else break;            register unsigned int c = *current_subject;
2849              if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
2850                else break;
2851              }
2852          }          }
2853        }        }
2854    
# Line 2818  for (;;) Line 2870  for (;;)
2870    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2871    don't do this when the string is sufficiently long.    don't do this when the string is sufficiently long.
2872    
2873    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested, and can
2874    */    also be explicitly deactivated. */
2875    
2876    if (req_byte >= 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
2877          req_byte >= 0 &&
2878        end_subject - current_subject < REQ_BYTE_MAX &&        end_subject - current_subject < REQ_BYTE_MAX &&
2879        (options & PCRE_PARTIAL) == 0)        (options & PCRE_PARTIAL) == 0)
2880      {      {
# Line 2897  for (;;) Line 2950  for (;;)
2950    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
2951    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
2952    
2953    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == CHAR_CR &&
2954        current_subject < end_subject &&        current_subject < end_subject &&
2955        *current_subject == '\n' &&        *current_subject == CHAR_NL &&
2956        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
2957          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
2958           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||

Legend:
Removed from v.341  
changed lines
  Added in v.427

  ViewVC Help
Powered by ViewVC 1.1.5