/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 850 by zherczeg, Wed Jan 4 17:29:11 2012 UTC revision 979 by ph10, Sun Jun 17 19:08:41 2012 UTC
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40    
   
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
45    
46    
# Line 282  typedef struct stateblock { Line 281  typedef struct stateblock {
281    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
282  } stateblock;  } stateblock;
283    
284  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
285    
286    
287  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 382  for the current character, one for the f Line 381  for the current character, one for the f
381      next_new_state->count  = (y); \      next_new_state->count  = (y); \
382      next_new_state->data   = (z); \      next_new_state->data   = (z); \
383      next_new_state++; \      next_new_state++; \
384      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
385          (x), (y), (z), __LINE__)); \
386      } \      } \
387    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
388    
# Line 424  BOOL utf = (md->poptions & PCRE_UTF8) != Line 424  BOOL utf = (md->poptions & PCRE_UTF8) !=
424  BOOL utf = FALSE;  BOOL utf = FALSE;
425  #endif  #endif
426    
427    BOOL reset_could_continue = FALSE;
428    
429  rlevel++;  rlevel++;
430  offsetcount &= (-2);  offsetcount &= (-2);
431    
# Line 571  for (;;) Line 573  for (;;)
573    int clen, dlen;    int clen, dlen;
574    unsigned int c, d;    unsigned int c, d;
575    int forced_fail = 0;    int forced_fail = 0;
576    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
577      BOOL could_continue = reset_could_continue;
578      reset_could_continue = FALSE;
579    
580    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
581    new state list. */    new state list. */
# Line 607  for (;;) Line 611  for (;;)
611    
612    if (ptr < end_subject)    if (ptr < end_subject)
613      {      {
614      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
615  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
616      if (utf) { GETCHARLEN(c, ptr, clen); } else      if (utf) { GETCHARLEN(c, ptr, clen); } else
617  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
# Line 641  for (;;) Line 645  for (;;)
645    
646      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
647      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
648      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
649        state, arrange for it to be passed on. */
650    
651      if (state_offset < 0)      if (state_offset < 0)
652        {        {
# Line 650  for (;;) Line 655  for (;;)
655          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
656          ADD_NEW_DATA(state_offset, current_state->count,          ADD_NEW_DATA(state_offset, current_state->count,
657            current_state->data - 1);            current_state->data - 1);
658            if (could_continue) reset_could_continue = TRUE;
659          continue;          continue;
660          }          }
661        else        else
# Line 689  for (;;) Line 695  for (;;)
695      permitted.      permitted.
696    
697      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
698      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
699      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
700      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
701      */      these ones to new opcodes. */
702    
703      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
704        {        {
# Line 783  for (;;) Line 789  for (;;)
789              offsets[0] = (int)(current_subject - start_subject);              offsets[0] = (int)(current_subject - start_subject);
790              offsets[1] = (int)(ptr - start_subject);              offsets[1] = (int)(ptr - start_subject);
791              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
792                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
793              }              }
794            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
795              {              {
# Line 888  for (;;) Line 894  for (;;)
894        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
895        case OP_ANY:        case OP_ANY:
896        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
897          { ADD_NEW(state_offset + 1, 0); }          {
898            if (ptr + 1 >= md->end_subject &&
899                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
900                NLBLOCK->nltype == NLTYPE_FIXED &&
901                NLBLOCK->nllen == 2 &&
902                c == NLBLOCK->nl[0])
903              {
904              could_continue = partial_newline = TRUE;
905              }
906            else
907              {
908              ADD_NEW(state_offset + 1, 0);
909              }
910            }
911        break;        break;
912    
913        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 916  for (;;) Line 935  for (;;)
935                 (ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
936              ))              ))
937            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
938            else if (ptr + 1 >= md->end_subject &&
939                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
940                     NLBLOCK->nltype == NLTYPE_FIXED &&
941                     NLBLOCK->nllen == 2 &&
942                     c == NLBLOCK->nl[0])
943              {
944              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
945                {
946                reset_could_continue = TRUE;
947                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
948                }
949              else could_continue = partial_newline = TRUE;
950              }
951          }          }
952        break;        break;
953    
# Line 928  for (;;) Line 960  for (;;)
960          else if (clen == 0 ||          else if (clen == 0 ||
961              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
962            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
963            else if (ptr + 1 >= md->end_subject &&
964                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
965                     NLBLOCK->nltype == NLTYPE_FIXED &&
966                     NLBLOCK->nllen == 2 &&
967                     c == NLBLOCK->nl[0])
968              {
969              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
970                {
971                reset_could_continue = TRUE;
972                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
973                }
974              else could_continue = partial_newline = TRUE;
975              }
976          }          }
977        else if (IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
978          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 1090  for (;;) Line 1135  for (;;)
1135        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1136        if (clen > 0)        if (clen > 0)
1137          {          {
1138          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1139                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1140                NLBLOCK->nltype == NLTYPE_FIXED &&
1141                NLBLOCK->nllen == 2 &&
1142                c == NLBLOCK->nl[0])
1143              {
1144              could_continue = partial_newline = TRUE;
1145              }
1146            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1147              (c < 256 &&              (c < 256 &&
1148                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1149                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1113  for (;;) Line 1166  for (;;)
1166        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1167        if (clen > 0)        if (clen > 0)
1168          {          {
1169          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1170                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1171                NLBLOCK->nltype == NLTYPE_FIXED &&
1172                NLBLOCK->nllen == 2 &&
1173                c == NLBLOCK->nl[0])
1174              {
1175              could_continue = partial_newline = TRUE;
1176              }
1177            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1178              (c < 256 &&              (c < 256 &&
1179                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1180                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1135  for (;;) Line 1196  for (;;)
1196        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1197        if (clen > 0)        if (clen > 0)
1198          {          {
1199          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1200                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1201                NLBLOCK->nltype == NLTYPE_FIXED &&
1202                NLBLOCK->nllen == 2 &&
1203                c == NLBLOCK->nl[0])
1204              {
1205              could_continue = partial_newline = TRUE;
1206              }
1207            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1208              (c < 256 &&              (c < 256 &&
1209                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1210                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1155  for (;;) Line 1224  for (;;)
1224        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1225        if (clen > 0)        if (clen > 0)
1226          {          {
1227          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1228                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1229                NLBLOCK->nltype == NLTYPE_FIXED &&
1230                NLBLOCK->nllen == 2 &&
1231                c == NLBLOCK->nl[0])
1232              {
1233              could_continue = partial_newline = TRUE;
1234              }
1235            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1236              (c < 256 &&              (c < 256 &&
1237                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1238                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1176  for (;;) Line 1253  for (;;)
1253        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1254        if (clen > 0)        if (clen > 0)
1255          {          {
1256          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1257                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1258                NLBLOCK->nltype == NLTYPE_FIXED &&
1259                NLBLOCK->nllen == 2 &&
1260                c == NLBLOCK->nl[0])
1261              {
1262              could_continue = partial_newline = TRUE;
1263              }
1264            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1265              (c < 256 &&              (c < 256 &&
1266                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1267                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1824  for (;;) Line 1909  for (;;)
1909            ncount++;            ncount++;
1910            nptr += ndlen;            nptr += ndlen;
1911            }            }
1912            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1913                reset_could_continue = TRUE;
1914          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1915            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1916          else          else
# Line 2037  for (;;) Line 2124  for (;;)
2124            ncount++;            ncount++;
2125            nptr += nclen;            nptr += nclen;
2126            }            }
2127            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2128                reset_could_continue = TRUE;
2129          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2130          }          }
2131        break;        break;
# Line 2062  for (;;) Line 2151  for (;;)
2151          break;          break;
2152    
2153          case 0x000d:          case 0x000d:
2154          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2155              {
2156              ADD_NEW(state_offset + 1, 0);
2157              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2158                reset_could_continue = TRUE;
2159              }
2160            else if (ptr[1] == 0x0a)
2161            {            {
2162            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2163            }            }
# Line 2171  for (;;) Line 2266  for (;;)
2266        break;        break;
2267    
2268        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2269        /* Match a negated single character casefully. This is only used for        /* Match a negated single character casefully. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2270    
2271        case OP_NOT:        case OP_NOT:
2272        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2273        break;        break;
2274    
2275        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2276        /* Match a negated single character caselessly. This is only used for        /* Match a negated single character caselessly. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2277    
2278        case OP_NOTI:        case OP_NOTI:
2279        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0)
2280          { ADD_NEW(state_offset + dlen + 1, 0); }          {
2281            unsigned int otherd;
2282    #ifdef SUPPORT_UTF
2283            if (utf && d >= 128)
2284              {
2285    #ifdef SUPPORT_UCP
2286              otherd = UCD_OTHERCASE(d);
2287    #endif  /* SUPPORT_UCP */
2288              }
2289            else
2290    #endif  /* SUPPORT_UTF */
2291            otherd = TABLE_GET(d, fcc, d);
2292            if (c != d && c != otherd)
2293              { ADD_NEW(state_offset + dlen + 1, 0); }
2294            }
2295        break;        break;
2296    
2297        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 2557  for (;;) Line 2662  for (;;)
2662              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2663              cb.callout_number   = code[LINK_SIZE+2];              cb.callout_number   = code[LINK_SIZE+2];
2664              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2665    #ifdef COMPILE_PCRE8
2666              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2667    #else
2668                cb.subject          = (PCRE_SPTR16)start_subject;
2669    #endif
2670              cb.subject_length   = (int)(end_subject - start_subject);              cb.subject_length   = (int)(end_subject - start_subject);
2671              cb.start_match      = (int)(current_subject - start_subject);              cb.start_match      = (int)(current_subject - start_subject);
2672              cb.current_position = (int)(ptr - start_subject);              cb.current_position = (int)(ptr - start_subject);
# Line 2686  for (;;) Line 2795  for (;;)
2795            {            {
2796            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2797              {              {
             const pcre_uchar *p = start_subject + local_offsets[rc];  
             const pcre_uchar *pp = start_subject + local_offsets[rc+1];  
2798              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2799  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2800              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;              if (utf)
2801                  {
2802                  const pcre_uchar *p = start_subject + local_offsets[rc];
2803                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2804                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2805                  }
2806  #endif  #endif
2807              if (charcount > 0)              if (charcount > 0)
2808                {                {
# Line 2789  for (;;) Line 2901  for (;;)
2901              const pcre_uchar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2902              charcount = (int)(pp - p);              charcount = (int)(pp - p);
2903  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2904              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;              if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2905  #endif  #endif
2906              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2907              }              }
# Line 2871  for (;;) Line 2983  for (;;)
2983            else            else
2984              {              {
2985  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2986              const pcre_uchar *p = start_subject + local_offsets[0];              if (utf)
2987              const pcre_uchar *pp = start_subject + local_offsets[1];                {
2988              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;                const pcre_uchar *p = start_subject + local_offsets[0];
2989                  const pcre_uchar *pp = start_subject + local_offsets[1];
2990                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2991                  }
2992  #endif  #endif
2993              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2994              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
# Line 2896  for (;;) Line 3011  for (;;)
3011          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
3012          cb.callout_number   = code[1];          cb.callout_number   = code[1];
3013          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
3014    #ifdef COMPILE_PCRE8
3015          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
3016    #else
3017            cb.subject          = (PCRE_SPTR16)start_subject;
3018    #endif
3019          cb.subject_length   = (int)(end_subject - start_subject);          cb.subject_length   = (int)(end_subject - start_subject);
3020          cb.start_match      = (int)(current_subject - start_subject);          cb.start_match      = (int)(current_subject - start_subject);
3021          cb.current_position = (int)(ptr - start_subject);          cb.current_position = (int)(ptr - start_subject);
# Line 2938  for (;;) Line 3057  for (;;)
3057    if (new_count <= 0)    if (new_count <= 0)
3058      {      {
3059      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3060          could_continue &&                            /* Some could go on */          could_continue &&                            /* Some could go on, and */
3061          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3062          (                                            /* either... */          (                                            /* either... */
3063          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2946  for (;;) Line 3065  for (;;)
3065          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3066           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3067          ) &&                                         /* And... */          ) &&                                         /* And... */
3068          ptr >= end_subject &&                  /* Reached end of subject */          (
3069          ptr > md->start_used_ptr)              /* Inspected non-empty string */          partial_newline ||                           /* Either partial NL */
3070              (                                          /* or ... */
3071              ptr >= end_subject &&                /* End of subject and */
3072              ptr > md->start_used_ptr)            /* Inspected non-empty string */
3073              )
3074            )
3075        {        {
3076        if (offsetcount >= 2)        if (offsetcount >= 2)
3077          {          {
# Line 3013  pcre_dfa_exec(const pcre *argument_re, c Line 3137  pcre_dfa_exec(const pcre *argument_re, c
3137    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3138  #else  #else
3139  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3140  pcre16_dfa_exec(const pcre *argument_re, const pcre16_extra *extra_data,  pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3141    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3142    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3143  #endif  #endif
3144  {  {
3145  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3146  dfa_match_data match_block;  dfa_match_data match_block;
3147  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3148  BOOL utf, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3149  const pcre_uchar *current_subject, *end_subject;  const pcre_uchar *current_subject, *end_subject;
 const pcre_uint8 *lcc;  
   
3150  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
3151    
3152  const pcre_uchar *req_char_ptr;  const pcre_uchar *req_char_ptr;
# Line 3046  if (offsetcount < 0) return PCRE_ERROR_B Line 3168  if (offsetcount < 0) return PCRE_ERROR_B
3168  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3169  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3170    
3171  /* We need to find the pointer to any study data before we test for byte  /* Check that the first field in the block is the magic number. If it is not,
3172  flipping, so we scan the extra_data block first. This may set two fields in the  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3173  match block, so we must initialize them beforehand. However, the other fields  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3174  in the match block must not be set until after the byte flipping. */  means that the pattern is likely compiled with different endianness. */
3175    
3176    if (re->magic_number != MAGIC_NUMBER)
3177      return re->magic_number == REVERSED_MAGIC_NUMBER?
3178        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3179    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3180    
3181    /* If restarting after a partial match, do some sanity checks on the contents
3182    of the workspace. */
3183    
3184    if ((options & PCRE_DFA_RESTART) != 0)
3185      {
3186      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3187        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3188          return PCRE_ERROR_DFA_BADRESTART;
3189      }
3190    
3191    /* Set up study, callout, and table data */
3192    
3193  md->tables = re->tables;  md->tables = re->tables;
3194  md->callout_data = NULL;  md->callout_data = NULL;
# Line 3068  if (extra_data != NULL) Line 3207  if (extra_data != NULL)
3207      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3208    }    }
3209    
 /* Check that the first field in the block is the magic number. If it is not,  
 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to  
 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which  
 means that the pattern is likely compiled with different endianness. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   return re->magic_number == REVERSED_MAGIC_NUMBER?  
     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;  
 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;  
   
3210  /* Set some local values */  /* Set some local values */
3211    
3212  current_subject = (const pcre_uchar *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
# Line 3186  in other programs later. */ Line 3315  in other programs later. */
3315    
3316  if (md->tables == NULL) md->tables = PRIV(default_tables);  if (md->tables == NULL) md->tables = PRIV(default_tables);
3317    
3318  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3319  used in a loop when finding where to start. */  where to start. */
3320    
 lcc = md->tables + lcc_offset;  
3321  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3322  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3323    
# Line 3204  if (!anchored) Line 3332  if (!anchored)
3332    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3333      {      {
3334      has_first_char = TRUE;      has_first_char = TRUE;
3335      first_char = first_char2 = re->first_char;      first_char = first_char2 = (pcre_uchar)(re->first_char);
3336      if ((re->flags & PCRE_FCH_CASELESS) != 0)      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3337        {        {
3338        first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);        first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
# Line 3228  character" set. */ Line 3356  character" set. */
3356  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3357    {    {
3358    has_req_char = TRUE;    has_req_char = TRUE;
3359    req_char = req_char2 = re->req_char;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3360    if ((re->flags & PCRE_RCH_CASELESS) != 0)    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3361      {      {
3362      req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);      req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);

Legend:
Removed from v.850  
changed lines
  Added in v.979

  ViewVC Help
Powered by ViewVC 1.1.5