/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 391 by ph10, Tue Mar 17 21:16:01 2009 UTC | revision 426 by ph10, Wed Aug 26 15:38:32 2009 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* PCRE is a library of functions to support regular expressions whose syntax  /* PCRE is a library of functions to support regular expressions whose syntax
6  and semantics are as close as possible to those of the Perl 5 language (but see  and semantics are as close as possible to those of the Perl 5 language (but see
7  below for why this module is different).  below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
# Line 61  applications. */ Line 61  applications. */
61  #define SP "                   "  #define SP "                   "
62    
63    
   
64  /*************************************************  /*************************************************
65  *      Code parameters and static tables         *  *      Code parameters and static tables         *
66  *************************************************/  *************************************************/
# Line 512  for (;;) Line 511  for (;;)
511      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
512      const uschar *code;      const uschar *code;
513      int state_offset = current_state->offset;      int state_offset = current_state->offset;
514      int count, codevalue;      int count, codevalue, rrc;
515    
516  #ifdef DEBUG  #ifdef DEBUG
517      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 2158  for (;;) Line 2157  for (;;)
2157    
2158  /* ========================================================================== */  /* ========================================================================== */
2159        /* These are the opcodes for fancy brackets of various kinds. We have        /* These are the opcodes for fancy brackets of various kinds. We have
2160        to use recursion in order to handle them. The "always failing" assersion        to use recursion in order to handle them. The "always failing" assertion
2161        (?!) is optimised when compiling to OP_FAIL, so we have to support that,        (?!) is optimised to OP_FAIL when compiling, so we have to support that,
2162        though the other "backtracking verbs" are not supported. */        though the other "backtracking verbs" are not supported. */
2163    
2164        case OP_FAIL:        case OP_FAIL:
# Line 2201  for (;;) Line 2200  for (;;)
2200          {          {
2201          int local_offsets[1000];          int local_offsets[1000];
2202          int local_workspace[1000];          int local_workspace[1000];
2203          int condcode = code[LINK_SIZE+1];          int codelink = GET(code, 1);
2204            int condcode;
2205    
2206            /* Because of the way auto-callout works during compile, a callout item
2207            is inserted between OP_COND and an assertion condition. This does not
2208            happen for the other conditions. */
2209    
2210            if (code[LINK_SIZE+1] == OP_CALLOUT)
2211              {
2212              rrc = 0;
2213              if (pcre_callout != NULL)
2214                {
2215                pcre_callout_block cb;
2216                cb.version          = 1;   /* Version 1 of the callout block */
2217                cb.callout_number   = code[LINK_SIZE+2];
2218                cb.offset_vector    = offsets;
2219                cb.subject          = (PCRE_SPTR)start_subject;
2220                cb.subject_length   = end_subject - start_subject;
2221                cb.start_match      = current_subject - start_subject;
2222                cb.current_position = ptr - start_subject;
2223                cb.pattern_position = GET(code, LINK_SIZE + 3);
2224                cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2225                cb.capture_top      = 1;
2226                cb.capture_last     = -1;
2227                cb.callout_data     = md->callout_data;
2228                if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2229                }
2230              if (rrc > 0) break;                      /* Fail this thread */
2231              code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */
2232              }
2233    
2234            condcode = code[LINK_SIZE+1];
2235    
2236          /* Back reference conditions are not supported */          /* Back reference conditions are not supported */
2237    
# Line 2210  for (;;) Line 2240  for (;;)
2240          /* The DEFINE condition is always false */          /* The DEFINE condition is always false */
2241    
2242          if (condcode == OP_DEF)          if (condcode == OP_DEF)
2243            {            { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
           ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);  
           }  
2244    
2245          /* The only supported version of OP_RREF is for the value RREF_ANY,          /* The only supported version of OP_RREF is for the value RREF_ANY,
2246          which means "test if in any recursion". We can't test for specifically          which means "test if in any recursion". We can't test for specifically
# Line 2222  for (;;) Line 2250  for (;;)
2250            {            {
2251            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2252            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2253            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0)
2254              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2255              else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2256            }            }
2257    
2258          /* Otherwise, the condition is an assertion */          /* Otherwise, the condition is an assertion */
# Line 2253  for (;;) Line 2282  for (;;)
2282                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2283              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
2284            else            else
2285              { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2286            }            }
2287          }          }
2288        break;        break;
# Line 2405  for (;;) Line 2434  for (;;)
2434        /* Handle callouts */        /* Handle callouts */
2435    
2436        case OP_CALLOUT:        case OP_CALLOUT:
2437          rrc = 0;
2438        if (pcre_callout != NULL)        if (pcre_callout != NULL)
2439          {          {
         int rrc;  
2440          pcre_callout_block cb;          pcre_callout_block cb;
2441          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2442          cb.callout_number   = code[1];          cb.callout_number   = code[1];
# Line 2422  for (;;) Line 2451  for (;;)
2451          cb.capture_last     = -1;          cb.capture_last     = -1;
2452          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2453          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
         if (rrc == 0) { ADD_ACTIVE(state_offset + 2 + 2*LINK_SIZE, 0); }  
2454          }          }
2455          if (rrc == 0)
2456            { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }
2457        break;        break;
2458    
2459    
# Line 2747  for (;;) Line 2777  for (;;)
2777        while (t < md->end_subject && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2778        end_subject = t;        end_subject = t;
2779        }        }
2780    
2781      /* There are some optimizations that avoid running the match if a known      /* There are some optimizations that avoid running the match if a known
2782      starting point is not found, or if a known later character is not present.      starting point is not found, or if a known later character is not present.
2783      However, there is an option that disables these, for testing and for      However, there is an option that disables these, for testing and for
2784      ensuring that all callouts do actually occur. */      ensuring that all callouts do actually occur. */
2785    
2786      if ((options & PCRE_NO_START_OPTIMIZE) == 0)      if ((options & PCRE_NO_START_OPTIMIZE) == 0)
2787        {        {
2788    
2789        /* Advance to a known first byte. */        /* Advance to a known first byte. */
2790    
2791        if (first_byte >= 0)        if (first_byte >= 0)
2792          {          {
2793          if (first_byte_caseless)          if (first_byte_caseless)
# Line 2765  for (;;) Line 2795  for (;;)
2795                   lcc[*current_subject] != first_byte)                   lcc[*current_subject] != first_byte)
2796              current_subject++;              current_subject++;
2797          else          else
2798            while (current_subject < end_subject &&            while (current_subject < end_subject &&
2799                   *current_subject != first_byte)                   *current_subject != first_byte)
2800              current_subject++;              current_subject++;
2801          }          }
2802    
2803        /* Or to just after a linebreak for a multiline match if possible */        /* Or to just after a linebreak for a multiline match if possible */
2804    
2805        else if (startline)        else if (startline)
2806          {          {
2807          if (current_subject > md->start_subject + start_offset)          if (current_subject > md->start_subject + start_offset)
# Line 2779  for (;;) Line 2809  for (;;)
2809  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2810            if (utf8)            if (utf8)
2811              {              {
2812              while (current_subject < end_subject &&              while (current_subject < end_subject &&
2813                     !WAS_NEWLINE(current_subject))                     !WAS_NEWLINE(current_subject))
2814                {                {
2815                current_subject++;                current_subject++;
# Line 2792  for (;;) Line 2822  for (;;)
2822  #endif  #endif
2823            while (current_subject < end_subject && !WAS_NEWLINE(current_subject))            while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
2824              current_subject++;              current_subject++;
2825    
2826            /* If we have just passed a CR and the newline option is ANY or            /* If we have just passed a CR and the newline option is ANY or
2827            ANYCRLF, and we are now at a LF, advance the match position by one            ANYCRLF, and we are now at a LF, advance the match position by one
2828            more character. */            more character. */
2829    
2830            if (current_subject[-1] == CHAR_CR &&            if (current_subject[-1] == CHAR_CR &&
2831                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2832                 current_subject < end_subject &&                 current_subject < end_subject &&
# Line 2804  for (;;) Line 2834  for (;;)
2834              current_subject++;              current_subject++;
2835            }            }
2836          }          }
2837    
2838        /* Or to a non-unique first char after study */        /* Or to a non-unique first char after study */
2839    
2840        else if (start_bits != NULL)        else if (start_bits != NULL)
2841          {          {
2842          while (current_subject < end_subject)          while (current_subject < end_subject)
# Line 2816  for (;;) Line 2846  for (;;)
2846              else break;              else break;
2847            }            }
2848          }          }
2849        }        }
2850    
2851      /* Restore fudged end_subject */      /* Restore fudged end_subject */
2852    
# Line 2836  for (;;) Line 2866  for (;;)
2866    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2867    don't do this when the string is sufficiently long.    don't do this when the string is sufficiently long.
2868    
2869    ALSO: this processing is disabled when partial matching is requested, and can    ALSO: this processing is disabled when partial matching is requested, and can
2870    also be explicitly deactivated. */    also be explicitly deactivated. */
2871    
2872    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&

Legend:
Removed from v.391  
changed lines
  Added in v.426

  ViewVC Help
Powered by ViewVC 1.1.5