/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 645 by ph10, Sun Jul 31 17:02:18 2011 UTC | revision 723 by ph10, Sat Oct 8 15:55:23 2011 UTC
# Line 164  static const uschar coptable[] = { Line 164  static const uschar coptable[] = {
164    0,                             /* Assert not                             */    0,                             /* Assert not                             */
165    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
166    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
167    0, 0, 0, 0, 0, 0,              /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */    0, 0,                          /* ONCE, ONCE_NC                          */
168      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
169    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
170    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
171    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
# Line 232  static const uschar poptable[] = { Line 233  static const uschar poptable[] = {
233    0,                             /* Assert not                             */    0,                             /* Assert not                             */
234    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
235    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
236    0, 0, 0, 0, 0, 0,              /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */    0, 0,                          /* ONCE, ONCE_NC                          */
237      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
238    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
239    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
240    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
# Line 540  else Line 542  else
542      {      {
543      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
544        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
545          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?
546          2:0);          2:0);
547      do      do
548        {        {
# Line 621  for (;;) Line 623  for (;;)
623    for (i = 0; i < active_count; i++)    for (i = 0; i < active_count; i++)
624      {      {
625      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
626      BOOL caseless = FALSE;      BOOL caseless = FALSE;
627      const uschar *code;      const uschar *code;
628      int state_offset = current_state->offset;      int state_offset = current_state->offset;
629      int count, codevalue, rrc;      int count, codevalue, rrc;
# Line 738  for (;;) Line 740  for (;;)
740    
741  /* ========================================================================== */  /* ========================================================================== */
742        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
743        on with the next opcode. For repeating opcodes, also add the repeat        on with the next opcode. For repeating opcodes, also add the repeat
744        state. Note that KETRPOS will always be encountered at the end of the        state. Note that KETRPOS will always be encountered at the end of the
745        subpattern, because the possessive subpattern repeats are always handled        subpattern, because the possessive subpattern repeats are always handled
746        using recursive calls. Thus, it never adds any new states.        using recursive calls. Thus, it never adds any new states.
747    
748        At the end of the (sub)pattern, unless we have an empty string and        At the end of the (sub)pattern, unless we have an empty string and
749        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
750        start of the subject, save the match data, shifting up all previous        start of the subject, save the match data, shifting up all previous
# Line 751  for (;;) Line 753  for (;;)
753        case OP_KET:        case OP_KET:
754        case OP_KETRMIN:        case OP_KETRMIN:
755        case OP_KETRMAX:        case OP_KETRMAX:
756        case OP_KETRPOS:        case OP_KETRPOS:
757        if (code != end_code)        if (code != end_code)
758          {          {
759          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
# Line 768  for (;;) Line 770  for (;;)
770                  current_subject > start_subject + md->start_offset)))                  current_subject > start_subject + md->start_offset)))
771            {            {
772            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
773              else if (match_count > 0 && ++match_count * 2 >= offsetcount)              else if (match_count > 0 && ++match_count * 2 > offsetcount)
774                match_count = 0;                match_count = 0;
775            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
776            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
# Line 2179  for (;;) Line 2181  for (;;)
2181        checking (c) can be multibyte. */        checking (c) can be multibyte. */
2182    
2183        case OP_NOTI:        case OP_NOTI:
2184        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0 && c != d && c != fcc[d])
2185          { ADD_NEW(state_offset + dlen + 1, 0); }          { ADD_NEW(state_offset + dlen + 1, 0); }
2186        break;        break;
2187    
# Line 2192  for (;;) Line 2194  for (;;)
2194        case OP_NOTPOSPLUSI:        case OP_NOTPOSPLUSI:
2195        caseless = TRUE;        caseless = TRUE;
2196        codevalue -= OP_STARI - OP_STAR;        codevalue -= OP_STARI - OP_STAR;
2197    
2198        /* Fall through */        /* Fall through */
2199        case OP_PLUS:        case OP_PLUS:
2200        case OP_MINPLUS:        case OP_MINPLUS:
# Line 2560  for (;;) Line 2562  for (;;)
2562              cb.capture_top      = 1;              cb.capture_top      = 1;
2563              cb.capture_last     = -1;              cb.capture_last     = -1;
2564              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2565              cb.mark             = NULL;   /* No (*MARK) support */              cb.mark             = NULL;   /* No (*MARK) support */
2566              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2567              }              }
2568            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
# Line 2587  for (;;) Line 2589  for (;;)
2589            {            {
2590            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2591            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2592            if (md->recursive != NULL)            if (md->recursive != NULL)
2593              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2594            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2595            }            }
# Line 2626  for (;;) Line 2628  for (;;)
2628        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2629        case OP_RECURSE:        case OP_RECURSE:
2630          {          {
2631          dfa_recursion_info *ri;          dfa_recursion_info *ri;
2632          int local_offsets[1000];          int local_offsets[1000];
2633          int local_workspace[1000];          int local_workspace[1000];
2634          const uschar *callpat = start_code + GET(code, 1);          const uschar *callpat = start_code + GET(code, 1);
2635          int recno = (callpat == md->start_code)? 0 :          int recno = (callpat == md->start_code)? 0 :
2636            GET2(callpat, 1 + LINK_SIZE);            GET2(callpat, 1 + LINK_SIZE);
2637          int rc;          int rc;
2638    
2639          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2640    
2641          /* Check for repeating a recursion without advancing the subject          /* Check for repeating a recursion without advancing the subject
2642          pointer. This should catch convoluted mutual recursions. (Some simple          pointer. This should catch convoluted mutual recursions. (Some simple
2643          cases are caught at compile time.) */          cases are caught at compile time.) */
   
         for (ri = md->recursive; ri != NULL; ri = ri->prevrec)  
           if (recno == ri->group_num && ptr == ri->subject_position)  
             return PCRE_ERROR_RECURSELOOP;  
2644    
2645          /* Remember this recursion and where we started it so as to          for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2646              if (recno == ri->group_num && ptr == ri->subject_position)
2647                return PCRE_ERROR_RECURSELOOP;
2648    
2649            /* Remember this recursion and where we started it so as to
2650          catch infinite loops. */          catch infinite loops. */
2651    
2652          new_recursive.group_num = recno;          new_recursive.group_num = recno;
2653          new_recursive.subject_position = ptr;          new_recursive.subject_position = ptr;
2654          new_recursive.prevrec = md->recursive;          new_recursive.prevrec = md->recursive;
2655          md->recursive = &new_recursive;          md->recursive = &new_recursive;
2656    
2657          rc = internal_dfa_exec(          rc = internal_dfa_exec(
2658            md,                                   /* fixed match data */            md,                                   /* fixed match data */
# Line 2665  for (;;) Line 2667  for (;;)
2667    
2668          md->recursive = new_recursive.prevrec;  /* Done this recursion */          md->recursive = new_recursive.prevrec;  /* Done this recursion */
2669    
2670          DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,          DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2671            rc));            rc));
2672    
2673          /* Ran out of internal offsets */          /* Ran out of internal offsets */
# Line 2703  for (;;) Line 2705  for (;;)
2705        case OP_SBRAPOS:        case OP_SBRAPOS:
2706        case OP_CBRAPOS:        case OP_CBRAPOS:
2707        case OP_SCBRAPOS:        case OP_SCBRAPOS:
2708        case OP_BRAPOSZERO:        case OP_BRAPOSZERO:
2709          {          {
2710          int charcount, matched_count;          int charcount, matched_count;
2711          const uschar *local_ptr = ptr;          const uschar *local_ptr = ptr;
2712          BOOL allow_zero;          BOOL allow_zero;
2713    
2714          if (codevalue == OP_BRAPOSZERO)          if (codevalue == OP_BRAPOSZERO)
2715            {            {
2716            allow_zero = TRUE;            allow_zero = TRUE;
2717            codevalue = *(++code);  /* Codevalue will be one of above BRAs */            codevalue = *(++code);  /* Codevalue will be one of above BRAs */
2718            }            }
2719          else allow_zero = FALSE;          else allow_zero = FALSE;
2720    
2721          /* Loop to match the subpattern as many times as possible as if it were          /* Loop to match the subpattern as many times as possible as if it were
2722          a complete pattern. */          a complete pattern. */
2723    
2724          for (matched_count = 0;; matched_count++)          for (matched_count = 0;; matched_count++)
2725            {            {
2726            int local_offsets[2];            int local_offsets[2];
2727            int local_workspace[1000];            int local_workspace[1000];
2728    
2729            int rc = internal_dfa_exec(            int rc = internal_dfa_exec(
2730              md,                                   /* fixed match data */              md,                                   /* fixed match data */
2731              code,                                 /* this subexpression's code */              code,                                 /* this subexpression's code */
# Line 2734  for (;;) Line 2736  for (;;)
2736              local_workspace,                      /* workspace vector */              local_workspace,                      /* workspace vector */
2737              sizeof(local_workspace)/sizeof(int),  /* size of same */              sizeof(local_workspace)/sizeof(int),  /* size of same */
2738              rlevel);                              /* function recursion level */              rlevel);                              /* function recursion level */
2739    
2740            /* Failed to match */            /* Failed to match */
2741    
2742            if (rc < 0)            if (rc < 0)
2743              {              {
2744              if (rc != PCRE_ERROR_NOMATCH) return rc;              if (rc != PCRE_ERROR_NOMATCH) return rc;
2745              break;              break;
2746              }              }
2747    
2748            /* Matched: break the loop if zero characters matched. */            /* Matched: break the loop if zero characters matched. */
2749    
2750            charcount = local_offsets[1] - local_offsets[0];            charcount = local_offsets[1] - local_offsets[0];
2751            if (charcount == 0) break;            if (charcount == 0) break;
2752            local_ptr += charcount;    /* Advance temporary position ptr */            local_ptr += charcount;    /* Advance temporary position ptr */
2753            }            }
2754    
2755          /* At this point we have matched the subpattern matched_count          /* At this point we have matched the subpattern matched_count
2756          times, and local_ptr is pointing to the character after the end of the          times, and local_ptr is pointing to the character after the end of the
2757          last match. */          last match. */
2758    
2759          if (matched_count > 0 || allow_zero)          if (matched_count > 0 || allow_zero)
2760            {            {
2761            const uschar *end_subpattern = code;            const uschar *end_subpattern = code;
2762            int next_state_offset;            int next_state_offset;
2763    
2764            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
2765              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
2766            next_state_offset =            next_state_offset =
# Line 2779  for (;;) Line 2781  for (;;)
2781              {              {
2782              const uschar *p = ptr;              const uschar *p = ptr;
2783              const uschar *pp = local_ptr;              const uschar *pp = local_ptr;
2784              charcount = pp - p;              charcount = pp - p;
2785              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
2786              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2787              }              }
2788            }            }
2789          }          }
2790        break;        break;
2791    
2792        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2793        case OP_ONCE:        case OP_ONCE:
2794          case OP_ONCE_NC:
2795          {          {
2796          int local_offsets[2];          int local_offsets[2];
2797          int local_workspace[1000];          int local_workspace[1000];
# Line 2892  for (;;) Line 2895  for (;;)
2895          cb.capture_top      = 1;          cb.capture_top      = 1;
2896          cb.capture_last     = -1;          cb.capture_last     = -1;
2897          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2898          cb.mark             = NULL;   /* No (*MARK) support */          cb.mark             = NULL;   /* No (*MARK) support */
2899          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2900          }          }
2901        if (rrc == 0)        if (rrc == 0)
# Line 3143  back the character offset. */ Line 3146  back the character offset. */
3146  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3147  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3148    {    {
3149    int erroroffset;    int erroroffset;
3150    int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);    int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);
3151    if (errorcode != 0)    if (errorcode != 0)
3152      {      {
# Line 3151  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3154  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3154        {        {
3155        offsets[0] = erroroffset;        offsets[0] = erroroffset;
3156        offsets[1] = errorcode;        offsets[1] = errorcode;
3157        }        }
3158      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
3159        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3160      }      }
3161    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
3162          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
3163      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
3164    }    }
3165  #endif  #endif
# Line 3395  for (;;) Line 3398  for (;;)
3398    /* OK, now we can do the business */    /* OK, now we can do the business */
3399    
3400    md->start_used_ptr = current_subject;    md->start_used_ptr = current_subject;
3401    md->recursive = NULL;    md->recursive = NULL;
3402    
3403    rc = internal_dfa_exec(    rc = internal_dfa_exec(
3404      md,                                /* fixed match data */      md,                                /* fixed match data */

Legend:
- Removed from v.645
- Changed lines
- Added in v.723

  ViewVC Help
Powered by ViewVC 1.1.5