/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 602 by ph10, Wed May 25 08:29:03 2011 UTC | revision 680 by ph10, Tue Sep 6 09:15:54 2011 UTC
# Line 158  static const uschar coptable[] = { Line 158  static const uschar coptable[] = {
158    0,                             /* Ket                                    */    0,                             /* Ket                                    */
159    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
160    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
161      0,                             /* KetRpos                                */
162      0,                             /* Reverse                                */
163    0,                             /* Assert                                 */    0,                             /* Assert                                 */
164    0,                             /* Assert not                             */    0,                             /* Assert not                             */
165    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
166    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
167    0,                             /* Reverse                                */    0, 0, 0, 0, 0, 0,              /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */
168    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
   0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */  
169    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
170    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
171    0,                             /* DEF                                    */    0,                             /* DEF                                    */
172    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
173    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
174    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
175    0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */    0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
176      0, 0                           /* CLOSE, SKIPZERO  */
177  };  };
178    
179  /* This table identifies those opcodes that inspect a character. It is used to  /* This table identifies those opcodes that inspect a character. It is used to
# Line 224  static const uschar poptable[] = { Line 226  static const uschar poptable[] = {
226    0,                             /* Ket                                    */    0,                             /* Ket                                    */
227    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
228    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
229      0,                             /* KetRpos                                */
230      0,                             /* Reverse                                */
231    0,                             /* Assert                                 */    0,                             /* Assert                                 */
232    0,                             /* Assert not                             */    0,                             /* Assert not                             */
233    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
234    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
235    0,                             /* Reverse                                */    0, 0, 0, 0, 0, 0,              /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */
236    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
   0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */  
237    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
238    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
239    0,                             /* DEF                                    */    0,                             /* DEF                                    */
240    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
241    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
242    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
243    0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */    0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
244      0, 0                           /* CLOSE, SKIPZERO                        */
245  };  };
246    
247  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
# Line 324  Arguments: Line 328  Arguments:
328    workspace         vector of workspace    workspace         vector of workspace
329    wscount           size of same    wscount           size of same
330    rlevel            function call recursion level    rlevel            function call recursion level
   recursing         regex recursive call level  
331    
332  Returns:            > 0 => number of match offset pairs placed in offsets  Returns:            > 0 => number of match offset pairs placed in offsets
333                      = 0 => offsets overflowed; longest matches are present                      = 0 => offsets overflowed; longest matches are present
# Line 388  internal_dfa_exec( Line 391  internal_dfa_exec(
391    int offsetcount,    int offsetcount,
392    int *workspace,    int *workspace,
393    int wscount,    int wscount,
394    int  rlevel,    int  rlevel)
   int  recursing)  
395  {  {
396  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
397  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
# Line 398  const uschar *ctypes, *lcc, *fcc; Line 400  const uschar *ctypes, *lcc, *fcc;
400  const uschar *ptr;  const uschar *ptr;
401  const uschar *end_code, *first_op;  const uschar *end_code, *first_op;
402    
403    dfa_recursion_info new_recursive;
404    
405  int active_count, new_count, match_count;  int active_count, new_count, match_count;
406    
407  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
# Line 421  wscount = (wscount - (wscount % (INTS_PE Line 425  wscount = (wscount - (wscount % (INTS_PE
425            (2 * INTS_PER_STATEBLOCK);            (2 * INTS_PER_STATEBLOCK);
426    
427  DPRINTF(("\n%.*s---------------------\n"  DPRINTF(("\n%.*s---------------------\n"
428    "%.*sCall to internal_dfa_exec f=%d r=%d\n",    "%.*sCall to internal_dfa_exec f=%d\n",
429    rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));    rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
430    
431  ctypes = md->tables + ctypes_offset;  ctypes = md->tables + ctypes_offset;
432  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
# Line 435  next_new_state = new_states = active_sta Line 439  next_new_state = new_states = active_sta
439  new_count = 0;  new_count = 0;
440    
441  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
442    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
443        *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0);
444    
445  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
446  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 534  else Line 539  else
539    else    else
540      {      {
541      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
542        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
543            *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?
544            2:0);
545      do      do
546        {        {
547        ADD_NEW((int)(end_code - start_code + length), 0);        ADD_NEW((int)(end_code - start_code + length), 0);
# Line 614  for (;;) Line 621  for (;;)
621    for (i = 0; i < active_count; i++)    for (i = 0; i < active_count; i++)
622      {      {
623      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
624      BOOL caseless = FALSE;      BOOL caseless = FALSE;
625      const uschar *code;      const uschar *code;
626      int state_offset = current_state->offset;      int state_offset = current_state->offset;
627      int count, codevalue, rrc;      int count, codevalue, rrc;
# Line 731  for (;;) Line 738  for (;;)
738    
739  /* ========================================================================== */  /* ========================================================================== */
740        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
741        on with the next opcode. Otherwise, unless we have an empty string and        on with the next opcode. For repeating opcodes, also add the repeat
742          state. Note that KETRPOS will always be encountered at the end of the
743          subpattern, because the possessive subpattern repeats are always handled
744          using recursive calls. Thus, it never adds any new states.
745    
746          At the end of the (sub)pattern, unless we have an empty string and
747        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
748        start of the subject, save the match data, shifting up all previous        start of the subject, save the match data, shifting up all previous
749        matches so we always have the longest first. */        matches so we always have the longest first. */
# Line 739  for (;;) Line 751  for (;;)
751        case OP_KET:        case OP_KET:
752        case OP_KETRMIN:        case OP_KETRMIN:
753        case OP_KETRMAX:        case OP_KETRMAX:
754          case OP_KETRPOS:
755        if (code != end_code)        if (code != end_code)
756          {          {
757          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
# Line 755  for (;;) Line 768  for (;;)
768                  current_subject > start_subject + md->start_offset)))                  current_subject > start_subject + md->start_offset)))
769            {            {
770            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
771              else if (match_count > 0 && ++match_count * 2 >= offsetcount)              else if (match_count > 0 && ++match_count * 2 > offsetcount)
772                match_count = 0;                match_count = 0;
773            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
774            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
# Line 2166  for (;;) Line 2179  for (;;)
2179        checking (c) can be multibyte. */        checking (c) can be multibyte. */
2180    
2181        case OP_NOTI:        case OP_NOTI:
2182        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0 && c != d && c != fcc[d])
2183          { ADD_NEW(state_offset + dlen + 1, 0); }          { ADD_NEW(state_offset + dlen + 1, 0); }
2184        break;        break;
2185    
# Line 2179  for (;;) Line 2192  for (;;)
2192        case OP_NOTPOSPLUSI:        case OP_NOTPOSPLUSI:
2193        caseless = TRUE;        caseless = TRUE;
2194        codevalue -= OP_STARI - OP_STAR;        codevalue -= OP_STARI - OP_STAR;
2195    
2196        /* Fall through */        /* Fall through */
2197        case OP_PLUS:        case OP_PLUS:
2198        case OP_MINPLUS:        case OP_MINPLUS:
# Line 2508  for (;;) Line 2521  for (;;)
2521            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2522            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2523            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2524            rlevel,                               /* function recursion level */            rlevel);                              /* function recursion level */
           recursing);                           /* pass on regex recursion */  
2525    
2526          if (rc == PCRE_ERROR_DFA_UITEM) return rc;          if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2527          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
# Line 2548  for (;;) Line 2560  for (;;)
2560              cb.capture_top      = 1;              cb.capture_top      = 1;
2561              cb.capture_last     = -1;              cb.capture_last     = -1;
2562              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2563                cb.mark             = NULL;   /* No (*MARK) support */
2564              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2565              }              }
2566            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
# Line 2574  for (;;) Line 2587  for (;;)
2587            {            {
2588            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2589            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2590            if (recursing > 0)            if (md->recursive != NULL)
2591              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2592            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2593            }            }
# Line 2598  for (;;) Line 2611  for (;;)
2611              sizeof(local_offsets)/sizeof(int),    /* size of same */              sizeof(local_offsets)/sizeof(int),    /* size of same */
2612              local_workspace,                      /* workspace vector */              local_workspace,                      /* workspace vector */
2613              sizeof(local_workspace)/sizeof(int),  /* size of same */              sizeof(local_workspace)/sizeof(int),  /* size of same */
2614              rlevel,                               /* function recursion level */              rlevel);                              /* function recursion level */
             recursing);                           /* pass on regex recursion */  
2615    
2616            if (rc == PCRE_ERROR_DFA_UITEM) return rc;            if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2617            if ((rc >= 0) ==            if ((rc >= 0) ==
# Line 2614  for (;;) Line 2626  for (;;)
2626        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2627        case OP_RECURSE:        case OP_RECURSE:
2628          {          {
2629            dfa_recursion_info *ri;
2630          int local_offsets[1000];          int local_offsets[1000];
2631          int local_workspace[1000];          int local_workspace[1000];
2632            const uschar *callpat = start_code + GET(code, 1);
2633            int recno = (callpat == md->start_code)? 0 :
2634              GET2(callpat, 1 + LINK_SIZE);
2635          int rc;          int rc;
2636    
2637          DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2638            recursing + 1));  
2639            /* Check for repeating a recursion without advancing the subject
2640            pointer. This should catch convoluted mutual recursions. (Some simple
2641            cases are caught at compile time.) */
2642    
2643            for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2644              if (recno == ri->group_num && ptr == ri->subject_position)
2645                return PCRE_ERROR_RECURSELOOP;
2646    
2647            /* Remember this recursion and where we started it so as to
2648            catch infinite loops. */
2649    
2650            new_recursive.group_num = recno;
2651            new_recursive.subject_position = ptr;
2652            new_recursive.prevrec = md->recursive;
2653            md->recursive = &new_recursive;
2654    
2655          rc = internal_dfa_exec(          rc = internal_dfa_exec(
2656            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2657            start_code + GET(code, 1),            /* this subexpression's code */            callpat,                              /* this subexpression's code */
2658            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2659            (int)(ptr - start_subject),           /* start offset */            (int)(ptr - start_subject),           /* start offset */
2660            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2661            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2662            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2663            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2664            rlevel,                               /* function recursion level */            rlevel);                              /* function recursion level */
           recursing + 1);                       /* regex recurse level */  
2665    
2666          DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,          md->recursive = new_recursive.prevrec;  /* Done this recursion */
2667            recursing + 1, rc));  
2668            DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2669              rc));
2670    
2671          /* Ran out of internal offsets */          /* Ran out of internal offsets */
2672    
# Line 2667  for (;;) Line 2699  for (;;)
2699        break;        break;
2700    
2701        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2702          case OP_BRAPOS:
2703          case OP_SBRAPOS:
2704          case OP_CBRAPOS:
2705          case OP_SCBRAPOS:
2706          case OP_BRAPOSZERO:
2707            {
2708            int charcount, matched_count;
2709            const uschar *local_ptr = ptr;
2710            BOOL allow_zero;
2711    
2712            if (codevalue == OP_BRAPOSZERO)
2713              {
2714              allow_zero = TRUE;
2715              codevalue = *(++code);  /* Codevalue will be one of above BRAs */
2716              }
2717            else allow_zero = FALSE;
2718    
2719            /* Loop to match the subpattern as many times as possible as if it were
2720            a complete pattern. */
2721    
2722            for (matched_count = 0;; matched_count++)
2723              {
2724              int local_offsets[2];
2725              int local_workspace[1000];
2726    
2727              int rc = internal_dfa_exec(
2728                md,                                   /* fixed match data */
2729                code,                                 /* this subexpression's code */
2730                local_ptr,                            /* where we currently are */
2731                (int)(ptr - start_subject),           /* start offset */
2732                local_offsets,                        /* offset vector */
2733                sizeof(local_offsets)/sizeof(int),    /* size of same */
2734                local_workspace,                      /* workspace vector */
2735                sizeof(local_workspace)/sizeof(int),  /* size of same */
2736                rlevel);                              /* function recursion level */
2737    
2738              /* Failed to match */
2739    
2740              if (rc < 0)
2741                {
2742                if (rc != PCRE_ERROR_NOMATCH) return rc;
2743                break;
2744                }
2745    
2746              /* Matched: break the loop if zero characters matched. */
2747    
2748              charcount = local_offsets[1] - local_offsets[0];
2749              if (charcount == 0) break;
2750              local_ptr += charcount;    /* Advance temporary position ptr */
2751              }
2752    
2753            /* At this point we have matched the subpattern matched_count
2754            times, and local_ptr is pointing to the character after the end of the
2755            last match. */
2756    
2757            if (matched_count > 0 || allow_zero)
2758              {
2759              const uschar *end_subpattern = code;
2760              int next_state_offset;
2761    
2762              do { end_subpattern += GET(end_subpattern, 1); }
2763                while (*end_subpattern == OP_ALT);
2764              next_state_offset =
2765                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2766    
2767              /* Optimization: if there are no more active states, and there
2768              are no new states yet set up, then skip over the subject string
2769              right here, to save looping. Otherwise, set up the new state to swing
2770              into action when the end of the matched substring is reached. */
2771    
2772              if (i + 1 >= active_count && new_count == 0)
2773                {
2774                ptr = local_ptr;
2775                clen = 0;
2776                ADD_NEW(next_state_offset, 0);
2777                }
2778              else
2779                {
2780                const uschar *p = ptr;
2781                const uschar *pp = local_ptr;
2782                charcount = pp - p;
2783                while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
2784                ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2785                }
2786              }
2787            }
2788          break;
2789    
2790          /*-----------------------------------------------------------------*/
2791        case OP_ONCE:        case OP_ONCE:
2792          {          {
2793          int local_offsets[2];          int local_offsets[2];
# Line 2681  for (;;) Line 2802  for (;;)
2802            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2803            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2804            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2805            rlevel,                               /* function recursion level */            rlevel);                              /* function recursion level */
           recursing);                           /* pass on regex recursion */  
2806    
2807          if (rc >= 0)          if (rc >= 0)
2808            {            {
# Line 2716  for (;;) Line 2836  for (;;)
2836            /* Optimization: if there are no more active states, and there            /* Optimization: if there are no more active states, and there
2837            are no new states yet set up, then skip over the subject string            are no new states yet set up, then skip over the subject string
2838            right here, to save looping. Otherwise, set up the new state to swing            right here, to save looping. Otherwise, set up the new state to swing
2839            into action when the end of the substring is reached. */            into action when the end of the matched substring is reached. */
2840    
2841            else if (i + 1 >= active_count && new_count == 0)            else if (i + 1 >= active_count && new_count == 0)
2842              {              {
# Line 2746  for (;;) Line 2866  for (;;)
2866              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
2867                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
2868              }              }
   
2869            }            }
2870          else if (rc != PCRE_ERROR_NOMATCH) return rc;          else if (rc != PCRE_ERROR_NOMATCH) return rc;
2871          }          }
# Line 2773  for (;;) Line 2892  for (;;)
2892          cb.capture_top      = 1;          cb.capture_top      = 1;
2893          cb.capture_last     = -1;          cb.capture_last     = -1;
2894          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2895            cb.mark             = NULL;   /* No (*MARK) support */
2896          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2897          }          }
2898        if (rrc == 0)        if (rrc == 0)
# Line 3023  back the character offset. */ Line 3143  back the character offset. */
3143  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3144  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3145    {    {
3146    int errorcode;    int erroroffset;
3147    int tb = _pcre_valid_utf8((uschar *)subject, length, &errorcode);    int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);
3148    if (tb >= 0)    if (errorcode != 0)
3149      {      {
3150      if (offsetcount >= 2)      if (offsetcount >= 2)
3151        {        {
3152        offsets[0] = tb;        offsets[0] = erroroffset;
3153        offsets[1] = errorcode;        offsets[1] = errorcode;
3154        }        }
3155      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
3156        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
     }  
   if (start_offset > 0 && start_offset < length)  
     {  
     tb = ((USPTR)subject)[start_offset] & 0xc0;  
     if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;  
3157      }      }
3158      if (start_offset > 0 && start_offset < length &&
3159            (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
3160        return PCRE_ERROR_BADUTF8_OFFSET;
3161    }    }
3162  #endif  #endif
3163    
# Line 3277  for (;;) Line 3395  for (;;)
3395    /* OK, now we can do the business */    /* OK, now we can do the business */
3396    
3397    md->start_used_ptr = current_subject;    md->start_used_ptr = current_subject;
3398      md->recursive = NULL;
3399    
3400    rc = internal_dfa_exec(    rc = internal_dfa_exec(
3401      md,                                /* fixed match data */      md,                                /* fixed match data */
# Line 3287  for (;;) Line 3406  for (;;)
3406      offsetcount,                       /* size of same */      offsetcount,                       /* size of same */
3407      workspace,                         /* workspace vector */      workspace,                         /* workspace vector */
3408      wscount,                           /* size of same */      wscount,                           /* size of same */
3409      0,                                 /* function recurse level */      0);                                /* function recurse level */
     0);                                /* regex recurse level */  
3410    
3411    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3412    on only if not anchored. */    on only if not anchored. */

Legend:
Removed from v.602  
changed lines
  Added in v.680

  ViewVC Help
Powered by ViewVC 1.1.5