/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 602 by ph10, Wed May 25 08:29:03 2011 UTC | revision 723 by ph10, Sat Oct 8 15:55:23 2011 UTC
# Line 158  static const uschar coptable[] = { Line 158  static const uschar coptable[] = {
158    0,                             /* Ket                                    */    0,                             /* Ket                                    */
159    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
160    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
161      0,                             /* KetRpos                                */
162      0,                             /* Reverse                                */
163    0,                             /* Assert                                 */    0,                             /* Assert                                 */
164    0,                             /* Assert not                             */    0,                             /* Assert not                             */
165    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
166    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
167    0,                             /* Reverse                                */    0, 0,                          /* ONCE, ONCE_NC                          */
168    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
169    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
170    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
171    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
172    0,                             /* DEF                                    */    0,                             /* DEF                                    */
173    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
174    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
175    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
176    0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */    0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
177      0, 0                           /* CLOSE, SKIPZERO  */
178  };  };
179    
180  /* This table identifies those opcodes that inspect a character. It is used to  /* This table identifies those opcodes that inspect a character. It is used to
# Line 224  static const uschar poptable[] = { Line 227  static const uschar poptable[] = {
227    0,                             /* Ket                                    */    0,                             /* Ket                                    */
228    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
229    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
230      0,                             /* KetRpos                                */
231      0,                             /* Reverse                                */
232    0,                             /* Assert                                 */    0,                             /* Assert                                 */
233    0,                             /* Assert not                             */    0,                             /* Assert not                             */
234    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
235    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
236    0,                             /* Reverse                                */    0, 0,                          /* ONCE, ONCE_NC                          */
237    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
238    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
239    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
240    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
241    0,                             /* DEF                                    */    0,                             /* DEF                                    */
242    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
243    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
244    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
245    0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */    0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
246      0, 0                           /* CLOSE, SKIPZERO                        */
247  };  };
248    
249  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
# Line 324  Arguments: Line 330  Arguments:
330    workspace         vector of workspace    workspace         vector of workspace
331    wscount           size of same    wscount           size of same
332    rlevel            function call recursion level    rlevel            function call recursion level
   recursing         regex recursive call level  
333    
334  Returns:            > 0 => number of match offset pairs placed in offsets  Returns:            > 0 => number of match offset pairs placed in offsets
335                      = 0 => offsets overflowed; longest matches are present                      = 0 => offsets overflowed; longest matches are present
# Line 388  internal_dfa_exec( Line 393  internal_dfa_exec(
393    int offsetcount,    int offsetcount,
394    int *workspace,    int *workspace,
395    int wscount,    int wscount,
396    int  rlevel,    int  rlevel)
   int  recursing)  
397  {  {
398  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
399  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
# Line 398  const uschar *ctypes, *lcc, *fcc; Line 402  const uschar *ctypes, *lcc, *fcc;
402  const uschar *ptr;  const uschar *ptr;
403  const uschar *end_code, *first_op;  const uschar *end_code, *first_op;
404    
405    dfa_recursion_info new_recursive;
406    
407  int active_count, new_count, match_count;  int active_count, new_count, match_count;
408    
409  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
# Line 421  wscount = (wscount - (wscount % (INTS_PE Line 427  wscount = (wscount - (wscount % (INTS_PE
427            (2 * INTS_PER_STATEBLOCK);            (2 * INTS_PER_STATEBLOCK);
428    
429  DPRINTF(("\n%.*s---------------------\n"  DPRINTF(("\n%.*s---------------------\n"
430    "%.*sCall to internal_dfa_exec f=%d r=%d\n",    "%.*sCall to internal_dfa_exec f=%d\n",
431    rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));    rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
432    
433  ctypes = md->tables + ctypes_offset;  ctypes = md->tables + ctypes_offset;
434  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
# Line 435  next_new_state = new_states = active_sta Line 441  next_new_state = new_states = active_sta
441  new_count = 0;  new_count = 0;
442    
443  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
444    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
445        *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0);
446    
447  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
448  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 534  else Line 541  else
541    else    else
542      {      {
543      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
544        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
545            *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?
546            2:0);
547      do      do
548        {        {
549        ADD_NEW((int)(end_code - start_code + length), 0);        ADD_NEW((int)(end_code - start_code + length), 0);
# Line 614  for (;;) Line 623  for (;;)
623    for (i = 0; i < active_count; i++)    for (i = 0; i < active_count; i++)
624      {      {
625      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
626      BOOL caseless = FALSE;      BOOL caseless = FALSE;
627      const uschar *code;      const uschar *code;
628      int state_offset = current_state->offset;      int state_offset = current_state->offset;
629      int count, codevalue, rrc;      int count, codevalue, rrc;
# Line 731  for (;;) Line 740  for (;;)
740    
741  /* ========================================================================== */  /* ========================================================================== */
742        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
743        on with the next opcode. Otherwise, unless we have an empty string and        on with the next opcode. For repeating opcodes, also add the repeat
744          state. Note that KETRPOS will always be encountered at the end of the
745          subpattern, because the possessive subpattern repeats are always handled
746          using recursive calls. Thus, it never adds any new states.
747    
748          At the end of the (sub)pattern, unless we have an empty string and
749        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
750        start of the subject, save the match data, shifting up all previous        start of the subject, save the match data, shifting up all previous
751        matches so we always have the longest first. */        matches so we always have the longest first. */
# Line 739  for (;;) Line 753  for (;;)
753        case OP_KET:        case OP_KET:
754        case OP_KETRMIN:        case OP_KETRMIN:
755        case OP_KETRMAX:        case OP_KETRMAX:
756          case OP_KETRPOS:
757        if (code != end_code)        if (code != end_code)
758          {          {
759          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
# Line 755  for (;;) Line 770  for (;;)
770                  current_subject > start_subject + md->start_offset)))                  current_subject > start_subject + md->start_offset)))
771            {            {
772            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
773              else if (match_count > 0 && ++match_count * 2 >= offsetcount)              else if (match_count > 0 && ++match_count * 2 > offsetcount)
774                match_count = 0;                match_count = 0;
775            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
776            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
# Line 2166  for (;;) Line 2181  for (;;)
2181        checking (c) can be multibyte. */        checking (c) can be multibyte. */
2182    
2183        case OP_NOTI:        case OP_NOTI:
2184        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0 && c != d && c != fcc[d])
2185          { ADD_NEW(state_offset + dlen + 1, 0); }          { ADD_NEW(state_offset + dlen + 1, 0); }
2186        break;        break;
2187    
# Line 2179  for (;;) Line 2194  for (;;)
2194        case OP_NOTPOSPLUSI:        case OP_NOTPOSPLUSI:
2195        caseless = TRUE;        caseless = TRUE;
2196        codevalue -= OP_STARI - OP_STAR;        codevalue -= OP_STARI - OP_STAR;
2197    
2198        /* Fall through */        /* Fall through */
2199        case OP_PLUS:        case OP_PLUS:
2200        case OP_MINPLUS:        case OP_MINPLUS:
# Line 2508  for (;;) Line 2523  for (;;)
2523            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2524            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2525            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2526            rlevel,                               /* function recursion level */            rlevel);                              /* function recursion level */
           recursing);                           /* pass on regex recursion */  
2527    
2528          if (rc == PCRE_ERROR_DFA_UITEM) return rc;          if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2529          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
# Line 2548  for (;;) Line 2562  for (;;)
2562              cb.capture_top      = 1;              cb.capture_top      = 1;
2563              cb.capture_last     = -1;              cb.capture_last     = -1;
2564              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2565                cb.mark             = NULL;   /* No (*MARK) support */
2566              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2567              }              }
2568            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
# Line 2574  for (;;) Line 2589  for (;;)
2589            {            {
2590            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2591            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2592            if (recursing > 0)            if (md->recursive != NULL)
2593              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2594            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2595            }            }
# Line 2598  for (;;) Line 2613  for (;;)
2613              sizeof(local_offsets)/sizeof(int),    /* size of same */              sizeof(local_offsets)/sizeof(int),    /* size of same */
2614              local_workspace,                      /* workspace vector */              local_workspace,                      /* workspace vector */
2615              sizeof(local_workspace)/sizeof(int),  /* size of same */              sizeof(local_workspace)/sizeof(int),  /* size of same */
2616              rlevel,                               /* function recursion level */              rlevel);                              /* function recursion level */
             recursing);                           /* pass on regex recursion */  
2617    
2618            if (rc == PCRE_ERROR_DFA_UITEM) return rc;            if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2619            if ((rc >= 0) ==            if ((rc >= 0) ==
# Line 2614  for (;;) Line 2628  for (;;)
2628        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2629        case OP_RECURSE:        case OP_RECURSE:
2630          {          {
2631            dfa_recursion_info *ri;
2632          int local_offsets[1000];          int local_offsets[1000];
2633          int local_workspace[1000];          int local_workspace[1000];
2634            const uschar *callpat = start_code + GET(code, 1);
2635            int recno = (callpat == md->start_code)? 0 :
2636              GET2(callpat, 1 + LINK_SIZE);
2637          int rc;          int rc;
2638    
2639          DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2640            recursing + 1));  
2641            /* Check for repeating a recursion without advancing the subject
2642            pointer. This should catch convoluted mutual recursions. (Some simple
2643            cases are caught at compile time.) */
2644    
2645            for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2646              if (recno == ri->group_num && ptr == ri->subject_position)
2647                return PCRE_ERROR_RECURSELOOP;
2648    
2649            /* Remember this recursion and where we started it so as to
2650            catch infinite loops. */
2651    
2652            new_recursive.group_num = recno;
2653            new_recursive.subject_position = ptr;
2654            new_recursive.prevrec = md->recursive;
2655            md->recursive = &new_recursive;
2656    
2657          rc = internal_dfa_exec(          rc = internal_dfa_exec(
2658            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2659            start_code + GET(code, 1),            /* this subexpression's code */            callpat,                              /* this subexpression's code */
2660            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2661            (int)(ptr - start_subject),           /* start offset */            (int)(ptr - start_subject),           /* start offset */
2662            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2663            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2664            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2665            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2666            rlevel,                               /* function recursion level */            rlevel);                              /* function recursion level */
           recursing + 1);                       /* regex recurse level */  
2667    
2668          DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,          md->recursive = new_recursive.prevrec;  /* Done this recursion */
2669            recursing + 1, rc));  
2670            DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2671              rc));
2672    
2673          /* Ran out of internal offsets */          /* Ran out of internal offsets */
2674    
# Line 2667  for (;;) Line 2701  for (;;)
2701        break;        break;
2702    
2703        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2704          case OP_BRAPOS:
2705          case OP_SBRAPOS:
2706          case OP_CBRAPOS:
2707          case OP_SCBRAPOS:
2708          case OP_BRAPOSZERO:
2709            {
2710            int charcount, matched_count;
2711            const uschar *local_ptr = ptr;
2712            BOOL allow_zero;
2713    
2714            if (codevalue == OP_BRAPOSZERO)
2715              {
2716              allow_zero = TRUE;
2717              codevalue = *(++code);  /* Codevalue will be one of above BRAs */
2718              }
2719            else allow_zero = FALSE;
2720    
2721            /* Loop to match the subpattern as many times as possible as if it were
2722            a complete pattern. */
2723    
2724            for (matched_count = 0;; matched_count++)
2725              {
2726              int local_offsets[2];
2727              int local_workspace[1000];
2728    
2729              int rc = internal_dfa_exec(
2730                md,                                   /* fixed match data */
2731                code,                                 /* this subexpression's code */
2732                local_ptr,                            /* where we currently are */
2733                (int)(ptr - start_subject),           /* start offset */
2734                local_offsets,                        /* offset vector */
2735                sizeof(local_offsets)/sizeof(int),    /* size of same */
2736                local_workspace,                      /* workspace vector */
2737                sizeof(local_workspace)/sizeof(int),  /* size of same */
2738                rlevel);                              /* function recursion level */
2739    
2740              /* Failed to match */
2741    
2742              if (rc < 0)
2743                {
2744                if (rc != PCRE_ERROR_NOMATCH) return rc;
2745                break;
2746                }
2747    
2748              /* Matched: break the loop if zero characters matched. */
2749    
2750              charcount = local_offsets[1] - local_offsets[0];
2751              if (charcount == 0) break;
2752              local_ptr += charcount;    /* Advance temporary position ptr */
2753              }
2754    
2755            /* At this point we have matched the subpattern matched_count
2756            times, and local_ptr is pointing to the character after the end of the
2757            last match. */
2758    
2759            if (matched_count > 0 || allow_zero)
2760              {
2761              const uschar *end_subpattern = code;
2762              int next_state_offset;
2763    
2764              do { end_subpattern += GET(end_subpattern, 1); }
2765                while (*end_subpattern == OP_ALT);
2766              next_state_offset =
2767                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2768    
2769              /* Optimization: if there are no more active states, and there
2770              are no new states yet set up, then skip over the subject string
2771              right here, to save looping. Otherwise, set up the new state to swing
2772              into action when the end of the matched substring is reached. */
2773    
2774              if (i + 1 >= active_count && new_count == 0)
2775                {
2776                ptr = local_ptr;
2777                clen = 0;
2778                ADD_NEW(next_state_offset, 0);
2779                }
2780              else
2781                {
2782                const uschar *p = ptr;
2783                const uschar *pp = local_ptr;
2784                charcount = pp - p;
2785                while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
2786                ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2787                }
2788              }
2789            }
2790          break;
2791    
2792          /*-----------------------------------------------------------------*/
2793        case OP_ONCE:        case OP_ONCE:
2794          case OP_ONCE_NC:
2795          {          {
2796          int local_offsets[2];          int local_offsets[2];
2797          int local_workspace[1000];          int local_workspace[1000];
# Line 2681  for (;;) Line 2805  for (;;)
2805            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2806            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2807            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2808            rlevel,                               /* function recursion level */            rlevel);                              /* function recursion level */
           recursing);                           /* pass on regex recursion */  
2809    
2810          if (rc >= 0)          if (rc >= 0)
2811            {            {
# Line 2716  for (;;) Line 2839  for (;;)
2839            /* Optimization: if there are no more active states, and there            /* Optimization: if there are no more active states, and there
2840            are no new states yet set up, then skip over the subject string            are no new states yet set up, then skip over the subject string
2841            right here, to save looping. Otherwise, set up the new state to swing            right here, to save looping. Otherwise, set up the new state to swing
2842            into action when the end of the substring is reached. */            into action when the end of the matched substring is reached. */
2843    
2844            else if (i + 1 >= active_count && new_count == 0)            else if (i + 1 >= active_count && new_count == 0)
2845              {              {
# Line 2746  for (;;) Line 2869  for (;;)
2869              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
2870                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
2871              }              }
   
2872            }            }
2873          else if (rc != PCRE_ERROR_NOMATCH) return rc;          else if (rc != PCRE_ERROR_NOMATCH) return rc;
2874          }          }
# Line 2773  for (;;) Line 2895  for (;;)
2895          cb.capture_top      = 1;          cb.capture_top      = 1;
2896          cb.capture_last     = -1;          cb.capture_last     = -1;
2897          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2898            cb.mark             = NULL;   /* No (*MARK) support */
2899          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2900          }          }
2901        if (rrc == 0)        if (rrc == 0)
# Line 3023  back the character offset. */ Line 3146  back the character offset. */
3146  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3147  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3148    {    {
3149    int errorcode;    int erroroffset;
3150    int tb = _pcre_valid_utf8((uschar *)subject, length, &errorcode);    int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);
3151    if (tb >= 0)    if (errorcode != 0)
3152      {      {
3153      if (offsetcount >= 2)      if (offsetcount >= 2)
3154        {        {
3155        offsets[0] = tb;        offsets[0] = erroroffset;
3156        offsets[1] = errorcode;        offsets[1] = errorcode;
3157        }        }
3158      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
3159        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
     }  
   if (start_offset > 0 && start_offset < length)  
     {  
     tb = ((USPTR)subject)[start_offset] & 0xc0;  
     if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;  
3160      }      }
3161      if (start_offset > 0 && start_offset < length &&
3162            (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
3163        return PCRE_ERROR_BADUTF8_OFFSET;
3164    }    }
3165  #endif  #endif
3166    
# Line 3277  for (;;) Line 3398  for (;;)
3398    /* OK, now we can do the business */    /* OK, now we can do the business */
3399    
3400    md->start_used_ptr = current_subject;    md->start_used_ptr = current_subject;
3401      md->recursive = NULL;
3402    
3403    rc = internal_dfa_exec(    rc = internal_dfa_exec(
3404      md,                                /* fixed match data */      md,                                /* fixed match data */
# Line 3287  for (;;) Line 3409  for (;;)
3409      offsetcount,                       /* size of same */      offsetcount,                       /* size of same */
3410      workspace,                         /* workspace vector */      workspace,                         /* workspace vector */
3411      wscount,                           /* size of same */      wscount,                           /* size of same */
3412      0,                                 /* function recurse level */      0);                                /* function recurse level */
     0);                                /* regex recurse level */  
3413    
3414    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3415    on only if not anchored. */    on only if not anchored. */

Legend:
Removed from v.602  
changed lines
  Added in v.723

  ViewVC Help
Powered by ViewVC 1.1.5