/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

Left column: revision 543 by ph10, Tue Jun 15 16:33:29 2010 UTC | Right column: revision 597 by ph10, Mon May 2 17:08:52 2011 UTC
# Line 132  while (length-- > 0) Line 132  while (length-- > 0)
132  *          Match a back-reference                *  *          Match a back-reference                *
133  *************************************************/  *************************************************/
134    
135  /* If a back reference hasn't been set, the length that is passed is greater  /* Normally, if a back reference hasn't been set, the length that is passed is
136  than the number of characters left in the string, so the match fails.  negative, so the match always fails. However, in JavaScript compatibility mode,
137    the length passed is zero. Note that in caseless UTF-8 mode, the number of
138    subject bytes matched may be different to the number of reference bytes.
139    
140  Arguments:  Arguments:
141    offset      index into the offset vector    offset      index into the offset vector
142    eptr        points into the subject    eptr        pointer into the subject
143    length      length to be matched    length      length of reference to be matched (number of bytes)
144    md          points to match data block    md          points to match data block
145    ims         the ims flags    ims         the ims flags
146    
147  Returns:      TRUE if matched  Returns:      < 0 if not matched, otherwise the number of subject bytes matched
148  */  */
149    
150  static BOOL  static int
151  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
152    unsigned long int ims)    unsigned long int ims)
153  {  {
154  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR eptr_start = eptr;
155    register USPTR p = md->start_subject + md->offset_vector[offset];
156    
157  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
158  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 164  pchars(p, length, FALSE, md); Line 167  pchars(p, length, FALSE, md);
167  printf("\n");  printf("\n");
168  #endif  #endif
169    
170  /* Always fail if not enough characters left */  /* Always fail if reference not set (and not JavaScript compatible). */
171    
172  if (length > md->end_subject - eptr) return FALSE;  if (length < 0) return -1;
173    
174  /* Separate the caseless case for speed. In UTF-8 mode we can only do this  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175  properly if Unicode properties are supported. Otherwise, we can check only  properly if Unicode properties are supported. Otherwise, we can check only
# Line 178  if ((ims & PCRE_CASELESS) != 0) Line 181  if ((ims & PCRE_CASELESS) != 0)
181  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
182    if (md->utf8)    if (md->utf8)
183      {      {
184      USPTR endptr = eptr + length;      /* Match characters up to the end of the reference. NOTE: the number of
185      while (eptr < endptr)      bytes matched may differ, because there are some characters whose upper and
186        lower case versions code as different numbers of bytes. For example, U+023A
187        (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188        a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189        the latter. It is important, therefore, to check the length along the
190        reference, not along the subject (earlier code did this wrong). */
191    
192        USPTR endptr = p + length;
193        while (p < endptr)
194        {        {
195        int c, d;        int c, d;
196          if (eptr >= md->end_subject) return -1;
197        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
198        GETCHARINC(d, p);        GETCHARINC(d, p);
199        if (c != d && c != UCD_OTHERCASE(d)) return FALSE;        if (c != d && c != UCD_OTHERCASE(d)) return -1;
200        }        }
201      }      }
202    else    else
# Line 193  if ((ims & PCRE_CASELESS) != 0) Line 205  if ((ims & PCRE_CASELESS) != 0)
205    
206    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
207    is no UCP support. */    is no UCP support. */
208        {
209    while (length-- > 0)      if (eptr + length > md->end_subject) return -1;
210      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }      while (length-- > 0)
211          { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
212        }
213    }    }
214    
215  /* In the caseful case, we can just compare the bytes, whether or not we  /* In the caseful case, we can just compare the bytes, whether or not we
216  are in UTF-8 mode. */  are in UTF-8 mode. */
217    
218  else  else
219    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    {
220      if (eptr + length > md->end_subject) return -1;
221      while (length-- > 0) if (*p++ != *eptr++) return -1;
222      }
223    
224  return TRUE;  return eptr - eptr_start;
225  }  }
226    
227    
# Line 294  argument of match(), which never changes Line 311  argument of match(), which never changes
311    
312  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
313    {\    {\
314    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
315    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316    frame->Xwhere = rw; \    frame->Xwhere = rw; \
317    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
# Line 422  immediately. The second one is used when Line 439  immediately. The second one is used when
439  the subject. */  the subject. */
440    
441  #define CHECK_PARTIAL()\  #define CHECK_PARTIAL()\
442    if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\    if (md->partial != 0 && eptr >= md->end_subject && \
443      {\        eptr > md->start_used_ptr) \
444      md->hitend = TRUE;\      { \
445      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      md->hitend = TRUE; \
446        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
447      }      }
448    
449  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
450    if (md->partial != 0 && eptr > mstart)\    if (md->partial != 0 && eptr > md->start_used_ptr) \
451      {\      { \
452      md->hitend = TRUE;\      md->hitend = TRUE; \
453      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
454      }      }
455    
456    
# Line 488  heap storage. Set up the top-level frame Line 506  heap storage. Set up the top-level frame
506  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
507    
508  #ifdef NO_RECURSE  #ifdef NO_RECURSE
509  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
510  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
511  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
512    
# Line 710  for (;;) Line 728  for (;;)
728      case OP_FAIL:      case OP_FAIL:
729      MRRETURN(MATCH_NOMATCH);      MRRETURN(MATCH_NOMATCH);
730    
731        /* COMMIT overrides PRUNE, SKIP, and THEN */
732    
733      case OP_COMMIT:      case OP_COMMIT:
734      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
735        ims, eptrb, flags, RM52);        ims, eptrb, flags, RM52);
736      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
737            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
738            rrc != MATCH_THEN)
739          RRETURN(rrc);
740      MRRETURN(MATCH_COMMIT);      MRRETURN(MATCH_COMMIT);
741    
742        /* PRUNE overrides THEN */
743    
744      case OP_PRUNE:      case OP_PRUNE:
745      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
746        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM51);
747      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
748      MRRETURN(MATCH_PRUNE);      MRRETURN(MATCH_PRUNE);
749    
750      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
751      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752        ims, eptrb, flags, RM56);        ims, eptrb, flags, RM56);
753      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
754      md->mark = ecode + 2;      md->mark = ecode + 2;
755      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
756    
757        /* SKIP overrides PRUNE and THEN */
758    
759      case OP_SKIP:      case OP_SKIP:
760      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
761        ims, eptrb, flags, RM53);        ims, eptrb, flags, RM53);
762      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
763          RRETURN(rrc);
764      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
765      MRRETURN(MATCH_SKIP);      MRRETURN(MATCH_SKIP);
766    
767      case OP_SKIP_ARG:      case OP_SKIP_ARG:
768      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
769        ims, eptrb, flags, RM57);        ims, eptrb, flags, RM57);
770      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
771          RRETURN(rrc);
772    
773      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
774      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
# Line 749  for (;;) Line 778  for (;;)
778      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
779      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
780    
781        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
782        the alt that is at the start of the current branch. This makes it possible
783        to skip back past alternatives that precede the THEN within the current
784        branch. */
785    
786      case OP_THEN:      case OP_THEN:
787      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
788        ims, eptrb, flags, RM54);        ims, eptrb, flags, RM54);
789      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
790        md->start_match_ptr = ecode - GET(ecode, 1);
791      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
792    
793      case OP_THEN_ARG:      case OP_THEN_ARG:
794      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
795        ims, eptrb, flags, RM58);        offset_top, md, ims, eptrb, flags, RM58);
796      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
797      md->mark = ecode + 2;      md->start_match_ptr = ecode - GET(ecode, 1);
798        md->mark = ecode + LINK_SIZE + 2;
799      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
800    
801      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
# Line 804  for (;;) Line 840  for (;;)
840          {          {
841          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
842            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
843          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
844                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
845              RRETURN(rrc);
846          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
847          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
848          }          }
# Line 865  for (;;) Line 903  for (;;)
903    
904        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
905          eptrb, flags, RM2);          eptrb, flags, RM2);
906        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
907              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
908            RRETURN(rrc);
909        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
910        }        }
911      /* Control never reaches here. */      /* Control never reaches here. */
# Line 1066  for (;;) Line 1106  for (;;)
1106          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1107          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1108          }          }
1109        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)        else if (rrc != MATCH_NOMATCH &&
1110                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1111          {          {
1112          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1113          }          }
# Line 1194  for (;;) Line 1235  for (;;)
1235          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1236          break;          break;
1237          }          }
1238        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1239              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1240            RRETURN(rrc);
1241        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1242        }        }
1243      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1228  for (;;) Line 1271  for (;;)
1271          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1272          break;          break;
1273          }          }
1274        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1275              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1276            RRETURN(rrc);
1277        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1278        }        }
1279      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1365  for (;;) Line 1410  for (;;)
1410              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1411            MRRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1412            }            }
1413          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH &&
1414                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1415            {            {
1416            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1417            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1408  for (;;) Line 1454  for (;;)
1454          mstart = md->start_match_ptr;          mstart = md->start_match_ptr;
1455          break;          break;
1456          }          }
1457        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1458              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1459            RRETURN(rrc);
1460        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1461        }        }
1462      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1674  for (;;) Line 1722  for (;;)
1722        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1723          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1724        else        else
1725          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }          {
1726            if (md->noteol) MRRETURN(MATCH_NOMATCH);
1727            SCHECK_PARTIAL();
1728            }
1729        ecode++;        ecode++;
1730        break;        break;
1731        }        }
1732      else      else  /* Not multiline */
1733        {        {
1734        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1735        if (!md->endonly)        if (!md->endonly) goto ASSERT_NL_OR_EOS;
         {  
         if (eptr != md->end_subject &&  
             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))  
           MRRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1736        }        }
1737    
1738      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1739    
1740      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1741    
1742      case OP_EOD:      case OP_EOD:
1743      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1744        SCHECK_PARTIAL();
1745      ecode++;      ecode++;
1746      break;      break;
1747    
1748      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1749    
1750      case OP_EODN:      case OP_EODN:
1751      if (eptr != md->end_subject &&      ASSERT_NL_OR_EOS:
1752        if (eptr < md->end_subject &&
1753          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1754        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1755    
1756        /* Either at end of string or \n before end. */
1757    
1758        SCHECK_PARTIAL();
1759      ecode++;      ecode++;
1760      break;      break;
1761    
# Line 2218  for (;;) Line 2269  for (;;)
2269      loops). */      loops). */
2270    
2271      case OP_REF:      case OP_REF:
2272        {      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2273        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      ecode += 3;
       ecode += 3;  
2274    
2275        /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2276    
2277        (a) In the default, Perl-compatible state, set the length to be longer      (a) In the default, Perl-compatible state, set the length negative;
2278        than the amount of subject left; this ensures that every attempt at a      this ensures that every attempt at a match fails. We can't just fail
2279        match fails. We can't just fail here, because of the possibility of      here, because of the possibility of quantifiers with zero minima.
       quantifiers with zero minima.  
2280    
2281        (b) If the JavaScript compatibility flag is set, set the length to zero      (b) If the JavaScript compatibility flag is set, set the length to zero
2282        so that the back reference matches an empty string.      so that the back reference matches an empty string.
2283    
2284        Otherwise, set the length to the length of what was matched by the      Otherwise, set the length to the length of what was matched by the
2285        referenced subpattern. */      referenced subpattern. */
2286    
2287        if (offset >= offset_top || md->offset_vector[offset] < 0)      if (offset >= offset_top || md->offset_vector[offset] < 0)
2288          length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);        length = (md->jscript_compat)? 0 : -1;
2289        else      else
2290          length = md->offset_vector[offset+1] - md->offset_vector[offset];        length = md->offset_vector[offset+1] - md->offset_vector[offset];
2291    
2292        /* Set up for repetition, or handle the non-repeated case */      /* Set up for repetition, or handle the non-repeated case */
2293    
2294        switch (*ecode)      switch (*ecode)
2295          {        {
2296          case OP_CRSTAR:        case OP_CRSTAR:
2297          case OP_CRMINSTAR:        case OP_CRMINSTAR:
2298          case OP_CRPLUS:        case OP_CRPLUS:
2299          case OP_CRMINPLUS:        case OP_CRMINPLUS:
2300          case OP_CRQUERY:        case OP_CRQUERY:
2301          case OP_CRMINQUERY:        case OP_CRMINQUERY:
2302          c = *ecode++ - OP_CRSTAR;        c = *ecode++ - OP_CRSTAR;
2303          minimize = (c & 1) != 0;        minimize = (c & 1) != 0;
2304          min = rep_min[c];                 /* Pick up values from tables; */        min = rep_min[c];                 /* Pick up values from tables; */
2305          max = rep_max[c];                 /* zero for max => infinity */        max = rep_max[c];                 /* zero for max => infinity */
2306          if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2307          break;        break;
2308    
2309          case OP_CRRANGE:        case OP_CRRANGE:
2310          case OP_CRMINRANGE:        case OP_CRMINRANGE:
2311          minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2312          min = GET2(ecode, 1);        min = GET2(ecode, 1);
2313          max = GET2(ecode, 3);        max = GET2(ecode, 3);
2314          if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2315          ecode += 5;        ecode += 5;
2316          break;        break;
2317    
2318          default:               /* No repeat follows */        default:               /* No repeat follows */
2319          if (!match_ref(offset, eptr, length, md, ims))        if ((length = match_ref(offset, eptr, length, md, ims)) < 0)
2320            {          {
2321            CHECK_PARTIAL();          CHECK_PARTIAL();
2322            MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
           }  
         eptr += length;  
         continue;              /* With the main loop */  
2323          }          }
2324          eptr += length;
2325          continue;              /* With the main loop */
2326          }
2327    
2328        /* If the length of the reference is zero, just continue with the      /* Handle repeated back references. If the length of the reference is
2329        main loop. */      zero, just continue with the main loop. */
2330    
2331        if (length == 0) continue;      if (length == 0) continue;
2332    
2333        /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2334        the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
2335        address of eptr, so that eptr can be a register variable. */      address of eptr, so that eptr can be a register variable. */
2336    
2337        for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2338          {
2339          int slength;
2340          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2341          {          {
2342          if (!match_ref(offset, eptr, length, md, ims))          CHECK_PARTIAL();
2343            {          MRRETURN(MATCH_NOMATCH);
           CHECK_PARTIAL();  
           MRRETURN(MATCH_NOMATCH);  
           }  
         eptr += length;  
2344          }          }
2345          eptr += slength;
2346          }
2347    
2348        /* If min = max, continue at the same level without recursion.      /* If min = max, continue at the same level without recursion.
2349        They are not both allowed to be zero. */      They are not both allowed to be zero. */
2350    
2351        if (min == max) continue;      if (min == max) continue;
2352    
2353        /* If minimizing, keep trying and advancing the pointer */      /* If minimizing, keep trying and advancing the pointer */
2354    
2355        if (minimize)      if (minimize)
2356          {
2357          for (fi = min;; fi++)
2358          {          {
2359          for (fi = min;; fi++)          int slength;
2360            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2361            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2362            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2363            if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2364            {            {
2365            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            CHECK_PARTIAL();
2366            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            MRRETURN(MATCH_NOMATCH);
           if (fi >= max) MRRETURN(MATCH_NOMATCH);  
           if (!match_ref(offset, eptr, length, md, ims))  
             {  
             CHECK_PARTIAL();  
             MRRETURN(MATCH_NOMATCH);  
             }  
           eptr += length;  
2367            }            }
2368          /* Control never gets here */          eptr += slength;
2369          }          }
2370          /* Control never gets here */
2371          }
2372    
2373        /* If maximizing, find the longest string and work backwards */      /* If maximizing, find the longest string and work backwards */
2374    
2375        else      else
2376          {
2377          pp = eptr;
2378          for (i = min; i < max; i++)
2379          {          {
2380          pp = eptr;          int slength;
2381          for (i = min; i < max; i++)          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
           {  
           if (!match_ref(offset, eptr, length, md, ims))  
             {  
             CHECK_PARTIAL();  
             break;  
             }  
           eptr += length;  
           }  
         while (eptr >= pp)  
2382            {            {
2383            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);            CHECK_PARTIAL();
2384            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            break;
           eptr -= length;  
2385            }            }
2386          MRRETURN(MATCH_NOMATCH);          eptr += slength;
2387            }
2388          while (eptr >= pp)
2389            {
2390            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2391            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2392            eptr -= length;
2393          }          }
2394          MRRETURN(MATCH_NOMATCH);
2395        }        }
2396      /* Control never gets here */      /* Control never gets here */
2397    
# Line 5600  if ((options & ~PUBLIC_EXEC_OPTIONS) != Line 5651  if ((options & ~PUBLIC_EXEC_OPTIONS) !=
5651  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL ||
5652     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5653  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5654    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5655    
5656  /* This information is for finding all the numbers associated with a given  /* This information is for finding all the numbers associated with a given
5657  name, for condition testing. */  name, for condition testing. */
# Line 5766  back the character offset. */ Line 5818  back the character offset. */
5818  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5819  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5820    {    {
5821    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)    int tb;
5822      return PCRE_ERROR_BADUTF8;    if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
5823        return (tb == length && md->partial > 1)?
5824          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5825    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
5826      {      {
5827      int tb = ((USPTR)subject)[start_offset];      tb = ((USPTR)subject)[start_offset] & 0xc0;
5828      if (tb > 127)      if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
       {  
       tb &= 0xc0;  
       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;  
       }  
5829      }      }
5830    }    }
5831  #endif  #endif
# Line 5903  for(;;) Line 5953  for(;;)
5953    /* There are some optimizations that avoid running the match if a known    /* There are some optimizations that avoid running the match if a known
5954    starting point is not found, or if a known later character is not present.    starting point is not found, or if a known later character is not present.
5955    However, there is an option that disables these, for testing and for ensuring    However, there is an option that disables these, for testing and for ensuring
5956    that all callouts do actually occur. */    that all callouts do actually occur. The option can be set in the regex by
5957      (*NO_START_OPT) or passed in match-time options. */
5958    
5959    if ((options & PCRE_NO_START_OPTIMIZE) == 0)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
5960      {      {
5961      /* Advance to a unique first byte if there is one. */      /* Advance to a unique first byte if there is one. */
5962    
# Line 5959  for(;;) Line 6010  for(;;)
6010        while (start_match < end_subject)        while (start_match < end_subject)
6011          {          {
6012          register unsigned int c = *start_match;          register unsigned int c = *start_match;
6013          if ((start_bits[c/8] & (1 << (c&7))) == 0)          if ((start_bits[c/8] & (1 << (c&7))) == 0)
6014            {            {
6015            start_match++;            start_match++;
6016  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6017            if (utf8)            if (utf8)
6018              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6019                start_match++;                start_match++;
6020  #endif  #endif
6021            }            }
6022          else break;          else break;
6023          }          }
# Line 6069  for(;;) Line 6120  for(;;)
6120    
6121    switch(rc)    switch(rc)
6122      {      {
6123      /* SKIP passes back the next starting point explicitly, but if it is the      /* SKIP passes back the next starting point explicitly, but if it is the
6124      same as the match we have just done, treat it as NOMATCH. */      same as the match we have just done, treat it as NOMATCH. */
6125    
6126      case MATCH_SKIP:      case MATCH_SKIP:
6127      if (md->start_match_ptr != start_match)      if (md->start_match_ptr != start_match)
6128        {        {
6129        new_start_match = md->start_match_ptr;        new_start_match = md->start_match_ptr;
6130        break;        break;
6131        }        }
6132      /* Fall through */      /* Fall through */
6133    
6134      /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched      /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6135      the SKIP's arg was not found. We also treat this as NOMATCH. */      the SKIP's arg was not found. We also treat this as NOMATCH. */
6136    
6137      case MATCH_SKIP_ARG:      case MATCH_SKIP_ARG:
6138      /* Fall through */      /* Fall through */
6139    
6140      /* NOMATCH and PRUNE advance by one character. THEN at this level acts      /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6141      exactly like PRUNE. */      exactly like PRUNE. */

Legend:
- Removed from v.543
- Changed lines
- Added in v.597

  ViewVC Help
Powered by ViewVC 1.1.5