/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 543 by ph10, Tue Jun 15 16:33:29 2010 UTC | revision 595 by ph10, Mon May 2 10:33:29 2011 UTC
# Line 132  while (length-- > 0) Line 132  while (length-- > 0)
132  *          Match a back-reference                *  *          Match a back-reference                *
133  *************************************************/  *************************************************/
134    
135  /* If a back reference hasn't been set, the length that is passed is greater  /* Normally, if a back reference hasn't been set, the length that is passed is
136  than the number of characters left in the string, so the match fails.  negative, so the match always fails. However, in JavaScript compatibility mode,
137    the length passed is zero. Note that in caseless UTF-8 mode, the number of
138    subject bytes matched may be different to the number of reference bytes.
139    
140  Arguments:  Arguments:
141    offset      index into the offset vector    offset      index into the offset vector
142    eptr        points into the subject    eptr        pointer into the subject
143    length      length to be matched    length      length of reference to be matched (number of bytes)
144    md          points to match data block    md          points to match data block
145    ims         the ims flags    ims         the ims flags
146    
147  Returns:      TRUE if matched  Returns:      < 0 if not matched, otherwise the number of subject bytes matched
148  */  */
149    
150  static BOOL  static int
151  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
152    unsigned long int ims)    unsigned long int ims)
153  {  {
154  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR eptr_start = eptr;
155    register USPTR p = md->start_subject + md->offset_vector[offset];
156    
157  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
158  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 164  pchars(p, length, FALSE, md); Line 167  pchars(p, length, FALSE, md);
167  printf("\n");  printf("\n");
168  #endif  #endif
169    
170  /* Always fail if not enough characters left */  /* Always fail if reference not set (and not JavaScript compatible). */
171    
172  if (length > md->end_subject - eptr) return FALSE;  if (length < 0) return -1;
173    
174  /* Separate the caseless case for speed. In UTF-8 mode we can only do this  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175  properly if Unicode properties are supported. Otherwise, we can check only  properly if Unicode properties are supported. Otherwise, we can check only
# Line 178  if ((ims & PCRE_CASELESS) != 0) Line 181  if ((ims & PCRE_CASELESS) != 0)
181  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
182    if (md->utf8)    if (md->utf8)
183      {      {
184      USPTR endptr = eptr + length;      /* Match characters up to the end of the reference. NOTE: the number of
185      while (eptr < endptr)      bytes matched may differ, because there are some characters whose upper and
186        lower case versions code as different numbers of bytes. For example, U+023A
187        (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188        a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189        the latter. It is important, therefore, to check the length along the
190        reference, not along the subject (earlier code did this wrong). */
191    
192        USPTR endptr = p + length;
193        while (p < endptr)
194        {        {
195        int c, d;        int c, d;
196        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
197        GETCHARINC(d, p);        GETCHARINC(d, p);
198        if (c != d && c != UCD_OTHERCASE(d)) return FALSE;        if (c != d && c != UCD_OTHERCASE(d)) return -1;
199        }        }
200      }      }
201    else    else
# Line 195  if ((ims & PCRE_CASELESS) != 0) Line 206  if ((ims & PCRE_CASELESS) != 0)
206    is no UCP support. */    is no UCP support. */
207    
208    while (length-- > 0)    while (length-- > 0)
209      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }      { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
210    }    }
211    
212  /* In the caseful case, we can just compare the bytes, whether or not we  /* In the caseful case, we can just compare the bytes, whether or not we
213  are in UTF-8 mode. */  are in UTF-8 mode. */
214    
215  else  else
216    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return -1; }
217    
218  return TRUE;  return eptr - eptr_start;
219  }  }
220    
221    
# Line 294  argument of match(), which never changes Line 305  argument of match(), which never changes
305    
306  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
307    {\    {\
308    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
309    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
310    frame->Xwhere = rw; \    frame->Xwhere = rw; \
311    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
# Line 422  immediately. The second one is used when Line 433  immediately. The second one is used when
433  the subject. */  the subject. */
434    
435  #define CHECK_PARTIAL()\  #define CHECK_PARTIAL()\
436    if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\    if (md->partial != 0 && eptr >= md->end_subject && \
437      {\        eptr > md->start_used_ptr) \
438      md->hitend = TRUE;\      { \
439      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      md->hitend = TRUE; \
440        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
441      }      }
442    
443  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
444    if (md->partial != 0 && eptr > mstart)\    if (md->partial != 0 && eptr > md->start_used_ptr) \
445      {\      { \
446      md->hitend = TRUE;\      md->hitend = TRUE; \
447      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
448      }      }
449    
450    
# Line 488  heap storage. Set up the top-level frame Line 500  heap storage. Set up the top-level frame
500  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
501    
502  #ifdef NO_RECURSE  #ifdef NO_RECURSE
503  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
504  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
505  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
506    
# Line 710  for (;;) Line 722  for (;;)
722      case OP_FAIL:      case OP_FAIL:
723      MRRETURN(MATCH_NOMATCH);      MRRETURN(MATCH_NOMATCH);
724    
725        /* COMMIT overrides PRUNE, SKIP, and THEN */
726    
727      case OP_COMMIT:      case OP_COMMIT:
728      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
729        ims, eptrb, flags, RM52);        ims, eptrb, flags, RM52);
730      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
731            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
732            rrc != MATCH_THEN)
733          RRETURN(rrc);
734      MRRETURN(MATCH_COMMIT);      MRRETURN(MATCH_COMMIT);
735    
736        /* PRUNE overrides THEN */
737    
738      case OP_PRUNE:      case OP_PRUNE:
739      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
740        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM51);
741      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
742      MRRETURN(MATCH_PRUNE);      MRRETURN(MATCH_PRUNE);
743    
744      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
745      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
746        ims, eptrb, flags, RM56);        ims, eptrb, flags, RM56);
747      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
748      md->mark = ecode + 2;      md->mark = ecode + 2;
749      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
750    
751        /* SKIP overrides PRUNE and THEN */
752    
753      case OP_SKIP:      case OP_SKIP:
754      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
755        ims, eptrb, flags, RM53);        ims, eptrb, flags, RM53);
756      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
757          RRETURN(rrc);
758      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
759      MRRETURN(MATCH_SKIP);      MRRETURN(MATCH_SKIP);
760    
761      case OP_SKIP_ARG:      case OP_SKIP_ARG:
762      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
763        ims, eptrb, flags, RM57);        ims, eptrb, flags, RM57);
764      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
765          RRETURN(rrc);
766    
767      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
768      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
# Line 749  for (;;) Line 772  for (;;)
772      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
773      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
774    
775        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
776        the alt that is at the start of the current branch. This makes it possible
777        to skip back past alternatives that precede the THEN within the current
778        branch. */
779    
780      case OP_THEN:      case OP_THEN:
781      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
782        ims, eptrb, flags, RM54);        ims, eptrb, flags, RM54);
783      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
784        md->start_match_ptr = ecode - GET(ecode, 1);
785      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
786    
787      case OP_THEN_ARG:      case OP_THEN_ARG:
788      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
789        ims, eptrb, flags, RM58);        offset_top, md, ims, eptrb, flags, RM58);
790      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
791      md->mark = ecode + 2;      md->start_match_ptr = ecode - GET(ecode, 1);
792        md->mark = ecode + LINK_SIZE + 2;
793      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
794    
795      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
# Line 804  for (;;) Line 834  for (;;)
834          {          {
835          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
836            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
837          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
838                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
839              RRETURN(rrc);
840          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
841          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
842          }          }
# Line 865  for (;;) Line 897  for (;;)
897    
898        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
899          eptrb, flags, RM2);          eptrb, flags, RM2);
900        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
901              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
902            RRETURN(rrc);
903        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
904        }        }
905      /* Control never reaches here. */      /* Control never reaches here. */
# Line 1066  for (;;) Line 1100  for (;;)
1100          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1101          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1102          }          }
1103        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)        else if (rrc != MATCH_NOMATCH &&
1104                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1105          {          {
1106          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1107          }          }
# Line 1194  for (;;) Line 1229  for (;;)
1229          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1230          break;          break;
1231          }          }
1232        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1233              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1234            RRETURN(rrc);
1235        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1236        }        }
1237      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1228  for (;;) Line 1265  for (;;)
1265          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1266          break;          break;
1267          }          }
1268        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1269              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1270            RRETURN(rrc);
1271        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1272        }        }
1273      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1365  for (;;) Line 1404  for (;;)
1404              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1405            MRRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1406            }            }
1407          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH &&
1408                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1409            {            {
1410            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1411            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1408  for (;;) Line 1448  for (;;)
1448          mstart = md->start_match_ptr;          mstart = md->start_match_ptr;
1449          break;          break;
1450          }          }
1451        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1452              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1453            RRETURN(rrc);
1454        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1455        }        }
1456      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1674  for (;;) Line 1716  for (;;)
1716        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1717          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1718        else        else
1719          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }          {
1720            if (md->noteol) MRRETURN(MATCH_NOMATCH);
1721            SCHECK_PARTIAL();
1722            }
1723        ecode++;        ecode++;
1724        break;        break;
1725        }        }
1726      else      else  /* Not multiline */
1727        {        {
1728        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1729        if (!md->endonly)        if (!md->endonly) goto ASSERT_NL_OR_EOS;
         {  
         if (eptr != md->end_subject &&  
             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))  
           MRRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1730        }        }
1731    
1732      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1733    
1734      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1735    
1736      case OP_EOD:      case OP_EOD:
1737      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1738        SCHECK_PARTIAL();
1739      ecode++;      ecode++;
1740      break;      break;
1741    
1742      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1743    
1744      case OP_EODN:      case OP_EODN:
1745      if (eptr != md->end_subject &&      ASSERT_NL_OR_EOS:
1746        if (eptr < md->end_subject &&
1747          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1748        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1749    
1750        /* Either at end of string or \n before end. */
1751    
1752        SCHECK_PARTIAL();
1753      ecode++;      ecode++;
1754      break;      break;
1755    
# Line 2218  for (;;) Line 2263  for (;;)
2263      loops). */      loops). */
2264    
2265      case OP_REF:      case OP_REF:
2266        {      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2267        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      ecode += 3;
       ecode += 3;  
2268    
2269        /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2270    
2271        (a) In the default, Perl-compatible state, set the length to be longer      (a) In the default, Perl-compatible state, set the length negative;
2272        than the amount of subject left; this ensures that every attempt at a      this ensures that every attempt at a match fails. We can't just fail
2273        match fails. We can't just fail here, because of the possibility of      here, because of the possibility of quantifiers with zero minima.
       quantifiers with zero minima.  
2274    
2275        (b) If the JavaScript compatibility flag is set, set the length to zero      (b) If the JavaScript compatibility flag is set, set the length to zero
2276        so that the back reference matches an empty string.      so that the back reference matches an empty string.
2277    
2278        Otherwise, set the length to the length of what was matched by the      Otherwise, set the length to the length of what was matched by the
2279        referenced subpattern. */      referenced subpattern. */
2280    
2281        if (offset >= offset_top || md->offset_vector[offset] < 0)      if (offset >= offset_top || md->offset_vector[offset] < 0)
2282          length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);        length = (md->jscript_compat)? 0 : -1;
2283        else      else
2284          length = md->offset_vector[offset+1] - md->offset_vector[offset];        length = md->offset_vector[offset+1] - md->offset_vector[offset];
2285    
2286        /* Set up for repetition, or handle the non-repeated case */      /* Set up for repetition, or handle the non-repeated case */
2287    
2288        switch (*ecode)      switch (*ecode)
2289          {        {
2290          case OP_CRSTAR:        case OP_CRSTAR:
2291          case OP_CRMINSTAR:        case OP_CRMINSTAR:
2292          case OP_CRPLUS:        case OP_CRPLUS:
2293          case OP_CRMINPLUS:        case OP_CRMINPLUS:
2294          case OP_CRQUERY:        case OP_CRQUERY:
2295          case OP_CRMINQUERY:        case OP_CRMINQUERY:
2296          c = *ecode++ - OP_CRSTAR;        c = *ecode++ - OP_CRSTAR;
2297          minimize = (c & 1) != 0;        minimize = (c & 1) != 0;
2298          min = rep_min[c];                 /* Pick up values from tables; */        min = rep_min[c];                 /* Pick up values from tables; */
2299          max = rep_max[c];                 /* zero for max => infinity */        max = rep_max[c];                 /* zero for max => infinity */
2300          if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2301          break;        break;
2302    
2303          case OP_CRRANGE:        case OP_CRRANGE:
2304          case OP_CRMINRANGE:        case OP_CRMINRANGE:
2305          minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2306          min = GET2(ecode, 1);        min = GET2(ecode, 1);
2307          max = GET2(ecode, 3);        max = GET2(ecode, 3);
2308          if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2309          ecode += 5;        ecode += 5;
2310          break;        break;
2311    
2312          default:               /* No repeat follows */        default:               /* No repeat follows */
2313          if (!match_ref(offset, eptr, length, md, ims))        if ((length = match_ref(offset, eptr, length, md, ims)) < 0)
2314            {          {
2315            CHECK_PARTIAL();          CHECK_PARTIAL();
2316            MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
           }  
         eptr += length;  
         continue;              /* With the main loop */  
2317          }          }
2318          eptr += length;
2319          continue;              /* With the main loop */
2320          }
2321    
2322        /* If the length of the reference is zero, just continue with the      /* Handle repeated back references. If the length of the reference is
2323        main loop. */      zero, just continue with the main loop. */
2324    
2325        if (length == 0) continue;      if (length == 0) continue;
2326    
2327        /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2328        the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
2329        address of eptr, so that eptr can be a register variable. */      address of eptr, so that eptr can be a register variable. */
2330    
2331        for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2332          {
2333          int slength;
2334          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2335          {          {
2336          if (!match_ref(offset, eptr, length, md, ims))          CHECK_PARTIAL();
2337            {          MRRETURN(MATCH_NOMATCH);
           CHECK_PARTIAL();  
           MRRETURN(MATCH_NOMATCH);  
           }  
         eptr += length;  
2338          }          }
2339          eptr += slength;
2340          }
2341    
2342        /* If min = max, continue at the same level without recursion.      /* If min = max, continue at the same level without recursion.
2343        They are not both allowed to be zero. */      They are not both allowed to be zero. */
2344    
2345        if (min == max) continue;      if (min == max) continue;
2346    
2347        /* If minimizing, keep trying and advancing the pointer */      /* If minimizing, keep trying and advancing the pointer */
2348    
2349        if (minimize)      if (minimize)
2350          {
2351          for (fi = min;; fi++)
2352          {          {
2353          for (fi = min;; fi++)          int slength;
2354            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2355            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2356            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2357            if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2358            {            {
2359            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            CHECK_PARTIAL();
2360            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            MRRETURN(MATCH_NOMATCH);
           if (fi >= max) MRRETURN(MATCH_NOMATCH);  
           if (!match_ref(offset, eptr, length, md, ims))  
             {  
             CHECK_PARTIAL();  
             MRRETURN(MATCH_NOMATCH);  
             }  
           eptr += length;  
2361            }            }
2362          /* Control never gets here */          eptr += slength;
2363          }          }
2364          /* Control never gets here */
2365          }
2366    
2367        /* If maximizing, find the longest string and work backwards */      /* If maximizing, find the longest string and work backwards */
2368    
2369        else      else
2370          {
2371          pp = eptr;
2372          for (i = min; i < max; i++)
2373          {          {
2374          pp = eptr;          int slength;
2375          for (i = min; i < max; i++)          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
           {  
           if (!match_ref(offset, eptr, length, md, ims))  
             {  
             CHECK_PARTIAL();  
             break;  
             }  
           eptr += length;  
           }  
         while (eptr >= pp)  
2376            {            {
2377            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);            CHECK_PARTIAL();
2378            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            break;
           eptr -= length;  
2379            }            }
2380          MRRETURN(MATCH_NOMATCH);          eptr += slength;
2381          }          }
2382          while (eptr >= pp)
2383            {
2384            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2385            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2386            eptr -= length;
2387            }
2388          MRRETURN(MATCH_NOMATCH);
2389        }        }
2390      /* Control never gets here */      /* Control never gets here */
2391    
# Line 5600  if ((options & ~PUBLIC_EXEC_OPTIONS) != Line 5645  if ((options & ~PUBLIC_EXEC_OPTIONS) !=
5645  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL ||
5646     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5647  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5648    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5649    
5650  /* This information is for finding all the numbers associated with a given  /* This information is for finding all the numbers associated with a given
5651  name, for condition testing. */  name, for condition testing. */
# Line 5766  back the character offset. */ Line 5812  back the character offset. */
5812  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5813  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5814    {    {
5815    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)    int tb;
5816      return PCRE_ERROR_BADUTF8;    if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
5817        return (tb == length && md->partial > 1)?
5818          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5819    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
5820      {      {
5821      int tb = ((USPTR)subject)[start_offset];      tb = ((USPTR)subject)[start_offset] & 0xc0;
5822      if (tb > 127)      if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
       {  
       tb &= 0xc0;  
       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;  
       }  
5823      }      }
5824    }    }
5825  #endif  #endif
# Line 5903  for(;;) Line 5947  for(;;)
5947    /* There are some optimizations that avoid running the match if a known    /* There are some optimizations that avoid running the match if a known
5948    starting point is not found, or if a known later character is not present.    starting point is not found, or if a known later character is not present.
5949    However, there is an option that disables these, for testing and for ensuring    However, there is an option that disables these, for testing and for ensuring
5950    that all callouts do actually occur. */    that all callouts do actually occur. The option can be set in the regex by
5951      (*NO_START_OPT) or passed in match-time options. */
5952    
5953    if ((options & PCRE_NO_START_OPTIMIZE) == 0)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
5954      {      {
5955      /* Advance to a unique first byte if there is one. */      /* Advance to a unique first byte if there is one. */
5956    
# Line 5959  for(;;) Line 6004  for(;;)
6004        while (start_match < end_subject)        while (start_match < end_subject)
6005          {          {
6006          register unsigned int c = *start_match;          register unsigned int c = *start_match;
6007          if ((start_bits[c/8] & (1 << (c&7))) == 0)          if ((start_bits[c/8] & (1 << (c&7))) == 0)
6008            {            {
6009            start_match++;            start_match++;
6010  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6011            if (utf8)            if (utf8)
6012              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6013                start_match++;                start_match++;
6014  #endif  #endif
6015            }            }
6016          else break;          else break;
6017          }          }
# Line 6069  for(;;) Line 6114  for(;;)
6114    
6115    switch(rc)    switch(rc)
6116      {      {
6117      /* SKIP passes back the next starting point explicitly, but if it is the      /* SKIP passes back the next starting point explicitly, but if it is the
6118      same as the match we have just done, treat it as NOMATCH. */      same as the match we have just done, treat it as NOMATCH. */
6119    
6120      case MATCH_SKIP:      case MATCH_SKIP:
6121      if (md->start_match_ptr != start_match)      if (md->start_match_ptr != start_match)
6122        {        {
6123        new_start_match = md->start_match_ptr;        new_start_match = md->start_match_ptr;
6124        break;        break;
6125        }        }
6126      /* Fall through */      /* Fall through */
6127    
6128      /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched      /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6129      the SKIP's arg was not found. We also treat this as NOMATCH. */      the SKIP's arg was not found. We also treat this as NOMATCH. */
6130    
6131      case MATCH_SKIP_ARG:      case MATCH_SKIP_ARG:
6132      /* Fall through */      /* Fall through */
6133    
6134      /* NOMATCH and PRUNE advance by one character. THEN at this level acts      /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6135      exactly like PRUNE. */      exactly like PRUNE. */

Legend:
Removed from v.543  
changed lines
  Added in v.595

  ViewVC Help
Powered by ViewVC 1.1.5