/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 298 by ph10, Thu Jan 10 17:09:12 2008 UTC | revision 341 by ph10, Sat Apr 19 16:41:04 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 1148  for (;;) Line 1148  for (;;)
1148      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1149      break;      break;
1150    
1151      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1152      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1153      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1154      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1155      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1156    
1157      case OP_BRAZERO:      case OP_BRAZERO:
1158        {        {
# Line 1174  for (;;) Line 1174  for (;;)
1174        }        }
1175      break;      break;
1176    
1177        case OP_SKIPZERO:
1178          {
1179          next = ecode+1;
1180          do next += GET(next,1); while (*next == OP_ALT);
1181          ecode = next + 1 + LINK_SIZE;
1182          }
1183        break;
1184    
1185      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1186    
1187      case OP_KET:      case OP_KET:
# Line 1425  for (;;) Line 1433  for (;;)
1433        {        {
1434        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1435        }        }
1436        /* Fall through */
1437    
1438        case OP_ALLANY:
1439      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1440      if (utf8)      if (utf8)
1441        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
# Line 1723  for (;;) Line 1734  for (;;)
1734      case OP_REF:      case OP_REF:
1735        {        {
1736        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1737        ecode += 3;                                 /* Advance past item */        ecode += 3;
1738    
1739        /* If the reference is unset, set the length to be longer than the amount        /* If the reference is unset, there are two possibilities:
1740        of subject left; this ensures that every attempt at a match fails. We  
1741        can't just fail here, because of the possibility of quantifiers with zero        (a) In the default, Perl-compatible state, set the length to be longer
1742        minima. */        than the amount of subject left; this ensures that every attempt at a
1743          match fails. We can't just fail here, because of the possibility of
1744        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        quantifiers with zero minima.
1745          md->end_subject - eptr + 1 :  
1746          md->offset_vector[offset+1] - md->offset_vector[offset];        (b) If the JavaScript compatibility flag is set, set the length to zero
1747          so that the back reference matches an empty string.
1748    
1749          Otherwise, set the length to the length of what was matched by the
1750          referenced subpattern. */
1751    
1752          if (offset >= offset_top || md->offset_vector[offset] < 0)
1753            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1754          else
1755            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1756    
1757        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1758    
# Line 2943  for (;;) Line 2963  for (;;)
2963            }            }
2964          break;          break;
2965    
2966            case OP_ALLANY:
2967            for (i = 1; i <= min; i++)
2968              {
2969              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2970              eptr++;
2971              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2972              }
2973            break;
2974    
2975          case OP_ANYBYTE:          case OP_ANYBYTE:
2976          eptr += min;          eptr += min;
2977          break;          break;
# Line 3162  for (;;) Line 3191  for (;;)
3191          else eptr += min;          else eptr += min;
3192          break;          break;
3193    
3194            case OP_ALLANY:
3195            eptr += min;
3196            break;
3197    
3198          case OP_ANYBYTE:          case OP_ANYBYTE:
3199          eptr += min;          eptr += min;
3200          break;          break;
# Line 3424  for (;;) Line 3457  for (;;)
3457            switch(ctype)            switch(ctype)
3458              {              {
3459              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the DOTALL case */
3460              break;              case OP_ALLANY:
   
3461              case OP_ANYBYTE:              case OP_ANYBYTE:
3462              break;              break;
3463    
# Line 3583  for (;;) Line 3615  for (;;)
3615            c = *eptr++;            c = *eptr++;
3616            switch(ctype)            switch(ctype)
3617              {              {
3618              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the DOTALL case */
3619              break;              case OP_ALLANY:
   
3620              case OP_ANYBYTE:              case OP_ANYBYTE:
3621              break;              break;
3622    
# Line 3879  for (;;) Line 3910  for (;;)
3910              }              }
3911            break;            break;
3912    
3913              case OP_ALLANY:
3914              if (max < INT_MAX)
3915                {
3916                for (i = min; i < max; i++)
3917                  {
3918                  if (eptr >= md->end_subject) break;
3919                  eptr++;
3920                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3921                  }
3922                }
3923              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3924              break;
3925    
3926            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
3927    
3928            case OP_ANYBYTE:            case OP_ANYBYTE:
# Line 4073  for (;;) Line 4117  for (;;)
4117                }                }
4118              break;              break;
4119              }              }
4120            /* For DOTALL case, fall through and treat as \C */            /* For DOTALL case, fall through */
4121    
4122              case OP_ALLANY:
4123            case OP_ANYBYTE:            case OP_ANYBYTE:
4124            c = max - min;            c = max - min;
4125            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 4450  end_subject = md->end_subject; Line 4495  end_subject = md->end_subject;
4495    
4496  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4497  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4498    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4499    
4500  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4501  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;

Legend:
Removed from v.298  
changed lines
  Added in v.341

  ViewVC Help
Powered by ViewVC 1.1.5