/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

Left column: revision 595 by ph10, Mon May 2 10:33:29 2011 UTC | Right column: revision 600 by ph10, Mon May 9 08:54:11 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2010 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 193  if ((ims & PCRE_CASELESS) != 0) Line 193  if ((ims & PCRE_CASELESS) != 0)
193      while (p < endptr)      while (p < endptr)
194        {        {
195        int c, d;        int c, d;
196          if (eptr >= md->end_subject) return -1;
197        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
198        GETCHARINC(d, p);        GETCHARINC(d, p);
199        if (c != d && c != UCD_OTHERCASE(d)) return -1;        if (c != d && c != UCD_OTHERCASE(d)) return -1;
# Line 204  if ((ims & PCRE_CASELESS) != 0) Line 205  if ((ims & PCRE_CASELESS) != 0)
205    
206    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
207    is no UCP support. */    is no UCP support. */
208        {
209    while (length-- > 0)      if (eptr + length > md->end_subject) return -1;
210      { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }      while (length-- > 0)
211          { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
212        }
213    }    }
214    
215  /* In the caseful case, we can just compare the bytes, whether or not we  /* In the caseful case, we can just compare the bytes, whether or not we
216  are in UTF-8 mode. */  are in UTF-8 mode. */
217    
218  else  else
219    { while (length-- > 0) if (*p++ != *eptr++) return -1; }    {
220      if (eptr + length > md->end_subject) return -1;
221      while (length-- > 0) if (*p++ != *eptr++) return -1;
222      }
223    
224  return eptr - eptr_start;  return eptr - eptr_start;
225  }  }
# Line 2011  for (;;) Line 2017  for (;;)
2017      switch(c)      switch(c)
2018        {        {
2019        default: MRRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
2020    
2021        case 0x000d:        case 0x000d:
2022        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2023        break;        break;
# Line 3785  for (;;) Line 3792  for (;;)
3792            switch(c)            switch(c)
3793              {              {
3794              default: MRRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3795    
3796              case 0x000d:              case 0x000d:
3797              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3798              break;              break;
# Line 4061  for (;;) Line 4069  for (;;)
4069            switch(*eptr++)            switch(*eptr++)
4070              {              {
4071              default: MRRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4072    
4073              case 0x000d:              case 0x000d:
4074              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4075              break;              break;
4076    
4077              case 0x000a:              case 0x000a:
4078              break;              break;
4079    
# Line 5252  for (;;) Line 5262  for (;;)
5262            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5263            }            }
5264    
5265          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run. If possessive, we are
5266            done (no backing up). Otherwise, match at this position; anything other
5267            than no match is immediately returned. For nomatch, back up one
5268            character, unless we are matching \R and the last thing matched was
5269            \r\n, in which case, back up two bytes. */
5270    
5271          if (possessive) continue;          if (possessive) continue;
5272          for(;;)          for(;;)
# Line 5261  for (;;) Line 5275  for (;;)
5275            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5276            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5277            BACKCHAR(eptr);            BACKCHAR(eptr);
5278              if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5279                  eptr[-1] == '\r') eptr--;
5280            }            }
5281          }          }
5282        else        else
# Line 5459  for (;;) Line 5475  for (;;)
5475            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5476            }            }
5477    
5478          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run. If possessive, we are
5479            done (no backing up). Otherwise, match at this position; anything other
5480            than no match is immediately returned. For nomatch, back up one
5481            character (byte), unless we are matching \R and the last thing matched
5482            was \r\n, in which case, back up two bytes. */
5483    
5484          if (possessive) continue;          if (possessive) continue;
5485          while (eptr >= pp)          while (eptr >= pp)
5486            {            {
5487            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
           eptr--;  
5488            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5489              eptr--;
5490              if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5491                  eptr[-1] == '\r') eptr--;
5492            }            }
5493          }          }
5494    
# Line 5806  defined (though never set). So there's n Line 5828  defined (though never set). So there's n
5828  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5829    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
5830    
5831  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Pass back the character offset and error
5832  back the character offset. */  code if a results vector is available. */
5833    
5834  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5835  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5836    {    {
5837    int tb;    int errorcode;
5838    if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)    int tb = _pcre_valid_utf8((USPTR)subject, length, &errorcode);
5839      return (tb == length && md->partial > 1)?    if (tb >= 0)
5840        {
5841        if (offsetcount >= 2)
5842          {
5843          offsets[0] = tb;
5844          offsets[1] = errorcode;
5845          }
5846        return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5847        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5848        }
5849    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
5850      {      {
5851      tb = ((USPTR)subject)[start_offset] & 0xc0;      tb = ((USPTR)subject)[start_offset] & 0xc0;

Legend:
Removed from v.595  
changed lines
  Added in v.600

  ViewVC Help
Powered by ViewVC 1.1.5