/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 351 by ph10, Fri Jul 4 18:27:16 2008 UTC | revision 381 by ph10, Tue Mar 3 16:08:23 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 158  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 761  for (;;) Line 787  for (;;)
787    
788      case OP_COND:      case OP_COND:
789      case OP_SCOND:      case OP_SCOND:
790        /* Because of the way auto-callout works during compile, a callout item is
791        inserted between OP_COND and an assertion condition. */
792    
793        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
794          {
795          if (pcre_callout != NULL)
796            {
797            pcre_callout_block cb;
798            cb.version          = 1;   /* Version 1 of the callout block */
799            cb.callout_number   = ecode[LINK_SIZE+2];
800            cb.offset_vector    = md->offset_vector;
801            cb.subject          = (PCRE_SPTR)md->start_subject;
802            cb.subject_length   = md->end_subject - md->start_subject;
803            cb.start_match      = mstart - md->start_subject;
804            cb.current_position = eptr - md->start_subject;
805            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
806            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
807            cb.capture_top      = offset_top/2;
808            cb.capture_last     = md->capture_last;
809            cb.callout_data     = md->callout_data;
810            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
811            if (rrc < 0) RRETURN(rrc);
812            }
813          ecode += _pcre_OP_lengths[OP_CALLOUT];
814          }
815    
816        /* Now see what the actual condition is */
817    
818      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
819        {        {
820        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
# Line 2583  for (;;) Line 2637  for (;;)
2637              {              {
2638              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2639              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2640                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2641              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2642              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2643              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
2644                RRETURN(MATCH_NOMATCH);  
2645              }              }
2646            }            }
2647          else          else
# Line 2692  for (;;) Line 2747  for (;;)
2747              {              {
2748              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2749              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2750                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2751              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2752              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2753              }              }
2754            }            }
2755          else          else
# Line 4358  Returns:          > 0 => success; value Line 4413  Returns:          > 0 => success; value
4413                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4414  */  */
4415    
4416  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
4417  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4418    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4419    int offsetcount)    int offsetcount)
# Line 4670  for(;;) Line 4725  for(;;)
4725    if (firstline)    if (firstline)
4726      {      {
4727      USPTR t = start_match;      USPTR t = start_match;
4728    #ifdef SUPPORT_UTF8
4729        if (utf8)
4730          {
4731          while (t < md->end_subject && !IS_NEWLINE(t))
4732            {
4733            t++;
4734            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
4735            }
4736          }
4737        else
4738    #endif
4739      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4740      end_subject = t;      end_subject = t;
4741      }      }
4742    
4743    /* Now test for a unique first byte */    /* Now advance to a unique first byte if there is one. */
4744    
4745    if (first_byte >= 0)    if (first_byte >= 0)
4746      {      {
4747      if (first_byte_caseless)      if (first_byte_caseless)
4748        while (start_match < end_subject &&        while (start_match < end_subject && md->lcc[*start_match] != first_byte)
4749               md->lcc[*start_match] != first_byte)          start_match++;
         { NEXTCHAR(start_match); }  
4750      else      else
4751        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4752          { NEXTCHAR(start_match); }          start_match++;
4753      }      }
4754    
4755    /* Or to just after a linebreak for a multiline match if possible */    /* Or to just after a linebreak for a multiline match */
4756    
4757    else if (startline)    else if (startline)
4758      {      {
4759      if (start_match > md->start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4760        {        {
4761        while (start_match <= end_subject && !WAS_NEWLINE(start_match))  #ifdef SUPPORT_UTF8
4762          { NEXTCHAR(start_match); }        if (utf8)
4763            {
4764            while (start_match < end_subject && !WAS_NEWLINE(start_match))
4765              {
4766              start_match++;
4767              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4768                start_match++;
4769              }
4770            }
4771          else
4772    #endif
4773          while (start_match < end_subject && !WAS_NEWLINE(start_match))
4774            start_match++;
4775    
4776        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4777        and we are now at a LF, advance the match position by one more character.        and we are now at a LF, advance the match position by one more character.
# Line 4708  for(;;) Line 4785  for(;;)
4785        }        }
4786      }      }
4787    
4788    /* Or to a non-unique first char after study */    /* Or to a non-unique first byte after study */
4789    
4790    else if (start_bits != NULL)    else if (start_bits != NULL)
4791      {      {
4792      while (start_match < end_subject)      while (start_match < end_subject)
4793        {        {
4794        register unsigned int c = *start_match;        register unsigned int c = *start_match;
4795        if ((start_bits[c/8] & (1 << (c&7))) == 0)        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
4796          { NEXTCHAR(start_match); }          else break;
       else break;  
4797        }        }
4798      }      }
4799    

Legend:
Removed from v.351  
changed lines
  Added in v.381

  ViewVC Help
Powered by ViewVC 1.1.5