/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 359 by ph10, Wed Jul 9 16:20:19 2008 UTC revision 384 by ph10, Sun Mar 8 16:27:43 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 787  for (;;) Line 787  for (;;)
787    
788      case OP_COND:      case OP_COND:
789      case OP_SCOND:      case OP_SCOND:
790        /* Because of the way auto-callout works during compile, a callout item is
791        inserted between OP_COND and an assertion condition. */
792    
793        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
794          {
795          if (pcre_callout != NULL)
796            {
797            pcre_callout_block cb;
798            cb.version          = 1;   /* Version 1 of the callout block */
799            cb.callout_number   = ecode[LINK_SIZE+2];
800            cb.offset_vector    = md->offset_vector;
801            cb.subject          = (PCRE_SPTR)md->start_subject;
802            cb.subject_length   = md->end_subject - md->start_subject;
803            cb.start_match      = mstart - md->start_subject;
804            cb.current_position = eptr - md->start_subject;
805            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
806            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
807            cb.capture_top      = offset_top/2;
808            cb.capture_last     = md->capture_last;
809            cb.callout_data     = md->callout_data;
810            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
811            if (rrc < 0) RRETURN(rrc);
812            }
813          ecode += _pcre_OP_lengths[OP_CALLOUT];
814          }
815    
816        /* Now see what the actual condition is */
817    
818      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
819        {        {
820        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
# Line 1679  for (;;) Line 1707  for (;;)
1707      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1708      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1709        {        {
1710        const ucd_record * prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
1711    
1712        switch(ecode[1])        switch(ecode[1])
1713          {          {
# Line 2047  for (;;) Line 2075  for (;;)
2075    
2076    
2077      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2078      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2079        mode, because Unicode properties are supported in non-UTF-8 mode. */
2080    
2081  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2082      case OP_XCLASS:      case OP_XCLASS:
# Line 2089  for (;;) Line 2118  for (;;)
2118        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2119          {          {
2120          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2121          GETCHARINC(c, eptr);          GETCHARINCTEST(c, eptr);
2122          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2123          }          }
2124    
# Line 2108  for (;;) Line 2137  for (;;)
2137            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2138            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2139            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2140            GETCHARINC(c, eptr);            GETCHARINCTEST(c, eptr);
2141            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2142            }            }
2143          /* Control never gets here */          /* Control never gets here */
# Line 2123  for (;;) Line 2152  for (;;)
2152            {            {
2153            int len = 1;            int len = 1;
2154            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2155            GETCHARLEN(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
2156            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2157            eptr += len;            eptr += len;
2158            }            }
# Line 2609  for (;;) Line 2638  for (;;)
2638              {              {
2639              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2640              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2641                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2642              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2643              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2644              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
2645                RRETURN(MATCH_NOMATCH);  
2646              }              }
2647            }            }
2648          else          else
# Line 2718  for (;;) Line 2748  for (;;)
2748              {              {
2749              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2750              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2751                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2752              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2753              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2754              }              }
2755            }            }
2756          else          else
# Line 4696  for(;;) Line 4726  for(;;)
4726    if (firstline)    if (firstline)
4727      {      {
4728      USPTR t = start_match;      USPTR t = start_match;
4729    #ifdef SUPPORT_UTF8
4730        if (utf8)
4731          {
4732          while (t < md->end_subject && !IS_NEWLINE(t))
4733            {
4734            t++;
4735            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
4736            }
4737          }
4738        else
4739    #endif
4740      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4741      end_subject = t;      end_subject = t;
4742      }      }
4743    
4744    /* Now test for a unique first byte */    /* Now advance to a unique first byte if there is one. */
4745    
4746    if (first_byte >= 0)    if (first_byte >= 0)
4747      {      {
4748      if (first_byte_caseless)      if (first_byte_caseless)
4749        while (start_match < end_subject &&        while (start_match < end_subject && md->lcc[*start_match] != first_byte)
4750               md->lcc[*start_match] != first_byte)          start_match++;
         { NEXTCHAR(start_match); }  
4751      else      else
4752        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4753          { NEXTCHAR(start_match); }          start_match++;
4754      }      }
4755    
4756    /* Or to just after a linebreak for a multiline match if possible */    /* Or to just after a linebreak for a multiline match */
4757    
4758    else if (startline)    else if (startline)
4759      {      {
4760      if (start_match > md->start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4761        {        {
4762        while (start_match <= end_subject && !WAS_NEWLINE(start_match))  #ifdef SUPPORT_UTF8
4763          { NEXTCHAR(start_match); }        if (utf8)
4764            {
4765            while (start_match < end_subject && !WAS_NEWLINE(start_match))
4766              {
4767              start_match++;
4768              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4769                start_match++;
4770              }
4771            }
4772          else
4773    #endif
4774          while (start_match < end_subject && !WAS_NEWLINE(start_match))
4775            start_match++;
4776    
4777        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4778        and we are now at a LF, advance the match position by one more character.        and we are now at a LF, advance the match position by one more character.
# Line 4734  for(;;) Line 4786  for(;;)
4786        }        }
4787      }      }
4788    
4789    /* Or to a non-unique first char after study */    /* Or to a non-unique first byte after study */
4790    
4791    else if (start_bits != NULL)    else if (start_bits != NULL)
4792      {      {
4793      while (start_match < end_subject)      while (start_match < end_subject)
4794        {        {
4795        register unsigned int c = *start_match;        register unsigned int c = *start_match;
4796        if ((start_bits[c/8] & (1 << (c&7))) == 0)        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
4797          { NEXTCHAR(start_match); }          else break;
       else break;  
4798        }        }
4799      }      }
4800    

Legend:
Removed from v.359  
changed lines
  Added in v.384

  ViewVC Help
Powered by ViewVC 1.1.5