/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 391 by ph10, Tue Mar 17 21:16:01 2009 UTC revision 409 by ph10, Sat Mar 28 17:10:56 2009 UTC
# Line 322  typedef struct heapframe { Line 322  typedef struct heapframe {
322    
323    /* Function arguments that may change */    /* Function arguments that may change */
324    
325    const uschar *Xeptr;    USPTR Xeptr;
326    const uschar *Xecode;    const uschar *Xecode;
327    const uschar *Xmstart;    USPTR Xmstart;
328    int Xoffset_top;    int Xoffset_top;
329    long int Xims;    long int Xims;
330    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 333  typedef struct heapframe { Line 333  typedef struct heapframe {
333    
334    /* Function local variables */    /* Function local variables */
335    
336    const uschar *Xcallpat;    USPTR Xcallpat;
337    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
338    const uschar *Xdata;    USPTR Xcharptr;
339    const uschar *Xnext;  #endif
340    const uschar *Xpp;    USPTR Xdata;
341    const uschar *Xprev;    USPTR Xnext;
342    const uschar *Xsaved_eptr;    USPTR Xpp;
343      USPTR Xprev;
344      USPTR Xsaved_eptr;
345    
346    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
347    
# Line 360  typedef struct heapframe { Line 362  typedef struct heapframe {
362    uschar Xocchars[8];    uschar Xocchars[8];
363  #endif  #endif
364    
365      int Xcodelink;
366    int Xctype;    int Xctype;
367    unsigned int Xfc;    unsigned int Xfc;
368    int Xfi;    int Xfi;
# Line 425  Returns:       MATCH_MATCH if matched Line 428  Returns:       MATCH_MATCH if matched
428  */  */
429    
430  static int  static int
431  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
432    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
433    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
434  {  {
# Line 439  register unsigned int c;   /* Character Line 442  register unsigned int c;   /* Character
442  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
443    
444  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
445    int condcode;
446    
447  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
448  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 481  HEAP_RECURSE: Line 485  HEAP_RECURSE:
485  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
486  #endif  #endif
487  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
488    #define codelink           frame->Xcodelink
489  #define data               frame->Xdata  #define data               frame->Xdata
490  #define next               frame->Xnext  #define next               frame->Xnext
491  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 561  int oclength; Line 566  int oclength;
566  uschar occhars[8];  uschar occhars[8];
567  #endif  #endif
568    
569    int codelink;
570  int ctype;  int ctype;
571  int length;  int length;
572  int max;  int max;
# Line 787  for (;;) Line 793  for (;;)
793    
794      case OP_COND:      case OP_COND:
795      case OP_SCOND:      case OP_SCOND:
796        codelink= GET(ecode, 1);
797    
798      /* Because of the way auto-callout works during compile, a callout item is      /* Because of the way auto-callout works during compile, a callout item is
799      inserted between OP_COND and an assertion condition. */      inserted between OP_COND and an assertion condition. */
800    
801      if (ecode[LINK_SIZE+1] == OP_CALLOUT)      if (ecode[LINK_SIZE+1] == OP_CALLOUT)
802        {        {
803        if (pcre_callout != NULL)        if (pcre_callout != NULL)
# Line 812  for (;;) Line 820  for (;;)
820          }          }
821        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += _pcre_OP_lengths[OP_CALLOUT];
822        }        }
823    
824        condcode = ecode[LINK_SIZE+1];
825    
826      /* Now see what the actual condition is */      /* Now see what the actual condition is */
827    
828      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      if (condcode == OP_RREF)         /* Recursion test */
829        {        {
830        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
831        condition = md->recursive != NULL &&        condition = md->recursive != NULL &&
# Line 823  for (;;) Line 833  for (;;)
833        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
834        }        }
835    
836      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF)    /* Group used test */
837        {        {
838        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
839        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
840        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
841        }        }
842    
843      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
844        {        {
845        condition = FALSE;        condition = FALSE;
846        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 857  for (;;) Line 867  for (;;)
867        else        else
868          {          {
869          condition = FALSE;          condition = FALSE;
870          ecode += GET(ecode, 1);          ecode += codelink;
871          }          }
872        }        }
873    
# Line 880  for (;;) Line 890  for (;;)
890          goto TAIL_RECURSE;          goto TAIL_RECURSE;
891          }          }
892        }        }
893      else                         /* Condition false & no 2nd alternative */      else                         /* Condition false & no alternative */
894        {        {
895        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
896        }        }
# Line 1103  for (;;) Line 1113  for (;;)
1113          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1114            {            {
1115            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1116              if (new_recursive.offset_save != stacksave)
1117                (pcre_free)(new_recursive.offset_save);
1118            RRETURN(rrc);            RRETURN(rrc);
1119            }            }
1120    
# Line 1449  for (;;) Line 1461  for (;;)
1461          {          {
1462          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1463            {            {
1464            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1465            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1466            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1467            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 4607  back the character offset. */ Line 4619  back the character offset. */
4619  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4620  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4621    {    {
4622    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
4623      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
4624    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
4625      {      {
4626      int tb = ((uschar *)subject)[start_offset];      int tb = ((USPTR)subject)[start_offset];
4627      if (tb > 127)      if (tb > 127)
4628        {        {
4629        tb &= 0xc0;        tb &= 0xc0;
# Line 4716  for(;;) Line 4728  for(;;)
4728      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
4729      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
4730      }      }
4731    
4732    /* If firstline is TRUE, the start of the match is constrained to the first    /* If firstline is TRUE, the start of the match is constrained to the first
4733    line of a multiline string. That is, the match must be before or at the first    line of a multiline string. That is, the match must be before or at the first
4734    newline. Implement this by temporarily adjusting end_subject so that we stop    newline. Implement this by temporarily adjusting end_subject so that we stop
# Line 4740  for(;;) Line 4752  for(;;)
4752      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4753      end_subject = t;      end_subject = t;
4754      }      }
4755    
4756    /* There are some optimizations that avoid running the match if a known    /* There are some optimizations that avoid running the match if a known
4757    starting point is not found, or if a known later character is not present.    starting point is not found, or if a known later character is not present.
4758    However, there is an option that disables these, for testing and for ensuring    However, there is an option that disables these, for testing and for ensuring
4759    that all callouts do actually occur. */    that all callouts do actually occur. */
4760    
4761    if ((options & PCRE_NO_START_OPTIMIZE) == 0)    if ((options & PCRE_NO_START_OPTIMIZE) == 0)
4762      {      {
4763      /* Advance to a unique first byte if there is one. */      /* Advance to a unique first byte if there is one. */
4764    
4765      if (first_byte >= 0)      if (first_byte >= 0)
# Line 4759  for(;;) Line 4771  for(;;)
4771          while (start_match < end_subject && *start_match != first_byte)          while (start_match < end_subject && *start_match != first_byte)
4772            start_match++;            start_match++;
4773        }        }
4774    
4775      /* Or to just after a linebreak for a multiline match */      /* Or to just after a linebreak for a multiline match */
4776    
4777      else if (startline)      else if (startline)
4778        {        {
4779        if (start_match > md->start_subject + start_offset)        if (start_match > md->start_subject + start_offset)
# Line 4780  for(;;) Line 4792  for(;;)
4792  #endif  #endif
4793          while (start_match < end_subject && !WAS_NEWLINE(start_match))          while (start_match < end_subject && !WAS_NEWLINE(start_match))
4794            start_match++;            start_match++;
4795    
4796          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4797          and we are now at a LF, advance the match position by one more character.          and we are now at a LF, advance the match position by one more character.
4798          */          */
4799    
4800          if (start_match[-1] == CHAR_CR &&          if (start_match[-1] == CHAR_CR &&
4801               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4802               start_match < end_subject &&               start_match < end_subject &&
# Line 4792  for(;;) Line 4804  for(;;)
4804            start_match++;            start_match++;
4805          }          }
4806        }        }
4807    
4808      /* Or to a non-unique first byte after study */      /* Or to a non-unique first byte after study */
4809    
4810      else if (start_bits != NULL)      else if (start_bits != NULL)
4811        {        {
4812        while (start_match < end_subject)        while (start_match < end_subject)
# Line 4805  for(;;) Line 4817  for(;;)
4817          }          }
4818        }        }
4819      }   /* Starting optimizations */      }   /* Starting optimizations */
4820    
4821    /* Restore fudged end_subject */    /* Restore fudged end_subject */
4822    
4823    end_subject = save_end_subject;    end_subject = save_end_subject;
4824    
4825  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
# Line 4830  for(;;) Line 4842  for(;;)
4842    32-megabyte string... so we don't do this when the string is sufficiently    32-megabyte string... so we don't do this when the string is sufficiently
4843    long.    long.
4844    
4845    ALSO: this processing is disabled when partial matching is requested, or if    ALSO: this processing is disabled when partial matching is requested, or if
4846    disabling is explicitly requested. */    disabling is explicitly requested. */
4847    
4848    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&

Legend:
Removed from v.391  
changed lines
  Added in v.409

  ViewVC Help
Powered by ViewVC 1.1.5