/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 1379 by ph10, Mon Oct 14 13:54:07 2013 UTC revision 1380 by ph10, Tue Oct 15 16:49:12 2013 UTC
# Line 777  static const pcre_uint8 posspropstab[3][ Line 777  static const pcre_uint8 posspropstab[3][
777    { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */    { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */
778  };  };
779    
780    /* This table is used when converting repeating opcodes into possessified
781    versions as a result of an explicit possessive quantifier such as ++. A zero
782    value means there is no possessified version - in those cases the item in
783    question must be wrapped in ONCE brackets. The table is truncated at OP_CALLOUT
784    because all relevant opcodes are less than that. */
785    
786    static const pcre_uint8 opcode_possessify[] = {
787      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0 - 15  */
788      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 16 - 31 */
789    
790      0,                       /* NOTI */
791      OP_POSSTAR, 0,           /* STAR, MINSTAR */
792      OP_POSPLUS, 0,           /* PLUS, MINPLUS */
793      OP_POSQUERY, 0,          /* QUERY, MINQUERY */
794      OP_POSUPTO, 0,           /* UPTO, MINUPTO */
795      0,                       /* EXACT */
796      0, 0, 0, 0,              /* POS{STAR,PLUS,QUERY,UPTO} */
797    
798      OP_POSSTARI, 0,          /* STARI, MINSTARI */
799      OP_POSPLUSI, 0,          /* PLUSI, MINPLUSI */
800      OP_POSQUERYI, 0,         /* QUERYI, MINQUERYI */
801      OP_POSUPTOI, 0,          /* UPTOI, MINUPTOI */
802      0,                       /* EXACTI */
803      0, 0, 0, 0,              /* POS{STARI,PLUSI,QUERYI,UPTOI} */
804    
805      OP_NOTPOSSTAR, 0,        /* NOTSTAR, NOTMINSTAR */
806      OP_NOTPOSPLUS, 0,        /* NOTPLUS, NOTMINPLUS */
807      OP_NOTPOSQUERY, 0,       /* NOTQUERY, NOTMINQUERY */
808      OP_NOTPOSUPTO, 0,        /* NOTUPTO, NOTMINUPTO */
809      0,                       /* NOTEXACT */
810      0, 0, 0, 0,              /* NOTPOS{STAR,PLUS,QUERY,UPTO} */
811    
812      OP_NOTPOSSTARI, 0,       /* NOTSTARI, NOTMINSTARI */
813      OP_NOTPOSPLUSI, 0,       /* NOTPLUSI, NOTMINPLUSI */
814      OP_NOTPOSQUERYI, 0,      /* NOTQUERYI, NOTMINQUERYI */
815      OP_NOTPOSUPTOI, 0,       /* NOTUPTOI, NOTMINUPTOI */
816      0,                       /* NOTEXACTI */
817      0, 0, 0, 0,              /* NOTPOS{STARI,PLUSI,QUERYI,UPTOI} */
818    
819      OP_TYPEPOSSTAR, 0,       /* TYPESTAR, TYPEMINSTAR */
820      OP_TYPEPOSPLUS, 0,       /* TYPEPLUS, TYPEMINPLUS */
821      OP_TYPEPOSQUERY, 0,      /* TYPEQUERY, TYPEMINQUERY */
822      OP_TYPEPOSUPTO, 0,       /* TYPEUPTO, TYPEMINUPTO */
823      0,                       /* TYPEEXACT */
824      0, 0, 0, 0,              /* TYPEPOS{STAR,PLUS,QUERY,UPTO} */
825    
826      OP_CRPOSSTAR, 0,         /* CRSTAR, CRMINSTAR */
827      OP_CRPOSPLUS, 0,         /* CRPLUS, CRMINPLUS */
828      OP_CRPOSQUERY, 0,        /* CRQUERY, CRMINQUERY */
829      OP_CRPOSRANGE, 0,        /* CRRANGE, CRMINRANGE */
830      0, 0, 0, 0,              /* CRPOS{STAR,PLUS,QUERY,RANGE} */
831    
832      0, 0, 0,                 /* CLASS, NCLASS, XCLASS */
833      0, 0,                    /* REF, REFI */
834      0, 0,                    /* DNREF, DNREFI */
835      0, 0                     /* RECURSE, CALLOUT */
836    };
837    
838    
839    
840  /*************************************************  /*************************************************
# Line 2722  switch(ptype) Line 2780  switch(ptype)
2780      HSPACE_CASES:      HSPACE_CASES:
2781      VSPACE_CASES:      VSPACE_CASES:
2782      return negated;      return negated;
2783    
2784      default:      default:
2785      return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;      return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
2786      }      }
2787    break;  /* Control never reaches here */    break;  /* Control never reaches here */
2788    
2789    case PT_WORD:    case PT_WORD:
# Line 3399  pcre_uint32 list[8]; Line 3457  pcre_uint32 list[8];
3457  for (;;)  for (;;)
3458    {    {
3459    c = *code;    c = *code;
3460    
3461    if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)    if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
3462      {      {
3463      c -= get_repeat_base(c) - OP_STAR;      c -= get_repeat_base(c) - OP_STAR;
# Line 3461  for (;;) Line 3519  for (;;)
3519        /* end must not be NULL. */        /* end must not be NULL. */
3520        end = get_chr_property_list(code, utf, cd->fcc, list);        end = get_chr_property_list(code, utf, cd->fcc, list);
3521    
3522        list[1] = d == OP_CRSTAR || d == OP_CRPLUS || d == OP_CRQUERY ||        list[1] = d == OP_CRSTAR || d == OP_CRPLUS || d == OP_CRQUERY ||
3523          d == OP_CRRANGE;          d == OP_CRRANGE;
3524    
3525        if (compare_opcodes(end, utf, cd, list, end))        if (compare_opcodes(end, utf, cd, list, end))
# Line 5910  for (;; ptr++) Line 5968  for (;; ptr++)
5968        goto FAILED;        goto FAILED;
5969        }        }
5970    
5971      /* If the character following a repeat is '+', or if certain optimization      /* If the character following a repeat is '+', possessive_quantifier is
5972      tests above succeeded, possessive_quantifier is TRUE. For some opcodes,      TRUE. For some opcodes, there are special alternative opcodes for this
5973      there are special alternative opcodes for this case. For anything else, we      case. For anything else, we wrap the entire repeated item inside OP_ONCE
5974      wrap the entire repeated item inside OP_ONCE brackets. Logically, the '+'      brackets. Logically, the '+' notation is just syntactic sugar, taken from
5975      notation is just syntactic sugar, taken from Sun's Java package, but the      Sun's Java package, but the special opcodes can optimize it.
     special opcodes can optimize it.  
5976    
5977      Some (but not all) possessively repeated subpatterns have already been      Some (but not all) possessively repeated subpatterns have already been
5978      completely handled in the code just above. For them, possessive_quantifier      completely handled in the code just above. For them, possessive_quantifier
5979      is always FALSE at this stage.      is always FALSE at this stage. Note that the repeated item starts at
5980        tempcode, not at previous, which might be the first part of a string whose
5981      Note that the repeated item starts at tempcode, not at previous, which      (former) last char we repeated. */
     might be the first part of a string whose (former) last char we repeated.  
   
     Possessifying an 'exact' quantifier has no effect, so we can ignore it. But  
     an 'upto' may follow. We skip over an 'exact' item, and then test the  
     length of what remains before proceeding. */  
5982    
5983      if (possessive_quantifier)      if (possessive_quantifier)
5984        {        {
5985        int len;        int len;
5986    
5987        if (*tempcode == OP_TYPEEXACT)        /* Possessifying an EXACT quantifier has no effect, so we can ignore it.
5988          However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6},
5989          {5,}, or {5,10}). We skip over an EXACT item; if the length of what
5990          remains is greater than zero, there's a further opcode that can be
5991          handled. If not, do nothing, leaving the EXACT alone. */
5992    
5993          switch(*tempcode)
5994            {
5995            case OP_TYPEEXACT:
5996          tempcode += PRIV(OP_lengths)[*tempcode] +          tempcode += PRIV(OP_lengths)[*tempcode] +
5997            ((tempcode[1 + IMM2_SIZE] == OP_PROP            ((tempcode[1 + IMM2_SIZE] == OP_PROP
5998            || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);            || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
5999            break;
6000    
6001        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)          /* CHAR opcodes are used for exacts whose count is 1. */
6002          {  
6003            case OP_CHAR:
6004            case OP_CHARI:
6005            case OP_NOT:
6006            case OP_NOTI:
6007            case OP_EXACT:
6008            case OP_EXACTI:
6009            case OP_NOTEXACT:
6010            case OP_NOTEXACTI:
6011          tempcode += PRIV(OP_lengths)[*tempcode];          tempcode += PRIV(OP_lengths)[*tempcode];
6012  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
6013          if (utf && HAS_EXTRALEN(tempcode[-1]))          if (utf && HAS_EXTRALEN(tempcode[-1]))
6014            tempcode += GET_EXTRALEN(tempcode[-1]);            tempcode += GET_EXTRALEN(tempcode[-1]);
6015  #endif  #endif
6016            break;
6017    
6018            /* For the class opcodes, the repeat operator appears at the end;
6019            adjust tempcode to point to it. */
6020    
6021            case OP_CLASS:
6022            case OP_NCLASS:
6023            tempcode += 1 + 32/sizeof(pcre_uchar);
6024            break;
6025    
6026    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6027            case OP_XCLASS:
6028            tempcode += GET(tempcode, 1);
6029            break;
6030    #endif
6031          }          }
6032    
6033          /* If tempcode is equal to code (which points to the end of the repeated
6034          item), it means we have skipped an EXACT item but there is no following
6035          QUERY, STAR, or UPTO; the value of len will be 0, and we do nothing. In
6036          all other cases, tempcode will be pointing to the repeat opcode, and will
6037          be less than code, so the value of len will be greater than 0. */
6038    
6039        len = (int)(code - tempcode);        len = (int)(code - tempcode);
6040          if (len > 0)
6041            {
6042            unsigned int repcode = *tempcode;
6043    
6044            /* There is a table for possessifying opcodes, all of which are less
6045            than OP_CALLOUT. A zero entry means there is no possessified version.
6046            */
6047    
6048            if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0)
6049              *tempcode = opcode_possessify[repcode];
6050    
6051            /* For opcodes without a special possessified version, wrap the item in
6052            ONCE brackets. Because we are moving code along, we must ensure that any
6053            pending recursive references are updated. */
6054    
6055            else
6056              {
6057              *code = OP_END;
6058              adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
6059              memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
6060              code += 1 + LINK_SIZE;
6061              len += 1 + LINK_SIZE;
6062              tempcode[0] = OP_ONCE;
6063              *code++ = OP_KET;
6064              PUTINC(code, 0, len);
6065              PUT(tempcode, 1, len);
6066              }
6067            }
6068    
6069    #ifdef NEVER
6070        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
6071          {          {
6072          case OP_STAR:  *tempcode = OP_POSSTAR; break;          case OP_STAR:  *tempcode = OP_POSSTAR; break;
# Line 5974  for (;; ptr++) Line 6094  for (;; ptr++)
6094          case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break;          case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break;
6095          case OP_TYPEUPTO:  *tempcode = OP_TYPEPOSUPTO; break;          case OP_TYPEUPTO:  *tempcode = OP_TYPEPOSUPTO; break;
6096    
6097            case OP_CRSTAR:   *tempcode = OP_CRPOSSTAR; break;
6098            case OP_CRPLUS:   *tempcode = OP_CRPOSPLUS; break;
6099            case OP_CRQUERY:  *tempcode = OP_CRPOSQUERY; break;
6100            case OP_CRRANGE:  *tempcode = OP_CRPOSRANGE; break;
6101    
6102          /* Because we are moving code along, we must ensure that any          /* Because we are moving code along, we must ensure that any
6103          pending recursive references are updated. */          pending recursive references are updated. */
6104    
# Line 5989  for (;; ptr++) Line 6114  for (;; ptr++)
6114          PUT(tempcode, 1, len);          PUT(tempcode, 1, len);
6115          break;          break;
6116          }          }
6117    #endif
6118        }        }
6119    
6120      /* In all cases we no longer have a previous item. We also set the      /* In all cases we no longer have a previous item. We also set the
# Line 9044  return (pcre32 *)re; Line 9170  return (pcre32 *)re;
9170  }  }
9171    
9172  /* End of pcre_compile.c */  /* End of pcre_compile.c */
9173    

Legend:
Removed from v.1379  
changed lines
  Added in v.1380

  ViewVC Help
Powered by ViewVC 1.1.5