/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 604 by ph10, Thu Jun 2 19:04:54 2011 UTC | revision 618 by ph10, Sat Jul 16 17:24:16 2011 UTC
# Line 1694  _pcre_find_bracket(const uschar *code, B Line 1694  _pcre_find_bracket(const uschar *code, B
1694  for (;;)  for (;;)
1695    {    {
1696    register int c = *code;    register int c = *code;
1697    
1698    if (c == OP_END) return NULL;    if (c == OP_END) return NULL;
1699    
1700    /* XCLASS is used for classes that cannot be represented just by a bit    /* XCLASS is used for classes that cannot be represented just by a bit
# Line 4510  for (;; ptr++) Line 4511  for (;; ptr++)
4511        int len = (int)(code - previous);        int len = (int)(code - previous);
4512        uschar *bralink = NULL;        uschar *bralink = NULL;
4513        uschar *brazeroptr = NULL;        uschar *brazeroptr = NULL;
4514    
4515        /* Repeating a DEFINE group is pointless */        /* Repeating a DEFINE group is pointless */
4516    
4517        if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)        if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
# Line 4726  for (;; ptr++) Line 4727  for (;; ptr++)
4727          }          }
4728    
4729        /* If the maximum is unlimited, set a repeater in the final copy. For        /* If the maximum is unlimited, set a repeater in the final copy. For
4730        ONCE brackets, that's all we need to do.        ONCE brackets, that's all we need to do.
4731    
4732          (To be done next, after recursion adjusted)
4733          However, possessively repeated
4734          ONCE brackets can be converted into non-capturing brackets, as the
4735          behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
4736          deal with possessive ONCEs specially.
4737          (....)
4738    
4739        Otherwise, if the quantifier was possessive, we convert the BRA code to        Otherwise, if the quantifier was possessive, we convert the BRA code to
4740        the POS form, and the KET code to KETRPOS. (It turns out to be convenient        the POS form, and the KET code to KETRPOS. (It turns out to be convenient
4741        at runtime to detect this kind of subpattern at both the start and at the        at runtime to detect this kind of subpattern at both the start and at the
4742        end.) If the group is preceded by OP_BRAZERO, convert this to        end.) The use of special opcodes makes it possible to reduce greatly the
4743        OP_BRAPOSZERO. Then cancel the possessive flag so that the default action        stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO,
4744        below, of wrapping everything inside atomic brackets, does not happen.        convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that
4745          the default action below, of wrapping everything inside atomic brackets,
4746          does not happen.
4747    
4748        Then, when we are doing the actual compile phase, check to see whether        Then, when we are doing the actual compile phase, check to see whether
4749        this group is one that could match an empty string. If so, convert the        this group is one that could match an empty string. If so, convert the
# Line 4746  for (;; ptr++) Line 4756  for (;; ptr++)
4756          uschar *ketcode = code - 1 - LINK_SIZE;          uschar *ketcode = code - 1 - LINK_SIZE;
4757          uschar *bracode = ketcode - GET(ketcode, 1);          uschar *bracode = ketcode - GET(ketcode, 1);
4758    
4759          if (*bracode == OP_ONCE)  /****
4760            if (*bracode == OP_ONCE && possessive_quantifier)
4761              *bracode = OP_BRA;
4762    ****/
4763    
4764            if (*bracode == OP_ONCE)
4765            *ketcode = OP_KETRMAX + repeat_type;            *ketcode = OP_KETRMAX + repeat_type;
4766          else          else
4767            {            {
# Line 4793  for (;; ptr++) Line 4808  for (;; ptr++)
4808        }        }
4809    
4810      /* If the character following a repeat is '+', or if certain optimization      /* If the character following a repeat is '+', or if certain optimization
4811      tests above succeeded, possessive_quantifier is TRUE. For some of the      tests above succeeded, possessive_quantifier is TRUE. For some opcodes,
4812      simpler opcodes, there is an special alternative opcode for this. For      there are special alternative opcodes for this case. For anything else, we
4813      anything else, we wrap the entire repeated item inside OP_ONCE brackets.      wrap the entire repeated item inside OP_ONCE brackets. Logically, the '+'
4814      The '+' notation is just syntactic sugar, taken from Sun's Java package,      notation is just syntactic sugar, taken from Sun's Java package, but the
4815      but the special opcodes can optimize it a bit. The repeated item starts at      special opcodes can optimize it.
4816      tempcode, not at previous, which might be the first part of a string whose  
4817      (former) last char we repeated.      Possessively repeated subpatterns have already been handled in the code
4818        just above, so possessive_quantifier is always FALSE for them at this
4819        stage.
4820    
4821        Note that the repeated item starts at tempcode, not at previous, which
4822        might be the first part of a string whose (former) last char we repeated.
4823    
4824      Possessifying an 'exact' quantifier has no effect, so we can ignore it. But      Possessifying an 'exact' quantifier has no effect, so we can ignore it. But
4825      an 'upto' may follow. We skip over an 'exact' item, and then test the      an 'upto' may follow. We skip over an 'exact' item, and then test the
# Line 4924  for (;; ptr++) Line 4944  for (;; ptr++)
4944          if (namelen == verbs[i].len &&          if (namelen == verbs[i].len &&
4945              strncmp((char *)name, vn, namelen) == 0)              strncmp((char *)name, vn, namelen) == 0)
4946            {            {
4947            /* Check for open captures before ACCEPT */            /* Check for open captures before ACCEPT and convert it to
4948              ASSERT_ACCEPT if in an assertion. */
4949    
4950            if (verbs[i].op == OP_ACCEPT)            if (verbs[i].op == OP_ACCEPT)
4951              {              {
4952              open_capitem *oc;              open_capitem *oc;
4953                if (arglen != 0)
4954                  {
4955                  *errorcodeptr = ERR59;
4956                  goto FAILED;
4957                  }
4958              cd->had_accept = TRUE;              cd->had_accept = TRUE;
4959              for (oc = cd->open_caps; oc != NULL; oc = oc->next)              for (oc = cd->open_caps; oc != NULL; oc = oc->next)
4960                {                {
4961                *code++ = OP_CLOSE;                *code++ = OP_CLOSE;
4962                PUT2INC(code, 0, oc->number);                PUT2INC(code, 0, oc->number);
4963                }                }
4964                *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
4965              }              }
4966    
4967            /* Handle the cases with/without an argument */            /* Handle other cases with/without an argument */
4968    
4969            if (arglen == 0)            else if (arglen == 0)
4970              {              {
4971              if (verbs[i].op < 0)   /* Argument is mandatory */              if (verbs[i].op < 0)   /* Argument is mandatory */
4972                {                {
# Line 5228  for (;; ptr++) Line 5255  for (;; ptr++)
5255          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5256          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
5257          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
5258            cd->assert_depth += 1;
5259          ptr++;          ptr++;
5260          break;          break;
5261    
# Line 5242  for (;; ptr++) Line 5270  for (;; ptr++)
5270            continue;            continue;
5271            }            }
5272          bravalue = OP_ASSERT_NOT;          bravalue = OP_ASSERT_NOT;
5273            cd->assert_depth += 1;
5274          break;          break;
5275    
5276    
# Line 5251  for (;; ptr++) Line 5280  for (;; ptr++)
5280            {            {
5281            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
5282            bravalue = OP_ASSERTBACK;            bravalue = OP_ASSERTBACK;
5283              cd->assert_depth += 1;
5284            ptr += 2;            ptr += 2;
5285            break;            break;
5286    
5287            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
5288            bravalue = OP_ASSERTBACK_NOT;            bravalue = OP_ASSERTBACK_NOT;
5289              cd->assert_depth += 1;
5290            ptr += 2;            ptr += 2;
5291            break;            break;
5292    
# Line 5667  for (;; ptr++) Line 5698  for (;; ptr++)
5698            /* Insert the recursion/subroutine item, automatically wrapped inside            /* Insert the recursion/subroutine item, automatically wrapped inside
5699            "once" brackets. Set up a "previous group" length so that a            "once" brackets. Set up a "previous group" length so that a
5700            subsequent quantifier will work. */            subsequent quantifier will work. */
5701    
5702            *code = OP_ONCE;            *code = OP_ONCE;
5703            PUT(code, 1, 2 + 2*LINK_SIZE);            PUT(code, 1, 2 + 2*LINK_SIZE);
5704            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
# Line 5823  for (;; ptr++) Line 5854  for (;; ptr++)
5854             &length_prevgroup           /* Pre-compile phase */             &length_prevgroup           /* Pre-compile phase */
5855           ))           ))
5856        goto FAILED;        goto FAILED;
5857    
5858        if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
5859          cd->assert_depth -= 1;
5860    
5861      /* At the end of compiling, code is still pointing to the start of the      /* At the end of compiling, code is still pointing to the start of the
5862      group, while tempcode has been updated to point past the end of the group      group, while tempcode has been updated to point past the end of the group
# Line 5894  for (;; ptr++) Line 5928  for (;; ptr++)
5928          goto FAILED;          goto FAILED;
5929          }          }
5930        *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;        *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
5931        *code++ = OP_BRA;        code++;   /* This already contains bravalue */
5932        PUTINC(code, 0, 1 + LINK_SIZE);        PUTINC(code, 0, 1 + LINK_SIZE);
5933        *code++ = OP_KET;        *code++ = OP_KET;
5934        PUTINC(code, 0, 1 + LINK_SIZE);        PUTINC(code, 0, 1 + LINK_SIZE);
# Line 6969  utf8 = (options & PCRE_UTF8) != 0; Line 7003  utf8 = (options & PCRE_UTF8) != 0;
7003    
7004  /* Can't support UTF8 unless PCRE has been compiled to include the code. The  /* Can't support UTF8 unless PCRE has been compiled to include the code. The
7005  return of an error code from _pcre_valid_utf8() is a new feature, introduced in  return of an error code from _pcre_valid_utf8() is a new feature, introduced in
7006  release 8.13. The only use we make of it here is to adjust the offset value to  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
7007  the end of the string for a short string error, for compatibility with previous  not used here. */
 versions. */  
7008    
7009  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
7010  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
7011       (*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1, &errorcode)) >= 0)       (errorcode = _pcre_valid_utf8((USPTR)pattern, -1, erroroffset)) != 0)
7012    {    {
7013    errorcode = ERR44;    errorcode = ERR44;
7014    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
# Line 7146  field; this time it's used for rememberi Line 7179  field; this time it's used for rememberi
7179  */  */
7180    
7181  cd->final_bracount = cd->bracount;  /* Save for checking forward references */  cd->final_bracount = cd->bracount;  /* Save for checking forward references */
7182    cd->assert_depth = 0;
7183  cd->bracount = 0;  cd->bracount = 0;
7184  cd->names_found = 0;  cd->names_found = 0;
7185  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (uschar *)re + re->name_table_offset;

Legend:
Removed from v.604  
changed lines
  Added in v.618

  ViewVC Help
Powered by ViewVC 1.1.5