/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 604 by ph10, Thu Jun 2 19:04:54 2011 UTC revision 613 by ph10, Sat Jul 2 16:59:52 2011 UTC
# Line 4510  for (;; ptr++) Line 4510  for (;; ptr++)
4510        int len = (int)(code - previous);        int len = (int)(code - previous);
4511        uschar *bralink = NULL;        uschar *bralink = NULL;
4512        uschar *brazeroptr = NULL;        uschar *brazeroptr = NULL;
4513    
4514        /* Repeating a DEFINE group is pointless */        /* Repeating a DEFINE group is pointless */
4515    
4516        if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)        if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
# Line 4731  for (;; ptr++) Line 4731  for (;; ptr++)
4731        Otherwise, if the quantifier was possessive, we convert the BRA code to        Otherwise, if the quantifier was possessive, we convert the BRA code to
4732        the POS form, and the KET code to KETRPOS. (It turns out to be convenient        the POS form, and the KET code to KETRPOS. (It turns out to be convenient
4733        at runtime to detect this kind of subpattern at both the start and at the        at runtime to detect this kind of subpattern at both the start and at the
4734        end.) If the group is preceded by OP_BRAZERO, convert this to        end.) The use of special opcodes makes it possible to reduce greatly the
4735        OP_BRAPOSZERO. Then cancel the possessive flag so that the default action        stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO,
4736        below, of wrapping everything inside atomic brackets, does not happen.        convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that
4737          the default action below, of wrapping everything inside atomic brackets,
4738          does not happen.
4739    
4740        Then, when we are doing the actual compile phase, check to see whether        Then, when we are doing the actual compile phase, check to see whether
4741        this group is one that could match an empty string. If so, convert the        this group is one that could match an empty string. If so, convert the
# Line 4793  for (;; ptr++) Line 4795  for (;; ptr++)
4795        }        }
4796    
4797      /* If the character following a repeat is '+', or if certain optimization      /* If the character following a repeat is '+', or if certain optimization
4798      tests above succeeded, possessive_quantifier is TRUE. For some of the      tests above succeeded, possessive_quantifier is TRUE. For some opcodes,
4799      simpler opcodes, there is an special alternative opcode for this. For      there are special alternative opcodes for this case. For anything else, we
4800      anything else, we wrap the entire repeated item inside OP_ONCE brackets.      wrap the entire repeated item inside OP_ONCE brackets. Logically, the '+'
4801      The '+' notation is just syntactic sugar, taken from Sun's Java package,      notation is just syntactic sugar, taken from Sun's Java package, but the
4802      but the special opcodes can optimize it a bit. The repeated item starts at      special opcodes can optimize it.
4803      tempcode, not at previous, which might be the first part of a string whose  
4804      (former) last char we repeated.      Possessively repeated subpatterns have already been handled in the code
4805        just above, so possessive_quantifier is always FALSE for them at this
4806        stage.
4807    
4808        Note that the repeated item starts at tempcode, not at previous, which
4809        might be the first part of a string whose (former) last char we repeated.
4810    
4811      Possessifying an 'exact' quantifier has no effect, so we can ignore it. But      Possessifying an 'exact' quantifier has no effect, so we can ignore it. But
4812      an 'upto' may follow. We skip over an 'exact' item, and then test the      an 'upto' may follow. We skip over an 'exact' item, and then test the
# Line 4924  for (;; ptr++) Line 4931  for (;; ptr++)
4931          if (namelen == verbs[i].len &&          if (namelen == verbs[i].len &&
4932              strncmp((char *)name, vn, namelen) == 0)              strncmp((char *)name, vn, namelen) == 0)
4933            {            {
4934            /* Check for open captures before ACCEPT */            /* Check for open captures before ACCEPT and convert it to
4935              ASSERT_ACCEPT if in an assertion. */
4936    
4937            if (verbs[i].op == OP_ACCEPT)            if (verbs[i].op == OP_ACCEPT)
4938              {              {
4939              open_capitem *oc;              open_capitem *oc;
4940                if (arglen != 0)
4941                  {
4942                  *errorcodeptr = ERR59;
4943                  goto FAILED;
4944                  }
4945              cd->had_accept = TRUE;              cd->had_accept = TRUE;
4946              for (oc = cd->open_caps; oc != NULL; oc = oc->next)              for (oc = cd->open_caps; oc != NULL; oc = oc->next)
4947                {                {
4948                *code++ = OP_CLOSE;                *code++ = OP_CLOSE;
4949                PUT2INC(code, 0, oc->number);                PUT2INC(code, 0, oc->number);
4950                }                }
4951                *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
4952              }              }
4953    
4954            /* Handle the cases with/without an argument */            /* Handle other cases with/without an argument */
4955    
4956            if (arglen == 0)            else if (arglen == 0)
4957              {              {
4958              if (verbs[i].op < 0)   /* Argument is mandatory */              if (verbs[i].op < 0)   /* Argument is mandatory */
4959                {                {
# Line 5228  for (;; ptr++) Line 5242  for (;; ptr++)
5242          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5243          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
5244          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
5245            cd->assert_depth += 1;
5246          ptr++;          ptr++;
5247          break;          break;
5248    
# Line 5242  for (;; ptr++) Line 5257  for (;; ptr++)
5257            continue;            continue;
5258            }            }
5259          bravalue = OP_ASSERT_NOT;          bravalue = OP_ASSERT_NOT;
5260            cd->assert_depth += 1;
5261          break;          break;
5262    
5263    
# Line 5251  for (;; ptr++) Line 5267  for (;; ptr++)
5267            {            {
5268            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
5269            bravalue = OP_ASSERTBACK;            bravalue = OP_ASSERTBACK;
5270              cd->assert_depth += 1;
5271            ptr += 2;            ptr += 2;
5272            break;            break;
5273    
5274            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
5275            bravalue = OP_ASSERTBACK_NOT;            bravalue = OP_ASSERTBACK_NOT;
5276              cd->assert_depth += 1;
5277            ptr += 2;            ptr += 2;
5278            break;            break;
5279    
# Line 5823  for (;; ptr++) Line 5841  for (;; ptr++)
5841             &length_prevgroup           /* Pre-compile phase */             &length_prevgroup           /* Pre-compile phase */
5842           ))           ))
5843        goto FAILED;        goto FAILED;
5844    
5845        if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
5846          cd->assert_depth -= 1;
5847    
5848      /* At the end of compiling, code is still pointing to the start of the      /* At the end of compiling, code is still pointing to the start of the
5849      group, while tempcode has been updated to point past the end of the group      group, while tempcode has been updated to point past the end of the group
# Line 5894  for (;; ptr++) Line 5915  for (;; ptr++)
5915          goto FAILED;          goto FAILED;
5916          }          }
5917        *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;        *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
5918        *code++ = OP_BRA;        code++;   /* This already contains bravalue */
5919        PUTINC(code, 0, 1 + LINK_SIZE);        PUTINC(code, 0, 1 + LINK_SIZE);
5920        *code++ = OP_KET;        *code++ = OP_KET;
5921        PUTINC(code, 0, 1 + LINK_SIZE);        PUTINC(code, 0, 1 + LINK_SIZE);
# Line 6969  utf8 = (options & PCRE_UTF8) != 0; Line 6990  utf8 = (options & PCRE_UTF8) != 0;
6990    
6991  /* Can't support UTF8 unless PCRE has been compiled to include the code. The  /* Can't support UTF8 unless PCRE has been compiled to include the code. The
6992  return of an error code from _pcre_valid_utf8() is a new feature, introduced in  return of an error code from _pcre_valid_utf8() is a new feature, introduced in
6993  release 8.13. The only use we make of it here is to adjust the offset value to  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
6994  the end of the string for a short string error, for compatibility with previous  not used here. */
 versions. */  
6995    
6996  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6997  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
6998       (*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1, &errorcode)) >= 0)       (errorcode = _pcre_valid_utf8((USPTR)pattern, -1, erroroffset)) != 0)
6999    {    {
7000    errorcode = ERR44;    errorcode = ERR44;
7001    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
# Line 7146  field; this time it's used for rememberi Line 7166  field; this time it's used for rememberi
7166  */  */
7167    
7168  cd->final_bracount = cd->bracount;  /* Save for checking forward references */  cd->final_bracount = cd->bracount;  /* Save for checking forward references */
7169    cd->assert_depth = 0;
7170  cd->bracount = 0;  cd->bracount = 0;
7171  cd->names_found = 0;  cd->names_found = 0;
7172  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (uschar *)re + re->name_table_offset;

Legend:
Removed from v.604  
changed lines
  Added in v.613

  ViewVC Help
Powered by ViewVC 1.1.5