/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 721 by ph10, Fri Oct 7 15:51:39 2011 UTC revision 758 by ph10, Mon Nov 21 12:05:36 2011 UTC
# Line 410  static const char error_texts[] = Line 410  static const char error_texts[] =
410    "this version of PCRE is not compiled with PCRE_UCP support\0"    "this version of PCRE is not compiled with PCRE_UCP support\0"
411    "\\c must be followed by an ASCII character\0"    "\\c must be followed by an ASCII character\0"
412    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
413      /* 70 */
414      "internal error: unknown opcode in find_fixedlength()\0"
415      "\\N is not supported in a class\0"
416    ;    ;
417    
418  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 676  else Line 679  else
679    
680      case CHAR_l:      case CHAR_l:
681      case CHAR_L:      case CHAR_L:
682        *errorcodeptr = ERR37;
683        break;
684    
685      case CHAR_u:      case CHAR_u:
686        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
687          {
688          /* In JavaScript, \u must be followed by exactly four hexadecimal
689          digits. Otherwise it is treated as a literal lowercase letter "u". */
690          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0
691               && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0)
692            {
693            c = 0;
694            for (i = 0; i < 4; ++i)
695              {
696              register int cc = *(++ptr);
697    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
698              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
699              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
700    #else           /* EBCDIC coding */
701              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
702              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
703    #endif
704              }
705            }
706          }
707        else
708          *errorcodeptr = ERR37;
709        break;
710    
711      case CHAR_U:      case CHAR_U:
712      *errorcodeptr = ERR37;      /* In JavaScript, \U is an uppercase U letter. */
713        if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37;
714      break;      break;
715    
716      /* In a character class, \g is just a literal "g". Outside a character      /* In a character class, \g is just a literal "g". Outside a character
# Line 828  else Line 860  else
860      treated as a data character. */      treated as a data character. */
861    
862      case CHAR_x:      case CHAR_x:
863        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
864          {
865          /* In JavaScript, \x must be followed by exactly two hexadecimal
866          digits. Otherwise it is treated as a literal lowercase letter "x". */
867          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0)
868            {
869            c = 0;
870            for (i = 0; i < 2; ++i)
871              {
872              register int cc = *(++ptr);
873    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
874              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
875              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
876    #else           /* EBCDIC coding */
877              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
878              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
879    #endif
880              }
881            }
882          break;
883          }
884    
885      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
886        {        {
887        const uschar *pt = ptr + 2;        const uschar *pt = ptr + 2;
# Line 1475  Arguments: Line 1529  Arguments:
1529    
1530  Returns:   the fixed length,  Returns:   the fixed length,
1531               or -1 if there is no fixed length,               or -1 if there is no fixed length,
1532               or -2 if \C was encountered               or -2 if \C was encountered (in UTF-8 mode only)
1533               or -3 if an OP_RECURSE item was encountered and atend is FALSE               or -3 if an OP_RECURSE item was encountered and atend is FALSE
1534                 or -4 if an unknown opcode was encountered (internal error)
1535  */  */
1536    
1537  static int  static int
# Line 1500  for (;;) Line 1555  for (;;)
1555      /* We only need to continue for OP_CBRA (normal capturing bracket) and      /* We only need to continue for OP_CBRA (normal capturing bracket) and
1556      OP_BRA (normal non-capturing bracket) because the other variants of these      OP_BRA (normal non-capturing bracket) because the other variants of these
1557      opcodes are all concerned with unlimited repeated groups, which of course      opcodes are all concerned with unlimited repeated groups, which of course
1558      are not of fixed length. They will cause a -1 response from the default      are not of fixed length. */
     case of this switch. */  
1559    
1560      case OP_CBRA:      case OP_CBRA:
1561      case OP_BRA:      case OP_BRA:
1562      case OP_ONCE:      case OP_ONCE:
1563        case OP_ONCE_NC:
1564      case OP_COND:      case OP_COND:
1565      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);
1566      if (d < 0) return d;      if (d < 0) return d;
# Line 1514  for (;;) Line 1569  for (;;)
1569      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1570      break;      break;
1571    
1572      /* Reached end of a branch; if it's a ket it is the end of a nested      /* Reached end of a branch; if it's a ket it is the end of a nested call.
1573      call. If it's ALT it is an alternation in a nested call. If it is      If it's ALT it is an alternation in a nested call. An ACCEPT is effectively
1574      END it's the end of the outer call. All can be handled by the same code.      an ALT. If it is END it's the end of the outer call. All can be handled by
1575      Note that we must not include the OP_KETRxxx opcodes here, because they      the same code. Note that we must not include the OP_KETRxxx opcodes here,
1576      all imply an unlimited repeat. */      because they all imply an unlimited repeat. */
1577    
1578      case OP_ALT:      case OP_ALT:
1579      case OP_KET:      case OP_KET:
1580      case OP_END:      case OP_END:
1581        case OP_ACCEPT:
1582        case OP_ASSERT_ACCEPT:
1583      if (length < 0) length = branchlength;      if (length < 0) length = branchlength;
1584        else if (length != branchlength) return -1;        else if (length != branchlength) return -1;
1585      if (*cc != OP_ALT) return length;      if (*cc != OP_ALT) return length;
# Line 1556  for (;;) Line 1613  for (;;)
1613    
1614      /* Skip over things that don't match chars */      /* Skip over things that don't match chars */
1615    
1616      case OP_REVERSE:      case OP_MARK:
1617      case OP_CREF:      case OP_PRUNE_ARG:
1618      case OP_NCREF:      case OP_SKIP_ARG:
1619      case OP_RREF:      case OP_THEN_ARG:
1620      case OP_NRREF:      cc += cc[1] + _pcre_OP_lengths[*cc];
1621      case OP_DEF:      break;
1622    
1623      case OP_CALLOUT:      case OP_CALLOUT:
     case OP_SOD:  
     case OP_SOM:  
     case OP_SET_SOM:  
     case OP_EOD:  
     case OP_EODN:  
1624      case OP_CIRC:      case OP_CIRC:
1625      case OP_CIRCM:      case OP_CIRCM:
1626        case OP_CLOSE:
1627        case OP_COMMIT:
1628        case OP_CREF:
1629        case OP_DEF:
1630      case OP_DOLL:      case OP_DOLL:
1631      case OP_DOLLM:      case OP_DOLLM:
1632        case OP_EOD:
1633        case OP_EODN:
1634        case OP_FAIL:
1635        case OP_NCREF:
1636        case OP_NRREF:
1637      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
1638        case OP_PRUNE:
1639        case OP_REVERSE:
1640        case OP_RREF:
1641        case OP_SET_SOM:
1642        case OP_SKIP:
1643        case OP_SOD:
1644        case OP_SOM:
1645        case OP_THEN:
1646      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
1647      cc += _pcre_OP_lengths[*cc];      cc += _pcre_OP_lengths[*cc];
1648      break;      break;
# Line 1594  for (;;) Line 1664  for (;;)
1664      need to skip over a multibyte character in UTF8 mode.  */      need to skip over a multibyte character in UTF8 mode.  */
1665    
1666      case OP_EXACT:      case OP_EXACT:
1667        case OP_EXACTI:
1668        case OP_NOTEXACT:
1669        case OP_NOTEXACTI:
1670      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1671      cc += 4;      cc += 4;
1672  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1614  for (;;) Line 1687  for (;;)
1687      cc += 2;      cc += 2;
1688      /* Fall through */      /* Fall through */
1689    
1690        case OP_HSPACE:
1691        case OP_VSPACE:
1692        case OP_NOT_HSPACE:
1693        case OP_NOT_VSPACE:
1694      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1695      case OP_DIGIT:      case OP_DIGIT:
1696      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
# Line 1626  for (;;) Line 1703  for (;;)
1703      cc++;      cc++;
1704      break;      break;
1705    
1706      /* The single-byte matcher isn't allowed */      /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode;
1707        otherwise \C is coded as OP_ALLANY. */
1708    
1709      case OP_ANYBYTE:      case OP_ANYBYTE:
1710      return -2;      return -2;
# Line 1645  for (;;) Line 1723  for (;;)
1723    
1724      switch (*cc)      switch (*cc)
1725        {        {
1726          case OP_CRPLUS:
1727          case OP_CRMINPLUS:
1728        case OP_CRSTAR:        case OP_CRSTAR:
1729        case OP_CRMINSTAR:        case OP_CRMINSTAR:
1730        case OP_CRQUERY:        case OP_CRQUERY:
# Line 1665  for (;;) Line 1745  for (;;)
1745    
1746      /* Anything else is variable length */      /* Anything else is variable length */
1747    
1748      default:      case OP_ANYNL:
1749        case OP_BRAMINZERO:
1750        case OP_BRAPOS:
1751        case OP_BRAPOSZERO:
1752        case OP_BRAZERO:
1753        case OP_CBRAPOS:
1754        case OP_EXTUNI:
1755        case OP_KETRMAX:
1756        case OP_KETRMIN:
1757        case OP_KETRPOS:
1758        case OP_MINPLUS:
1759        case OP_MINPLUSI:
1760        case OP_MINQUERY:
1761        case OP_MINQUERYI:
1762        case OP_MINSTAR:
1763        case OP_MINSTARI:
1764        case OP_MINUPTO:
1765        case OP_MINUPTOI:
1766        case OP_NOTMINPLUS:
1767        case OP_NOTMINPLUSI:
1768        case OP_NOTMINQUERY:
1769        case OP_NOTMINQUERYI:
1770        case OP_NOTMINSTAR:
1771        case OP_NOTMINSTARI:
1772        case OP_NOTMINUPTO:
1773        case OP_NOTMINUPTOI:
1774        case OP_NOTPLUS:
1775        case OP_NOTPLUSI:
1776        case OP_NOTPOSPLUS:
1777        case OP_NOTPOSPLUSI:
1778        case OP_NOTPOSQUERY:
1779        case OP_NOTPOSQUERYI:
1780        case OP_NOTPOSSTAR:
1781        case OP_NOTPOSSTARI:
1782        case OP_NOTPOSUPTO:
1783        case OP_NOTPOSUPTOI:
1784        case OP_NOTQUERY:
1785        case OP_NOTQUERYI:
1786        case OP_NOTSTAR:
1787        case OP_NOTSTARI:
1788        case OP_NOTUPTO:
1789        case OP_NOTUPTOI:
1790        case OP_PLUS:
1791        case OP_PLUSI:
1792        case OP_POSPLUS:
1793        case OP_POSPLUSI:
1794        case OP_POSQUERY:
1795        case OP_POSQUERYI:
1796        case OP_POSSTAR:
1797        case OP_POSSTARI:
1798        case OP_POSUPTO:
1799        case OP_POSUPTOI:
1800        case OP_QUERY:
1801        case OP_QUERYI:
1802        case OP_REF:
1803        case OP_REFI:
1804        case OP_SBRA:
1805        case OP_SBRAPOS:
1806        case OP_SCBRA:
1807        case OP_SCBRAPOS:
1808        case OP_SCOND:
1809        case OP_SKIPZERO:
1810        case OP_STAR:
1811        case OP_STARI:
1812        case OP_TYPEMINPLUS:
1813        case OP_TYPEMINQUERY:
1814        case OP_TYPEMINSTAR:
1815        case OP_TYPEMINUPTO:
1816        case OP_TYPEPLUS:
1817        case OP_TYPEPOSPLUS:
1818        case OP_TYPEPOSQUERY:
1819        case OP_TYPEPOSSTAR:
1820        case OP_TYPEPOSUPTO:
1821        case OP_TYPEQUERY:
1822        case OP_TYPESTAR:
1823        case OP_TYPEUPTO:
1824        case OP_UPTO:
1825        case OP_UPTOI:
1826      return -1;      return -1;
1827    
1828        /* Catch unrecognized opcodes so that when new ones are added they
1829        are not forgotten, as has happened in the past. */
1830    
1831        default:
1832        return -4;
1833      }      }
1834    }    }
1835  /* Control never gets here */  /* Control never gets here */
# Line 2045  for (code = first_significant_code(code Line 2208  for (code = first_significant_code(code
2208    
2209    if (c == OP_BRA  || c == OP_BRAPOS ||    if (c == OP_BRA  || c == OP_BRAPOS ||
2210        c == OP_CBRA || c == OP_CBRAPOS ||        c == OP_CBRA || c == OP_CBRAPOS ||
2211        c == OP_ONCE || c == OP_COND)        c == OP_ONCE || c == OP_ONCE_NC ||
2212          c == OP_COND)
2213      {      {
2214      BOOL empty_branch;      BOOL empty_branch;
2215      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
# Line 3142  for (;; ptr++) Line 3306  for (;; ptr++)
3306    int subfirstbyte;    int subfirstbyte;
3307    int terminator;    int terminator;
3308    int mclength;    int mclength;
3309      int tempbracount;
3310    uschar mcbuffer[8];    uschar mcbuffer[8];
3311    
3312    /* Get next byte in the pattern */    /* Get next byte in the pattern */
# Line 3189  for (;; ptr++) Line 3354  for (;; ptr++)
3354        }        }
3355    
3356      *lengthptr += (int)(code - last_code);      *lengthptr += (int)(code - last_code);
3357      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, (int)(code - last_code),
3358          c));
3359    
3360      /* If "previous" is set and it is not at the start of the work space, move      /* If "previous" is set and it is not at the start of the work space, move
3361      it back to there, in order to avoid filling up the work space. Otherwise,      it back to there, in order to avoid filling up the work space. Otherwise,
# Line 3605  for (;; ptr++) Line 3771  for (;; ptr++)
3771          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
3772    
3773          if (-c == ESC_b) c = CHAR_BS;    /* \b is backspace in a class */          if (-c == ESC_b) c = CHAR_BS;    /* \b is backspace in a class */
3774            else if (-c == ESC_N)            /* \N is not supported in a class */
3775              {
3776              *errorcodeptr = ERR71;
3777              goto FAILED;
3778              }
3779          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (-c == ESC_Q)            /* Handle start of quoted string */
3780            {            {
3781            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
# Line 4261  for (;; ptr++) Line 4432  for (;; ptr++)
4432      past, but it no longer happens for non-repeated recursions. In fact, the      past, but it no longer happens for non-repeated recursions. In fact, the
4433      repeated ones could be re-implemented independently so as not to need this,      repeated ones could be re-implemented independently so as not to need this,
4434      but for the moment we rely on the code for repeating groups. */      but for the moment we rely on the code for repeating groups. */
4435    
4436      if (*previous == OP_RECURSE)      if (*previous == OP_RECURSE)
4437        {        {
4438        memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);        memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
# Line 4819  for (;; ptr++) Line 4990  for (;; ptr++)
4990        ONCE brackets can be converted into non-capturing brackets, as the        ONCE brackets can be converted into non-capturing brackets, as the
4991        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
4992        deal with possessive ONCEs specially.        deal with possessive ONCEs specially.
4993    
4994        Otherwise, if the quantifier was possessive, we convert the BRA code to        Otherwise, when we are doing the actual compile phase, check to see
4995        the POS form, and the KET code to KETRPOS. (It turns out to be convenient        whether this group is one that could match an empty string. If so,
4996        at runtime to detect this kind of subpattern at both the start and at the        convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
4997        end.) The use of special opcodes makes it possible to reduce greatly the        that runtime checking can be done. [This check is also applied to ONCE
4998        stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO,        groups at runtime, but in a different way.]
4999        convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that  
5000        the default action below, of wrapping everything inside atomic brackets,        Then, if the quantifier was possessive and the bracket is not a
5001        does not happen.        conditional, we convert the BRA code to the POS form, and the KET code to
5002          KETRPOS. (It turns out to be convenient at runtime to detect this kind of
5003        Then, when we are doing the actual compile phase, check to see whether        subpattern at both the start and at the end.) The use of special opcodes
5004        this group is one that could match an empty string. If so, convert the        makes it possible to reduce greatly the stack usage in pcre_exec(). If
5005        initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime        the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
5006        checking can be done. [This check is also applied to ONCE groups at  
5007        runtime, but in a different way.] */        Then, if the minimum number of matches is 1 or 0, cancel the possessive
5008          flag so that the default action below, of wrapping everything inside
5009          atomic brackets, does not happen. When the minimum is greater than 1,
5010          there will be earlier copies of the group, and so we still have to wrap
5011          the whole thing. */
5012    
5013        else        else
5014          {          {
5015          uschar *ketcode = code - 1 - LINK_SIZE;          uschar *ketcode = code - 1 - LINK_SIZE;
5016          uschar *bracode = ketcode - GET(ketcode, 1);          uschar *bracode = ketcode - GET(ketcode, 1);
5017    
5018          if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;          /* Convert possessive ONCE brackets to non-capturing */
5019          if (*bracode == OP_ONCE)  
5020            if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
5021                possessive_quantifier) *bracode = OP_BRA;
5022    
5023            /* For non-possessive ONCE brackets, all we need to do is to
5024            set the KET. */
5025    
5026            if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
5027            *ketcode = OP_KETRMAX + repeat_type;            *ketcode = OP_KETRMAX + repeat_type;
5028    
5029            /* Handle non-ONCE brackets and possessive ONCEs (which have been
5030            converted to non-capturing above). */
5031    
5032          else          else
5033            {            {
5034            if (possessive_quantifier)            /* In the compile phase, check for empty string matching. */
5035              {  
             *bracode += 1;                   /* Switch to xxxPOS opcodes */  
             *ketcode = OP_KETRPOS;  
             if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;  
             possessive_quantifier = FALSE;  
             }  
           else *ketcode = OP_KETRMAX + repeat_type;  
   
5036            if (lengthptr == NULL)            if (lengthptr == NULL)
5037              {              {
5038              uschar *scode = bracode;              uschar *scode = bracode;
# Line 4868  for (;; ptr++) Line 5047  for (;; ptr++)
5047                }                }
5048              while (*scode == OP_ALT);              while (*scode == OP_ALT);
5049              }              }
5050    
5051              /* Handle possessive quantifiers. */
5052    
5053              if (possessive_quantifier)
5054                {
5055                /* For COND brackets, we wrap the whole thing in a possessively
5056                repeated non-capturing bracket, because we have not invented POS
5057                versions of the COND opcodes. Because we are moving code along, we
5058                must ensure that any pending recursive references are updated. */
5059    
5060                if (*bracode == OP_COND || *bracode == OP_SCOND)
5061                  {
5062                  int nlen = (int)(code - bracode);
5063                  *code = OP_END;
5064                  adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm);
5065                  memmove(bracode + 1+LINK_SIZE, bracode, nlen);
5066                  code += 1 + LINK_SIZE;
5067                  nlen += 1 + LINK_SIZE;
5068                  *bracode = OP_BRAPOS;
5069                  *code++ = OP_KETRPOS;
5070                  PUTINC(code, 0, nlen);
5071                  PUT(bracode, 1, nlen);
5072                  }
5073    
5074                /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
5075    
5076                else
5077                  {
5078                  *bracode += 1;              /* Switch to xxxPOS opcodes */
5079                  *ketcode = OP_KETRPOS;
5080                  }
5081    
5082                /* If the minimum is zero, mark it as possessive, then unset the
5083                possessive flag when the minimum is 0 or 1. */
5084    
5085                if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
5086                if (repeat_min < 2) possessive_quantifier = FALSE;
5087                }
5088    
5089              /* Non-possessive quantifier */
5090    
5091              else *ketcode = OP_KETRMAX + repeat_type;
5092            }            }
5093          }          }
5094        }        }
# Line 4894  for (;; ptr++) Line 5115  for (;; ptr++)
5115      notation is just syntactic sugar, taken from Sun's Java package, but the      notation is just syntactic sugar, taken from Sun's Java package, but the
5116      special opcodes can optimize it.      special opcodes can optimize it.
5117    
5118      Possessively repeated subpatterns have already been handled in the code      Some (but not all) possessively repeated subpatterns have already been
5119      just above, so possessive_quantifier is always FALSE for them at this      completely handled in the code just above. For them, possessive_quantifier
5120      stage.      is always FALSE at this stage.
5121    
5122      Note that the repeated item starts at tempcode, not at previous, which      Note that the repeated item starts at tempcode, not at previous, which
5123      might be the first part of a string whose (former) last char we repeated.      might be the first part of a string whose (former) last char we repeated.
# Line 5386  for (;; ptr++) Line 5607  for (;; ptr++)
5607    
5608          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5609          case CHAR_C:                 /* Callout - may be followed by digits; */          case CHAR_C:                 /* Callout - may be followed by digits; */
5610          previous_callout = code;  /* Save for later completion */          previous_callout = code;     /* Save for later completion */
5611          after_manual_callout = 1; /* Skip one item before completing */          after_manual_callout = 1;    /* Skip one item before completing */
5612          *code++ = OP_CALLOUT;          *code++ = OP_CALLOUT;
5613            {            {
5614            int n = 0;            int n = 0;
# Line 5906  for (;; ptr++) Line 6127  for (;; ptr++)
6127      *code = bravalue;      *code = bravalue;
6128      tempcode = code;      tempcode = code;
6129      tempreqvary = cd->req_varyopt;        /* Save value before bracket */      tempreqvary = cd->req_varyopt;        /* Save value before bracket */
6130        tempbracount = cd->bracount;          /* Save value before bracket */
6131      length_prevgroup = 0;                 /* Initialize for pre-compile phase */      length_prevgroup = 0;                 /* Initialize for pre-compile phase */
6132    
6133      if (!compile_regex(      if (!compile_regex(
# Line 5928  for (;; ptr++) Line 6150  for (;; ptr++)
6150           ))           ))
6151        goto FAILED;        goto FAILED;
6152    
6153        /* If this was an atomic group and there are no capturing groups within it,
6154        generate OP_ONCE_NC instead of OP_ONCE. */
6155    
6156        if (bravalue == OP_ONCE && cd->bracount <= tempbracount)
6157          *code = OP_ONCE_NC;
6158    
6159      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
6160        cd->assert_depth -= 1;        cd->assert_depth -= 1;
6161    
# Line 6257  for (;; ptr++) Line 6485  for (;; ptr++)
6485            }            }
6486          else          else
6487  #endif  #endif
6488            {          /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE
6489            so that it works in DFA mode and in lookbehinds. */
6490    
6491              {
6492            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
6493            *code++ = -c;            *code++ = (!utf8 && c == -ESC_C)? OP_ALLANY : -c;
6494            }            }
6495          }          }
6496        continue;        continue;
# Line 6551  for (;;) Line 6782  for (;;)
6782          }          }
6783        else if (fixed_length < 0)        else if (fixed_length < 0)
6784          {          {
6785          *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;          *errorcodeptr = (fixed_length == -2)? ERR36 :
6786                            (fixed_length == -4)? ERR70: ERR25;
6787          *ptrptr = ptr;          *ptrptr = ptr;
6788          return FALSE;          return FALSE;
6789          }          }
# Line 6726  do { Line 6958  do {
6958    
6959     /* Other brackets */     /* Other brackets */
6960    
6961     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC ||
6962                op == OP_COND)
6963       {       {
6964       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;
6965       }       }
# Line 6830  do { Line 7063  do {
7063    
7064     /* Other brackets */     /* Other brackets */
7065    
7066     else if (op == OP_ASSERT || op == OP_ONCE)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC)
7067       {       {
7068       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
7069       }       }
# Line 6900  do { Line 7133  do {
7133       case OP_SCBRAPOS:       case OP_SCBRAPOS:
7134       case OP_ASSERT:       case OP_ASSERT:
7135       case OP_ONCE:       case OP_ONCE:
7136         case OP_ONCE_NC:
7137       case OP_COND:       case OP_COND:
7138       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)
7139         return -1;         return -1;
# Line 7348  if (cd->check_lookbehind) Line 7582  if (cd->check_lookbehind)
7582        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
7583        if (fixed_length < 0)        if (fixed_length < 0)
7584          {          {
7585          errorcode = (fixed_length == -2)? ERR36 : ERR25;          errorcode = (fixed_length == -2)? ERR36 :
7586                        (fixed_length == -4)? ERR70 : ERR25;
7587          break;          break;
7588          }          }
7589        PUT(cc, 1, fixed_length);        PUT(cc, 1, fixed_length);

Legend:
Removed from v.721  
changed lines
  Added in v.758

  ViewVC Help
Powered by ViewVC 1.1.5