/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 459 by ph10, Sun Oct 4 09:21:39 2009 UTC | revision 487 by ph10, Wed Jan 6 10:26:55 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 53  supporting internal functions that are n Line 53  supporting internal functions that are n
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    
56  /* When DEBUG is defined, we need the pcre_printint() function, which is also  /* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is
57  used by pcretest. DEBUG is not defined when building a production library. */  also used by pcretest. PCRE_DEBUG is not defined when building a production
58    library. */
59    
60  #ifdef DEBUG  #ifdef PCRE_DEBUG
61  #include "pcre_printint.src"  #include "pcre_printint.src"
62  #endif  #endif
63    
# Line 343  static const char error_texts[] = Line 344  static const char error_texts[] =
344    "digit expected after (?+\0"    "digit expected after (?+\0"
345    "] is an invalid data character in JavaScript compatibility mode\0"    "] is an invalid data character in JavaScript compatibility mode\0"
346    /* 65 */    /* 65 */
347    "different names for subpatterns of the same number are not allowed";    "different names for subpatterns of the same number are not allowed";
348    
349    
350  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 1102  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1103  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1103        if (name != NULL && lorn == ptr - thisname &&        if (name != NULL && lorn == ptr - thisname &&
1104            strncmp((const char *)name, (const char *)thisname, lorn) == 0)            strncmp((const char *)name, (const char *)thisname, lorn) == 0)
1105          return *count;          return *count;
1106        term++;        term++;
1107        }        }
1108      }      }
1109    }    }
# Line 1148  for (; *ptr != 0; ptr++) Line 1149  for (; *ptr != 0; ptr++)
1149            break;            break;
1150          }          }
1151        else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)        else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
1152          {          {
1153          negate_class = TRUE;          negate_class = TRUE;
1154          ptr++;          ptr++;
1155          }          }
1156        else break;        else break;
1157        }        }
1158    
# Line 1340  for (;;) Line 1341  for (;;)
1341    
1342  /* Scan a branch and compute the fixed length of subject that will match it,  /* Scan a branch and compute the fixed length of subject that will match it,
1343  if the length is fixed. This is needed for dealing with backward assertions.  if the length is fixed. This is needed for dealing with backward assertions.
1344  In UTF8 mode, the result is in characters rather than bytes. The branch is  In UTF8 mode, the result is in characters rather than bytes. The branch is
1345  temporarily terminated with OP_END when this function is called.  temporarily terminated with OP_END when this function is called.
1346    
1347  This function is called when a backward assertion is encountered, so that if it  This function is called when a backward assertion is encountered, so that if it
1348  fails, the error message can point to the correct place in the pattern.  fails, the error message can point to the correct place in the pattern.
1349  However, we cannot do this when the assertion contains subroutine calls,  However, we cannot do this when the assertion contains subroutine calls,
1350  because they can be forward references. We solve this by remembering this case  because they can be forward references. We solve this by remembering this case
1351  and doing the check at the end; a flag specifies which mode we are running in.  and doing the check at the end; a flag specifies which mode we are running in.
1352    
1353  Arguments:  Arguments:
1354    code     points to the start of the pattern (the bracket)    code     points to the start of the pattern (the bracket)
1355    options  the compiling options    options  the compiling options
1356    atend    TRUE if called when the pattern is complete    atend    TRUE if called when the pattern is complete
1357    cd       the "compile data" structure    cd       the "compile data" structure
1358    
1359  Returns:   the fixed length,  Returns:   the fixed length,
1360               or -1 if there is no fixed length,               or -1 if there is no fixed length,
1361               or -2 if \C was encountered               or -2 if \C was encountered
1362               or -3 if an OP_RECURSE item was encountered and atend is FALSE               or -3 if an OP_RECURSE item was encountered and atend is FALSE
# Line 1405  for (;;) Line 1406  for (;;)
1406      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1407      branchlength = 0;      branchlength = 0;
1408      break;      break;
1409    
1410      /* A true recursion implies not fixed length, but a subroutine call may      /* A true recursion implies not fixed length, but a subroutine call may
1411      be OK. If the subroutine is a forward reference, we can't deal with      be OK. If the subroutine is a forward reference, we can't deal with
1412      it until the end of the pattern, so return -3. */      it until the end of the pattern, so return -3. */
1413    
1414      case OP_RECURSE:      case OP_RECURSE:
1415      if (!atend) return -3;      if (!atend) return -3;
1416      cs = ce = (uschar *)cd->start_code + GET(cc, 1);  /* Start subpattern */      cs = ce = (uschar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
1417      do ce += GET(ce, 1); while (*ce == OP_ALT);       /* End subpattern */      do ce += GET(ce, 1); while (*ce == OP_ALT);       /* End subpattern */
1418      if (cc > cs && cc < ce) return -1;                /* Recursion */      if (cc > cs && cc < ce) return -1;                /* Recursion */
1419      d = find_fixedlength(cs + 2, options, atend, cd);      d = find_fixedlength(cs + 2, options, atend, cd);
1420      if (d < 0) return d;      if (d < 0) return d;
1421      branchlength += d;      branchlength += d;
1422      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1423      break;      break;
1424    
1425      /* Skip over assertive subpatterns */      /* Skip over assertive subpatterns */
1426    
# Line 1459  for (;;) Line 1460  for (;;)
1460      branchlength++;      branchlength++;
1461      cc += 2;      cc += 2;
1462  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1463      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1464        cc += _pcre_utf8_table4[cc[-1] & 0x3f];        cc += _pcre_utf8_table4[cc[-1] & 0x3f];
1465  #endif  #endif
1466      break;      break;
# Line 1471  for (;;) Line 1472  for (;;)
1472      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1473      cc += 4;      cc += 4;
1474  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1475      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1476        cc += _pcre_utf8_table4[cc[-1] & 0x3f];        cc += _pcre_utf8_table4[cc[-1] & 0x3f];
1477  #endif  #endif
1478      break;      break;
# Line 1556  for (;;) Line 1557  for (;;)
1557    
1558  /* This little function scans through a compiled pattern until it finds a  /* This little function scans through a compiled pattern until it finds a
1559  capturing bracket with the given number, or, if the number is negative, an  capturing bracket with the given number, or, if the number is negative, an
1560  instance of OP_REVERSE for a lookbehind. The function is global in the C sense  instance of OP_REVERSE for a lookbehind. The function is global in the C sense
1561  so that it can be called from pcre_study() when finding the minimum matching  so that it can be called from pcre_study() when finding the minimum matching
1562  length.  length.
1563    
1564  Arguments:  Arguments:
# Line 1581  for (;;) Line 1582  for (;;)
1582    the table is zero; the actual length is stored in the compiled code. */    the table is zero; the actual length is stored in the compiled code. */
1583    
1584    if (c == OP_XCLASS) code += GET(code, 1);    if (c == OP_XCLASS) code += GET(code, 1);
1585    
1586    /* Handle recursion */    /* Handle recursion */
1587    
1588    else if (c == OP_REVERSE)    else if (c == OP_REVERSE)
1589      {      {
1590      if (number < 0) return (uschar *)code;      if (number < 0) return (uschar *)code;
1591      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1592      }      }
1593    
# Line 1957  for (code = first_significant_code(code Line 1958  for (code = first_significant_code(code
1958      case OP_POSQUERY:      case OP_POSQUERY:
1959      if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];      if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];
1960      break;      break;
1961    
1962      case OP_UPTO:      case OP_UPTO:
1963      case OP_MINUPTO:      case OP_MINUPTO:
1964      case OP_POSUPTO:      case OP_POSUPTO:
# Line 1994  static BOOL Line 1995  static BOOL
1995  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
1996    BOOL utf8)    BOOL utf8)
1997  {  {
1998  while (bcptr != NULL && bcptr->current >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
1999    {    {
2000    if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))
2001        return FALSE;
2002    bcptr = bcptr->outer;    bcptr = bcptr->outer;
2003    }    }
2004  return TRUE;  return TRUE;
# Line 2658  BOOL utf8 = FALSE; Line 2660  BOOL utf8 = FALSE;
2660  uschar *utf8_char = NULL;  uschar *utf8_char = NULL;
2661  #endif  #endif
2662    
2663  #ifdef DEBUG  #ifdef PCRE_DEBUG
2664  if (lengthptr != NULL) DPRINTF((">> start branch\n"));  if (lengthptr != NULL) DPRINTF((">> start branch\n"));
2665  #endif  #endif
2666    
# Line 2717  for (;; ptr++) Line 2719  for (;; ptr++)
2719    
2720    if (lengthptr != NULL)    if (lengthptr != NULL)
2721      {      {
2722  #ifdef DEBUG  #ifdef PCRE_DEBUG
2723      if (code > cd->hwm) cd->hwm = code;                 /* High water info */      if (code > cd->hwm) cd->hwm = code;                 /* High water info */
2724  #endif  #endif
2725      if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */      if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */
# Line 3915  we set the flag only if there is a liter Line 3917  we set the flag only if there is a liter
3917    
3918        if (repeat_max == 0) goto END_REPEAT;        if (repeat_max == 0) goto END_REPEAT;
3919    
3920        /*--------------------------------------------------------------------*/        /*--------------------------------------------------------------------*/
3921        /* This code is obsolete from release 8.00; the restriction was finally        /* This code is obsolete from release 8.00; the restriction was finally
3922        removed: */        removed: */
3923    
3924        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
3925        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
3926    
3927        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
3928        /*--------------------------------------------------------------------*/        /*--------------------------------------------------------------------*/
3929    
3930        /* Combine the op_type with the repeat_type */        /* Combine the op_type with the repeat_type */
3931    
# Line 4070  we set the flag only if there is a liter Line 4072  we set the flag only if there is a liter
4072          goto END_REPEAT;          goto END_REPEAT;
4073          }          }
4074    
4075        /*--------------------------------------------------------------------*/        /*--------------------------------------------------------------------*/
4076        /* This code is obsolete from release 8.00; the restriction was finally        /* This code is obsolete from release 8.00; the restriction was finally
4077        removed: */        removed: */
4078    
# Line 4078  we set the flag only if there is a liter Line 4080  we set the flag only if there is a liter
4080        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
4081    
4082        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
4083        /*--------------------------------------------------------------------*/        /*--------------------------------------------------------------------*/
4084    
4085        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
4086          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 4213  we set the flag only if there is a liter Line 4215  we set the flag only if there is a liter
4215            {            {
4216            /* In the pre-compile phase, we don't actually do the replication. We            /* In the pre-compile phase, we don't actually do the replication. We
4217            just adjust the length as if we had. Do some paranoid checks for            just adjust the length as if we had. Do some paranoid checks for
4218            potential integer overflow. */            potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
4219              integer type when available, otherwise double. */
4220    
4221            if (lengthptr != NULL)            if (lengthptr != NULL)
4222              {              {
4223              int delta = (repeat_min - 1)*length_prevgroup;              int delta = (repeat_min - 1)*length_prevgroup;
4224              if ((double)(repeat_min - 1)*(double)length_prevgroup >              if ((INT64_OR_DOUBLE)(repeat_min - 1)*
4225                                                              (double)INT_MAX ||                    (INT64_OR_DOUBLE)length_prevgroup >
4226                        (INT64_OR_DOUBLE)INT_MAX ||
4227                  OFLOW_MAX - *lengthptr < delta)                  OFLOW_MAX - *lengthptr < delta)
4228                {                {
4229                *errorcodeptr = ERR20;                *errorcodeptr = ERR20;
# Line 4265  we set the flag only if there is a liter Line 4269  we set the flag only if there is a liter
4269          just adjust the length as if we had. For each repetition we must add 1          just adjust the length as if we had. For each repetition we must add 1
4270          to the length for BRAZERO and for all but the last repetition we must          to the length for BRAZERO and for all but the last repetition we must
4271          add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some          add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
4272          paranoid checks to avoid integer overflow. */          paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
4273            a 64-bit integer type when available, otherwise double. */
4274    
4275          if (lengthptr != NULL && repeat_max > 0)          if (lengthptr != NULL && repeat_max > 0)
4276            {            {
4277            int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -            int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
4278                        2 - 2*LINK_SIZE;   /* Last one doesn't nest */                        2 - 2*LINK_SIZE;   /* Last one doesn't nest */
4279            if ((double)repeat_max *            if ((INT64_OR_DOUBLE)repeat_max *
4280                  (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)                  (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
4281                    > (double)INT_MAX ||                    > (INT64_OR_DOUBLE)INT_MAX ||
4282                OFLOW_MAX - *lengthptr < delta)                OFLOW_MAX - *lengthptr < delta)
4283              {              {
4284              *errorcodeptr = ERR20;              *errorcodeptr = ERR20;
# Line 4393  we set the flag only if there is a liter Line 4398  we set the flag only if there is a liter
4398      if (possessive_quantifier)      if (possessive_quantifier)
4399        {        {
4400        int len;        int len;
4401    
4402        if (*tempcode == OP_TYPEEXACT)        if (*tempcode == OP_TYPEEXACT)
4403          tempcode += _pcre_OP_lengths[*tempcode] +          tempcode += _pcre_OP_lengths[*tempcode] +
4404            ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);            ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);
4405    
4406        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
4407          {          {
4408          tempcode += _pcre_OP_lengths[*tempcode];          tempcode += _pcre_OP_lengths[*tempcode];
# Line 4405  we set the flag only if there is a liter Line 4410  we set the flag only if there is a liter
4410          if (utf8 && tempcode[-1] >= 0xc0)          if (utf8 && tempcode[-1] >= 0xc0)
4411            tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];            tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];
4412  #endif  #endif
4413          }          }
4414    
4415        len = code - tempcode;        len = code - tempcode;
4416        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
4417          {          {
# Line 4485  we set the flag only if there is a liter Line 4490  we set the flag only if there is a liter
4490              strncmp((char *)name, vn, namelen) == 0)              strncmp((char *)name, vn, namelen) == 0)
4491            {            {
4492            /* Check for open captures before ACCEPT */            /* Check for open captures before ACCEPT */
4493    
4494            if (verbs[i].op == OP_ACCEPT)            if (verbs[i].op == OP_ACCEPT)
4495              {              {
4496              open_capitem *oc;              open_capitem *oc;
4497              cd->had_accept = TRUE;              cd->had_accept = TRUE;
4498              for (oc = cd->open_caps; oc != NULL; oc = oc->next)              for (oc = cd->open_caps; oc != NULL; oc = oc->next)
4499                {                {
4500                *code++ = OP_CLOSE;                *code++ = OP_CLOSE;
4501                PUT2INC(code, 0, oc->number);                PUT2INC(code, 0, oc->number);
4502                }                }
4503              }              }
4504            *code++ = verbs[i].op;            *code++ = verbs[i].op;
4505            break;            break;
4506            }            }
# Line 4658  we set the flag only if there is a liter Line 4663  we set the flag only if there is a liter
4663            }            }
4664    
4665          /* Otherwise (did not start with "+" or "-"), start by looking for the          /* Otherwise (did not start with "+" or "-"), start by looking for the
4666          name. If we find a name, add one to the opcode to change OP_CREF or          name. If we find a name, add one to the opcode to change OP_CREF or
4667          OP_RREF into OP_NCREF or OP_NRREF. These behave exactly the same,          OP_RREF into OP_NCREF or OP_NRREF. These behave exactly the same,
4668          except they record that the reference was originally to a name. The          except they record that the reference was originally to a name. The
4669          information is used to check duplicate names. */          information is used to check duplicate names. */
4670    
4671          slot = cd->name_table;          slot = cd->name_table;
# Line 4887  we set the flag only if there is a liter Line 4892  we set the flag only if there is a liter
4892            is because the number of names, and hence the table size, is computed            is because the number of names, and hence the table size, is computed
4893            in the pre-compile, and it affects various numbers and pointers which            in the pre-compile, and it affects various numbers and pointers which
4894            would all have to be modified, and the compiled code moved down, if            would all have to be modified, and the compiled code moved down, if
4895            duplicates with the same number were omitted from the table. This            duplicates with the same number were omitted from the table. This
4896            doesn't seem worth the hassle. However, *different* names for the            doesn't seem worth the hassle. However, *different* names for the
4897            same number are not permitted. */            same number are not permitted. */
4898    
# Line 4895  we set the flag only if there is a liter Line 4900  we set the flag only if there is a liter
4900              {              {
4901              BOOL dupname = FALSE;              BOOL dupname = FALSE;
4902              slot = cd->name_table;              slot = cd->name_table;
4903    
4904              for (i = 0; i < cd->names_found; i++)              for (i = 0; i < cd->names_found; i++)
4905                {                {
4906                int crc = memcmp(name, slot+2, namelen);                int crc = memcmp(name, slot+2, namelen);
# Line 4909  we set the flag only if there is a liter Line 4914  we set the flag only if there is a liter
4914                      *errorcodeptr = ERR43;                      *errorcodeptr = ERR43;
4915                      goto FAILED;                      goto FAILED;
4916                      }                      }
4917                    else dupname = TRUE;                    else dupname = TRUE;
4918                    }                    }
4919                  else crc = -1;      /* Current name is a substring */                  else crc = -1;      /* Current name is a substring */
4920                  }                  }
4921    
4922                /* Make space in the table and break the loop for an earlier                /* Make space in the table and break the loop for an earlier
4923                name. For a duplicate or later name, carry on. We do this for                name. For a duplicate or later name, carry on. We do this for
4924                duplicates so that in the simple case (when ?(| is not used) they                duplicates so that in the simple case (when ?(| is not used) they
4925                are in order of their numbers. */                are in order of their numbers. */
4926    
4927                if (crc < 0)                if (crc < 0)
4928                  {                  {
4929                  memmove(slot + cd->name_entry_size, slot,                  memmove(slot + cd->name_entry_size, slot,
4930                    (cd->names_found - i) * cd->name_entry_size);                    (cd->names_found - i) * cd->name_entry_size);
4931                  break;                  break;
4932                  }                  }
4933    
4934                /* Continue the loop for a later or duplicate name */                /* Continue the loop for a later or duplicate name */
4935    
4936                slot += cd->name_entry_size;                slot += cd->name_entry_size;
4937                }                }
4938    
4939              /* For non-duplicate names, check for a duplicate number before              /* For non-duplicate names, check for a duplicate number before
4940              adding the new name. */              adding the new name. */
4941    
4942              if (!dupname)              if (!dupname)
4943                {                {
4944                uschar *cslot = cd->name_table;                uschar *cslot = cd->name_table;
# Line 4945  we set the flag only if there is a liter Line 4950  we set the flag only if there is a liter
4950                      {                      {
4951                      *errorcodeptr = ERR65;                      *errorcodeptr = ERR65;
4952                      goto FAILED;                      goto FAILED;
4953                      }                      }
4954                    }                    }
4955                  else i--;                  else i--;
4956                  cslot += cd->name_entry_size;                  cslot += cd->name_entry_size;
4957                  }                  }
4958                }                }
4959    
4960              PUT2(slot, 0, cd->bracount + 1);              PUT2(slot, 0, cd->bracount + 1);
4961              memcpy(slot + 2, name, namelen);              memcpy(slot + 2, name, namelen);
# Line 5131  we set the flag only if there is a liter Line 5136  we set the flag only if there is a liter
5136            if (lengthptr == NULL)            if (lengthptr == NULL)
5137              {              {
5138              *code = OP_END;              *code = OP_END;
5139              if (recno != 0)              if (recno != 0)
5140                called = _pcre_find_bracket(cd->start_code, utf8, recno);                called = _pcre_find_bracket(cd->start_code, utf8, recno);
5141    
5142              /* Forward reference */              /* Forward reference */
# Line 5248  we set the flag only if there is a liter Line 5253  we set the flag only if there is a liter
5253              {              {
5254              cd->external_options = newoptions;              cd->external_options = newoptions;
5255              }              }
5256           else            else
5257              {              {
5258              if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))              if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))
5259                {                {
# Line 5783  int branchfirstbyte, branchreqbyte; Line 5788  int branchfirstbyte, branchreqbyte;
5788  int length;  int length;
5789  int orig_bracount;  int orig_bracount;
5790  int max_bracount;  int max_bracount;
5791    int old_external_options = cd->external_options;
5792  branch_chain bc;  branch_chain bc;
5793    
5794  bc.outer = bcptr;  bc.outer = bcptr;
5795  bc.current = code;  bc.current_branch = code;
5796    
5797  firstbyte = reqbyte = REQ_UNSET;  firstbyte = reqbyte = REQ_UNSET;
5798    
# Line 5812  if (*code == OP_CBRA) Line 5818  if (*code == OP_CBRA)
5818    capnumber = GET2(code, 1 + LINK_SIZE);    capnumber = GET2(code, 1 + LINK_SIZE);
5819    capitem.number = capnumber;    capitem.number = capnumber;
5820    capitem.next = cd->open_caps;    capitem.next = cd->open_caps;
5821    cd->open_caps = &capitem;    cd->open_caps = &capitem;
5822    }    }
5823    
5824  /* Offset is set zero to mark that this bracket is still open */  /* Offset is set zero to mark that this bracket is still open */
5825    
# Line 5859  for (;;) Line 5865  for (;;)
5865      return FALSE;      return FALSE;
5866      }      }
5867    
5868      /* If the external options have changed during this branch, it means that we
5869      are at the top level, and a leading option setting has been encountered. We
5870      need to re-set the original option values to take account of this so that,
5871      during the pre-compile phase, we know to allow for a re-set at the start of
5872      subsequent branches. */
5873    
5874      if (old_external_options != cd->external_options)
5875        oldims = cd->external_options & PCRE_IMS;
5876    
5877    /* Keep the highest bracket count in case (?| was used and some branch    /* Keep the highest bracket count in case (?| was used and some branch
5878    has fewer than the rest. */    has fewer than the rest. */
5879    
# Line 5909  for (;;) Line 5924  for (;;)
5924    
5925      /* If lookbehind, check that this branch matches a fixed-length string, and      /* If lookbehind, check that this branch matches a fixed-length string, and
5926      put the length into the OP_REVERSE item. Temporarily mark the end of the      put the length into the OP_REVERSE item. Temporarily mark the end of the
5927      branch with OP_END. If the branch contains OP_RECURSE, the result is -3      branch with OP_END. If the branch contains OP_RECURSE, the result is -3
5928      because there may be forward references that we can't check here. Set a      because there may be forward references that we can't check here. Set a
5929      flag to cause another lookbehind check at the end. Why not do it all at the      flag to cause another lookbehind check at the end. Why not do it all at the
5930      end? Because common, erroneous checks are picked up here and the offset of      end? Because common, erroneous checks are picked up here and the offset of
5931      the problem can be shown. */      the problem can be shown. */
5932    
5933      if (lookbehind)      if (lookbehind)
# Line 5923  for (;;) Line 5938  for (;;)
5938        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
5939        if (fixed_length == -3)        if (fixed_length == -3)
5940          {          {
5941          cd->check_lookbehind = TRUE;          cd->check_lookbehind = TRUE;
5942          }          }
5943        else if (fixed_length < 0)        else if (fixed_length < 0)
5944          {          {
5945          *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;          *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;
# Line 5958  for (;;) Line 5973  for (;;)
5973          }          }
5974        while (branch_length > 0);        while (branch_length > 0);
5975        }        }
5976    
5977      /* If it was a capturing subpattern, remove it from the chain. */      /* If it was a capturing subpattern, remove it from the chain. */
5978    
5979      if (capnumber > 0) cd->open_caps = cd->open_caps->next;      if (capnumber > 0) cd->open_caps = cd->open_caps->next;
5980    
5981      /* Fill in the ket */      /* Fill in the ket */
# Line 5969  for (;;) Line 5984  for (;;)
5984      PUT(code, 1, code - start_bracket);      PUT(code, 1, code - start_bracket);
5985      code += 1 + LINK_SIZE;      code += 1 + LINK_SIZE;
5986    
5987      /* Resetting option if needed */      /* Reset options if needed. */
5988    
5989      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
5990        {        {
# Line 6018  for (;;) Line 6033  for (;;)
6033      {      {
6034      *code = OP_ALT;      *code = OP_ALT;
6035      PUT(code, 1, code - last_branch);      PUT(code, 1, code - last_branch);
6036      bc.current = last_branch = code;      bc.current_branch = last_branch = code;
6037      code += 1 + LINK_SIZE;      code += 1 + LINK_SIZE;
6038      }      }
6039    
# Line 6434  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 6449  while (ptr[skipatstart] == CHAR_LEFT_PAR
6449    
6450  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6451  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
6452       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)       (*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1)) >= 0)
6453    {    {
6454    errorcode = ERR44;    errorcode = ERR44;
6455    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
# Line 6631  if debugging, leave the test till after Line 6646  if debugging, leave the test till after
6646    
6647  *code++ = OP_END;  *code++ = OP_END;
6648    
6649  #ifndef DEBUG  #ifndef PCRE_DEBUG
6650  if (code - codestart > length) errorcode = ERR23;  if (code - codestart > length) errorcode = ERR23;
6651  #endif  #endif
6652    
# Line 6654  subpattern. */ Line 6669  subpattern. */
6669    
6670  if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;  if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
6671    
6672  /* If there were any lookbehind assertions that contained OP_RECURSE  /* If there were any lookbehind assertions that contained OP_RECURSE
6673  (recursions or subroutine calls), a flag is set for them to be checked here,  (recursions or subroutine calls), a flag is set for them to be checked here,
6674  because they may contain forward references. Actual recursions can't be fixed  because they may contain forward references. Actual recursions can't be fixed
6675  length, but subroutine calls can. It is done like this so that those without  length, but subroutine calls can. It is done like this so that those without
# Line 6665  length, and set their lengths. */ Line 6680  length, and set their lengths. */
6680  if (cd->check_lookbehind)  if (cd->check_lookbehind)
6681    {    {
6682    uschar *cc = (uschar *)codestart;    uschar *cc = (uschar *)codestart;
6683    
6684    /* Loop, searching for OP_REVERSE items, and process those that do not have    /* Loop, searching for OP_REVERSE items, and process those that do not have
6685    their length set. (Actually, it will also re-process any that have a length    their length set. (Actually, it will also re-process any that have a length
6686    of zero, but that is a pathological case, and it does no harm.) When we find    of zero, but that is a pathological case, and it does no harm.) When we find
6687    one, we temporarily terminate the branch it is in while we scan it. */    one, we temporarily terminate the branch it is in while we scan it. */
6688    
6689    for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);    for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);
6690         cc != NULL;         cc != NULL;
6691         cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))         cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))
6692      {      {
6693      if (GET(cc, 1) == 0)      if (GET(cc, 1) == 0)
6694        {        {
6695        int fixed_length;        int fixed_length;
6696        uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);        uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
6697        int end_op = *be;        int end_op = *be;
6698        *be = OP_END;        *be = OP_END;
6699        fixed_length = find_fixedlength(cc, re->options, TRUE, cd);        fixed_length = find_fixedlength(cc, re->options, TRUE, cd);
6700        *be = end_op;        *be = end_op;
# Line 6687  if (cd->check_lookbehind) Line 6702  if (cd->check_lookbehind)
6702        if (fixed_length < 0)        if (fixed_length < 0)
6703          {          {
6704          errorcode = (fixed_length == -2)? ERR36 : ERR25;          errorcode = (fixed_length == -2)? ERR36 : ERR25;
6705          break;          break;
6706          }          }
6707        PUT(cc, 1, fixed_length);        PUT(cc, 1, fixed_length);
6708        }        }
6709      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
6710      }      }
6711    }    }
6712    
6713  /* Failed to compile, or error while post-processing */  /* Failed to compile, or error while post-processing */
6714    
# Line 6755  if (reqbyte >= 0 && Line 6770  if (reqbyte >= 0 &&
6770  /* Print out the compiled data if debugging is enabled. This is never the  /* Print out the compiled data if debugging is enabled. This is never the
6771  case when building a production library. */  case when building a production library. */
6772    
6773  #ifdef DEBUG  #ifdef PCRE_DEBUG
6774    
6775  printf("Length = %d top_bracket = %d top_backref = %d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
6776    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
# Line 6793  if (code - codestart > length) Line 6808  if (code - codestart > length)
6808    if (errorcodeptr != NULL) *errorcodeptr = ERR23;    if (errorcodeptr != NULL) *errorcodeptr = ERR23;
6809    return NULL;    return NULL;
6810    }    }
6811  #endif   /* DEBUG */  #endif   /* PCRE_DEBUG */
6812    
6813  return (pcre *)re;  return (pcre *)re;
6814  }  }

Legend:
Removed from v.459  
changed lines
  Added in v.487

  ViewVC Help
Powered by ViewVC 1.1.5