/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 438 by ph10, Sun Sep 6 20:00:47 2009 UTC | revision 507 by ph10, Wed Mar 10 16:08:01 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 53  supporting internal functions that are n Line 53  supporting internal functions that are n
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    
56  /* When DEBUG is defined, we need the pcre_printint() function, which is also  /* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is
57  used by pcretest. DEBUG is not defined when building a production library. */  also used by pcretest. PCRE_DEBUG is not defined when building a production
58    library. */
59    
60  #ifdef DEBUG  #ifdef PCRE_DEBUG
61  #include "pcre_printint.src"  #include "pcre_printint.src"
62  #endif  #endif
63    
# Line 91  is 4 there is plenty of room. */ Line 92  is 4 there is plenty of room. */
92    
93  #define COMPILE_WORK_SIZE (4096)  #define COMPILE_WORK_SIZE (4096)
94    
95    /* The overrun tests check for a slightly smaller size so that they detect the
96    overrun before it actually does run off the end of the data block. */
97    
98    #define WORK_SIZE_CHECK (COMPILE_WORK_SIZE - 100)
99    
100    
101  /* Table for handling escaped characters in the range '0'-'z'. Positive returns  /* Table for handling escaped characters in the range '0'-'z'. Positive returns
102  are simple data values; negative values are for special things like \d and so  are simple data values; negative values are for special things like \d and so
# Line 262  the number of relocations needed when a Line 268  the number of relocations needed when a
268  it is now one long string. We cannot use a table of offsets, because the  it is now one long string. We cannot use a table of offsets, because the
269  lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we  lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
270  simply count through to the one we want - this isn't a performance issue  simply count through to the one we want - this isn't a performance issue
271  because these strings are used only when there is a compilation error. */  because these strings are used only when there is a compilation error.
272    
273    Each substring ends with \0 to insert a null character. This includes the final
274    substring, so that the whole string ends with \0\0, which can be detected when
275    counting through. */
276    
277  static const char error_texts[] =  static const char error_texts[] =
278    "no error\0"    "no error\0"
# Line 341  static const char error_texts[] = Line 351  static const char error_texts[] =
351    "number is too big\0"    "number is too big\0"
352    "subpattern name expected\0"    "subpattern name expected\0"
353    "digit expected after (?+\0"    "digit expected after (?+\0"
354    "] is an invalid data character in JavaScript compatibility mode";    "] is an invalid data character in JavaScript compatibility mode\0"
355      /* 65 */
356      "different names for subpatterns of the same number are not allowed\0";
357    
358  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
359  patterns. Note that the tables in chartables are dependent on the locale, and  patterns. Note that the tables in chartables are dependent on the locale, and
# Line 500  static const char * Line 511  static const char *
511  find_error_text(int n)  find_error_text(int n)
512  {  {
513  const char *s = error_texts;  const char *s = error_texts;
514  for (; n > 0; n--) while (*s++ != 0) {};  for (; n > 0; n--)
515      {
516      while (*s++ != 0) {};
517      if (*s == 0) return "Error text not found (please report)";
518      }
519  return s;  return s;
520  }  }
521    
# Line 1100  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1115  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1115        if (name != NULL && lorn == ptr - thisname &&        if (name != NULL && lorn == ptr - thisname &&
1116            strncmp((const char *)name, (const char *)thisname, lorn) == 0)            strncmp((const char *)name, (const char *)thisname, lorn) == 0)
1117          return *count;          return *count;
1118        term++;        term++;
1119        }        }
1120      }      }
1121    }    }
# Line 1146  for (; *ptr != 0; ptr++) Line 1161  for (; *ptr != 0; ptr++)
1161            break;            break;
1162          }          }
1163        else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)        else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
1164          {          {
1165          negate_class = TRUE;          negate_class = TRUE;
1166          ptr++;          ptr++;
1167          }          }
1168        else break;        else break;
1169        }        }
1170    
# Line 1315  for (;;) Line 1330  for (;;)
1330    
1331      case OP_CALLOUT:      case OP_CALLOUT:
1332      case OP_CREF:      case OP_CREF:
1333        case OP_NCREF:
1334      case OP_RREF:      case OP_RREF:
1335        case OP_NRREF:
1336      case OP_DEF:      case OP_DEF:
1337      code += _pcre_OP_lengths[*code];      code += _pcre_OP_lengths[*code];
1338      break;      break;
# Line 1331  for (;;) Line 1348  for (;;)
1348    
1349    
1350  /*************************************************  /*************************************************
1351  *        Find the fixed length of a pattern      *  *        Find the fixed length of a branch       *
1352  *************************************************/  *************************************************/
1353    
1354  /* Scan a pattern and compute the fixed length of subject that will match it,  /* Scan a branch and compute the fixed length of subject that will match it,
1355  if the length is fixed. This is needed for dealing with backward assertions.  if the length is fixed. This is needed for dealing with backward assertions.
1356  In UTF8 mode, the result is in characters rather than bytes.  In UTF8 mode, the result is in characters rather than bytes. The branch is
1357    temporarily terminated with OP_END when this function is called.
1358    
1359    This function is called when a backward assertion is encountered, so that if it
1360    fails, the error message can point to the correct place in the pattern.
1361    However, we cannot do this when the assertion contains subroutine calls,
1362    because they can be forward references. We solve this by remembering this case
1363    and doing the check at the end; a flag specifies which mode we are running in.
1364    
1365  Arguments:  Arguments:
1366    code     points to the start of the pattern (the bracket)    code     points to the start of the pattern (the bracket)
1367    options  the compiling options    options  the compiling options
1368      atend    TRUE if called when the pattern is complete
1369      cd       the "compile data" structure
1370    
1371  Returns:   the fixed length, or -1 if there is no fixed length,  Returns:   the fixed length,
1372                 or -1 if there is no fixed length,
1373               or -2 if \C was encountered               or -2 if \C was encountered
1374                 or -3 if an OP_RECURSE item was encountered and atend is FALSE
1375  */  */
1376    
1377  static int  static int
1378  find_fixedlength(uschar *code, int options)  find_fixedlength(uschar *code, int options, BOOL atend, compile_data *cd)
1379  {  {
1380  int length = -1;  int length = -1;
1381    
# Line 1360  branch, check the length against that of Line 1388  branch, check the length against that of
1388  for (;;)  for (;;)
1389    {    {
1390    int d;    int d;
1391      uschar *ce, *cs;
1392    register int op = *cc;    register int op = *cc;
1393    switch (op)    switch (op)
1394      {      {
# Line 1367  for (;;) Line 1396  for (;;)
1396      case OP_BRA:      case OP_BRA:
1397      case OP_ONCE:      case OP_ONCE:
1398      case OP_COND:      case OP_COND:
1399      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options);      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options, atend, cd);
1400      if (d < 0) return d;      if (d < 0) return d;
1401      branchlength += d;      branchlength += d;
1402      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 1390  for (;;) Line 1419  for (;;)
1419      branchlength = 0;      branchlength = 0;
1420      break;      break;
1421    
1422        /* A true recursion implies not fixed length, but a subroutine call may
1423        be OK. If the subroutine is a forward reference, we can't deal with
1424        it until the end of the pattern, so return -3. */
1425    
1426        case OP_RECURSE:
1427        if (!atend) return -3;
1428        cs = ce = (uschar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
1429        do ce += GET(ce, 1); while (*ce == OP_ALT);       /* End subpattern */
1430        if (cc > cs && cc < ce) return -1;                /* Recursion */
1431        d = find_fixedlength(cs + 2, options, atend, cd);
1432        if (d < 0) return d;
1433        branchlength += d;
1434        cc += 1 + LINK_SIZE;
1435        break;
1436    
1437      /* Skip over assertive subpatterns */      /* Skip over assertive subpatterns */
1438    
1439      case OP_ASSERT:      case OP_ASSERT:
# Line 1403  for (;;) Line 1447  for (;;)
1447    
1448      case OP_REVERSE:      case OP_REVERSE:
1449      case OP_CREF:      case OP_CREF:
1450        case OP_NCREF:
1451      case OP_RREF:      case OP_RREF:
1452        case OP_NRREF:
1453      case OP_DEF:      case OP_DEF:
1454      case OP_OPT:      case OP_OPT:
1455      case OP_CALLOUT:      case OP_CALLOUT:
1456      case OP_SOD:      case OP_SOD:
1457      case OP_SOM:      case OP_SOM:
1458        case OP_SET_SOM:
1459      case OP_EOD:      case OP_EOD:
1460      case OP_EODN:      case OP_EODN:
1461      case OP_CIRC:      case OP_CIRC:
# Line 1426  for (;;) Line 1473  for (;;)
1473      branchlength++;      branchlength++;
1474      cc += 2;      cc += 2;
1475  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1476      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1477        cc += _pcre_utf8_table4[cc[-1] & 0x3f];        cc += _pcre_utf8_table4[cc[-1] & 0x3f];
1478  #endif  #endif
1479      break;      break;
# Line 1438  for (;;) Line 1485  for (;;)
1485      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1486      cc += 4;      cc += 4;
1487  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1488      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1489        cc += _pcre_utf8_table4[cc[-1] & 0x3f];        cc += _pcre_utf8_table4[cc[-1] & 0x3f];
1490  #endif  #endif
1491      break;      break;
# Line 1518  for (;;) Line 1565  for (;;)
1565    
1566    
1567  /*************************************************  /*************************************************
1568  *    Scan compiled regex for numbered bracket    *  *    Scan compiled regex for specific bracket    *
1569  *************************************************/  *************************************************/
1570    
1571  /* This little function scans through a compiled pattern until it finds a  /* This little function scans through a compiled pattern until it finds a
1572  capturing bracket with the given number.  capturing bracket with the given number, or, if the number is negative, an
1573    instance of OP_REVERSE for a lookbehind. The function is global in the C sense
1574    so that it can be called from pcre_study() when finding the minimum matching
1575    length.
1576    
1577  Arguments:  Arguments:
1578    code        points to start of expression    code        points to start of expression
1579    utf8        TRUE in UTF-8 mode    utf8        TRUE in UTF-8 mode
1580    number      the required bracket number    number      the required bracket number or negative to find a lookbehind
1581    
1582  Returns:      pointer to the opcode for the bracket, or NULL if not found  Returns:      pointer to the opcode for the bracket, or NULL if not found
1583  */  */
1584    
1585  static const uschar *  const uschar *
1586  find_bracket(const uschar *code, BOOL utf8, int number)  _pcre_find_bracket(const uschar *code, BOOL utf8, int number)
1587  {  {
1588  for (;;)  for (;;)
1589    {    {
# Line 1546  for (;;) Line 1596  for (;;)
1596    
1597    if (c == OP_XCLASS) code += GET(code, 1);    if (c == OP_XCLASS) code += GET(code, 1);
1598    
1599      /* Handle recursion */
1600    
1601      else if (c == OP_REVERSE)
1602        {
1603        if (number < 0) return (uschar *)code;
1604        code += _pcre_OP_lengths[c];
1605        }
1606    
1607    /* Handle capturing bracket */    /* Handle capturing bracket */
1608    
1609    else if (c == OP_CBRA)    else if (c == OP_CBRA)
# Line 1732  Arguments: Line 1790  Arguments:
1790    code        points to start of search    code        points to start of search
1791    endcode     points to where to stop    endcode     points to where to stop
1792    utf8        TRUE if in UTF8 mode    utf8        TRUE if in UTF8 mode
1793      cd          contains pointers to tables etc.
1794    
1795  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
1796  */  */
1797    
1798  static BOOL  static BOOL
1799  could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8)  could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,
1800      compile_data *cd)
1801  {  {
1802  register int c;  register int c;
1803  for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);  for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);
# Line 1768  for (code = first_significant_code(code Line 1828  for (code = first_significant_code(code
1828      continue;      continue;
1829      }      }
1830    
1831      /* For a recursion/subroutine call, if its end has been reached, which
1832      implies a subroutine call, we can scan it. */
1833    
1834      if (c == OP_RECURSE)
1835        {
1836        BOOL empty_branch = FALSE;
1837        const uschar *scode = cd->start_code + GET(code, 1);
1838        if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
1839        do
1840          {
1841          if (could_be_empty_branch(scode, endcode, utf8, cd))
1842            {
1843            empty_branch = TRUE;
1844            break;
1845            }
1846          scode += GET(scode, 1);
1847          }
1848        while (*scode == OP_ALT);
1849        if (!empty_branch) return FALSE;  /* All branches are non-empty */
1850        continue;
1851        }
1852    
1853    /* For other groups, scan the branches. */    /* For other groups, scan the branches. */
1854    
1855    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND)    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND)
# Line 1786  for (code = first_significant_code(code Line 1868  for (code = first_significant_code(code
1868        empty_branch = FALSE;        empty_branch = FALSE;
1869        do        do
1870          {          {
1871          if (!empty_branch && could_be_empty_branch(code, endcode, utf8))          if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd))
1872            empty_branch = TRUE;            empty_branch = TRUE;
1873          code += GET(code, 1);          code += GET(code, 1);
1874          }          }
# Line 1913  for (code = first_significant_code(code Line 1995  for (code = first_significant_code(code
1995      case OP_POSQUERY:      case OP_POSQUERY:
1996      if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];      if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];
1997      break;      break;
1998    
1999      case OP_UPTO:      case OP_UPTO:
2000      case OP_MINUPTO:      case OP_MINUPTO:
2001      case OP_POSUPTO:      case OP_POSUPTO:
2002      if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];      if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
2003      break;      break;
2004  #endif  #endif
2005    
2006        /* None of the remaining opcodes are required to match a character. */
2007    
2008        default:
2009        break;
2010      }      }
2011    }    }
2012    
# Line 1942  Arguments: Line 2029  Arguments:
2029    endcode     points to where to stop (current RECURSE item)    endcode     points to where to stop (current RECURSE item)
2030    bcptr       points to the chain of current (unclosed) branch starts    bcptr       points to the chain of current (unclosed) branch starts
2031    utf8        TRUE if in UTF-8 mode    utf8        TRUE if in UTF-8 mode
2032      cd          pointers to tables etc
2033    
2034  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
2035  */  */
2036    
2037  static BOOL  static BOOL
2038  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
2039    BOOL utf8)    BOOL utf8, compile_data *cd)
2040  {  {
2041  while (bcptr != NULL && bcptr->current >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
2042    {    {
2043    if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8, cd))
2044        return FALSE;
2045    bcptr = bcptr->outer;    bcptr = bcptr->outer;
2046    }    }
2047  return TRUE;  return TRUE;
# Line 2614  BOOL utf8 = FALSE; Line 2703  BOOL utf8 = FALSE;
2703  uschar *utf8_char = NULL;  uschar *utf8_char = NULL;
2704  #endif  #endif
2705    
2706  #ifdef DEBUG  #ifdef PCRE_DEBUG
2707  if (lengthptr != NULL) DPRINTF((">> start branch\n"));  if (lengthptr != NULL) DPRINTF((">> start branch\n"));
2708  #endif  #endif
2709    
# Line 2673  for (;; ptr++) Line 2762  for (;; ptr++)
2762    
2763    if (lengthptr != NULL)    if (lengthptr != NULL)
2764      {      {
2765  #ifdef DEBUG  #ifdef PCRE_DEBUG
2766      if (code > cd->hwm) cd->hwm = code;                 /* High water info */      if (code > cd->hwm) cd->hwm = code;                 /* High water info */
2767  #endif  #endif
2768      if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */      if (code > cd->start_workspace + WORK_SIZE_CHECK)   /* Check for overrun */
2769        {        {
2770        *errorcodeptr = ERR52;        *errorcodeptr = ERR52;
2771        goto FAILED;        goto FAILED;
# Line 2725  for (;; ptr++) Line 2814  for (;; ptr++)
2814    /* In the real compile phase, just check the workspace used by the forward    /* In the real compile phase, just check the workspace used by the forward
2815    reference list. */    reference list. */
2816    
2817    else if (cd->hwm > cd->start_workspace + COMPILE_WORK_SIZE)    else if (cd->hwm > cd->start_workspace + WORK_SIZE_CHECK)
2818      {      {
2819      *errorcodeptr = ERR52;      *errorcodeptr = ERR52;
2820      goto FAILED;      goto FAILED;
# Line 3871  we set the flag only if there is a liter Line 3960  we set the flag only if there is a liter
3960    
3961        if (repeat_max == 0) goto END_REPEAT;        if (repeat_max == 0) goto END_REPEAT;
3962    
3963        /*--------------------------------------------------------------------*/        /*--------------------------------------------------------------------*/
3964        /* This code is obsolete from release 8.00; the restriction was finally        /* This code is obsolete from release 8.00; the restriction was finally
3965        removed: */        removed: */
3966    
3967        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
3968        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
3969    
3970        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
3971        /*--------------------------------------------------------------------*/        /*--------------------------------------------------------------------*/
3972    
3973        /* Combine the op_type with the repeat_type */        /* Combine the op_type with the repeat_type */
3974    
# Line 4026  we set the flag only if there is a liter Line 4115  we set the flag only if there is a liter
4115          goto END_REPEAT;          goto END_REPEAT;
4116          }          }
4117    
4118        /*--------------------------------------------------------------------*/        /*--------------------------------------------------------------------*/
4119        /* This code is obsolete from release 8.00; the restriction was finally        /* This code is obsolete from release 8.00; the restriction was finally
4120        removed: */        removed: */
4121    
# Line 4034  we set the flag only if there is a liter Line 4123  we set the flag only if there is a liter
4123        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
4124    
4125        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
4126        /*--------------------------------------------------------------------*/        /*--------------------------------------------------------------------*/
4127    
4128        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
4129          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 4169  we set the flag only if there is a liter Line 4258  we set the flag only if there is a liter
4258            {            {
4259            /* In the pre-compile phase, we don't actually do the replication. We            /* In the pre-compile phase, we don't actually do the replication. We
4260            just adjust the length as if we had. Do some paranoid checks for            just adjust the length as if we had. Do some paranoid checks for
4261            potential integer overflow. */            potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
4262              integer type when available, otherwise double. */
4263    
4264            if (lengthptr != NULL)            if (lengthptr != NULL)
4265              {              {
4266              int delta = (repeat_min - 1)*length_prevgroup;              int delta = (repeat_min - 1)*length_prevgroup;
4267              if ((double)(repeat_min - 1)*(double)length_prevgroup >              if ((INT64_OR_DOUBLE)(repeat_min - 1)*
4268                                                              (double)INT_MAX ||                    (INT64_OR_DOUBLE)length_prevgroup >
4269                        (INT64_OR_DOUBLE)INT_MAX ||
4270                  OFLOW_MAX - *lengthptr < delta)                  OFLOW_MAX - *lengthptr < delta)
4271                {                {
4272                *errorcodeptr = ERR20;                *errorcodeptr = ERR20;
# Line 4221  we set the flag only if there is a liter Line 4312  we set the flag only if there is a liter
4312          just adjust the length as if we had. For each repetition we must add 1          just adjust the length as if we had. For each repetition we must add 1
4313          to the length for BRAZERO and for all but the last repetition we must          to the length for BRAZERO and for all but the last repetition we must
4314          add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some          add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
4315          paranoid checks to avoid integer overflow. */          paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
4316            a 64-bit integer type when available, otherwise double. */
4317    
4318          if (lengthptr != NULL && repeat_max > 0)          if (lengthptr != NULL && repeat_max > 0)
4319            {            {
4320            int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -            int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
4321                        2 - 2*LINK_SIZE;   /* Last one doesn't nest */                        2 - 2*LINK_SIZE;   /* Last one doesn't nest */
4322            if ((double)repeat_max *            if ((INT64_OR_DOUBLE)repeat_max *
4323                  (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)                  (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
4324                    > (double)INT_MAX ||                    > (INT64_OR_DOUBLE)INT_MAX ||
4325                OFLOW_MAX - *lengthptr < delta)                OFLOW_MAX - *lengthptr < delta)
4326              {              {
4327              *errorcodeptr = ERR20;              *errorcodeptr = ERR20;
# Line 4306  we set the flag only if there is a liter Line 4398  we set the flag only if there is a liter
4398            uschar *scode = bracode;            uschar *scode = bracode;
4399            do            do
4400              {              {
4401              if (could_be_empty_branch(scode, ketcode, utf8))              if (could_be_empty_branch(scode, ketcode, utf8, cd))
4402                {                {
4403                *bracode += OP_SBRA - OP_BRA;                *bracode += OP_SBRA - OP_BRA;
4404                break;                break;
# Line 4349  we set the flag only if there is a liter Line 4441  we set the flag only if there is a liter
4441      if (possessive_quantifier)      if (possessive_quantifier)
4442        {        {
4443        int len;        int len;
4444    
4445        if (*tempcode == OP_TYPEEXACT)        if (*tempcode == OP_TYPEEXACT)
4446          tempcode += _pcre_OP_lengths[*tempcode] +          tempcode += _pcre_OP_lengths[*tempcode] +
4447            ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);            ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);
4448    
4449        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
4450          {          {
4451          tempcode += _pcre_OP_lengths[*tempcode];          tempcode += _pcre_OP_lengths[*tempcode];
# Line 4361  we set the flag only if there is a liter Line 4453  we set the flag only if there is a liter
4453          if (utf8 && tempcode[-1] >= 0xc0)          if (utf8 && tempcode[-1] >= 0xc0)
4454            tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];            tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];
4455  #endif  #endif
4456          }          }
4457    
4458        len = code - tempcode;        len = code - tempcode;
4459        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
4460          {          {
# Line 4381  we set the flag only if there is a liter Line 4473  we set the flag only if there is a liter
4473          case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break;          case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break;
4474          case OP_NOTUPTO:  *tempcode = OP_NOTPOSUPTO; break;          case OP_NOTUPTO:  *tempcode = OP_NOTPOSUPTO; break;
4475    
4476            /* Because we are moving code along, we must ensure that any
4477            pending recursive references are updated. */
4478    
4479          default:          default:
4480            *code = OP_END;
4481            adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm);
4482          memmove(tempcode + 1+LINK_SIZE, tempcode, len);          memmove(tempcode + 1+LINK_SIZE, tempcode, len);
4483          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
4484          len += 1 + LINK_SIZE;          len += 1 + LINK_SIZE;
# Line 4440  we set the flag only if there is a liter Line 4537  we set the flag only if there is a liter
4537          if (namelen == verbs[i].len &&          if (namelen == verbs[i].len &&
4538              strncmp((char *)name, vn, namelen) == 0)              strncmp((char *)name, vn, namelen) == 0)
4539            {            {
4540            *code = verbs[i].op;            /* Check for open captures before ACCEPT */
4541            if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;  
4542              if (verbs[i].op == OP_ACCEPT)
4543                {
4544                open_capitem *oc;
4545                cd->had_accept = TRUE;
4546                for (oc = cd->open_caps; oc != NULL; oc = oc->next)
4547                  {
4548                  *code++ = OP_CLOSE;
4549                  PUT2INC(code, 0, oc->number);
4550                  }
4551                }
4552              *code++ = verbs[i].op;
4553            break;            break;
4554            }            }
4555          vn += verbs[i].len + 1;          vn += verbs[i].len + 1;
# Line 4603  we set the flag only if there is a liter Line 4711  we set the flag only if there is a liter
4711            }            }
4712    
4713          /* Otherwise (did not start with "+" or "-"), start by looking for the          /* Otherwise (did not start with "+" or "-"), start by looking for the
4714          name. */          name. If we find a name, add one to the opcode to change OP_CREF or
4715            OP_RREF into OP_NCREF or OP_NRREF. These behave exactly the same,
4716            except they record that the reference was originally to a name. The
4717            information is used to check duplicate names. */
4718    
4719          slot = cd->name_table;          slot = cd->name_table;
4720          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
# Line 4618  we set the flag only if there is a liter Line 4729  we set the flag only if there is a liter
4729            {            {
4730            recno = GET2(slot, 0);            recno = GET2(slot, 0);
4731            PUT2(code, 2+LINK_SIZE, recno);            PUT2(code, 2+LINK_SIZE, recno);
4732              code[1+LINK_SIZE]++;
4733            }            }
4734    
4735          /* Search the pattern for a forward reference */          /* Search the pattern for a forward reference */
# Line 4626  we set the flag only if there is a liter Line 4738  we set the flag only if there is a liter
4738                          (options & PCRE_EXTENDED) != 0)) > 0)                          (options & PCRE_EXTENDED) != 0)) > 0)
4739            {            {
4740            PUT2(code, 2+LINK_SIZE, i);            PUT2(code, 2+LINK_SIZE, i);
4741              code[1+LINK_SIZE]++;
4742            }            }
4743    
4744          /* If terminator == 0 it means that the name followed directly after          /* If terminator == 0 it means that the name followed directly after
# Line 4818  we set the flag only if there is a liter Line 4931  we set the flag only if there is a liter
4931                }                }
4932              }              }
4933    
4934            /* In the real compile, create the entry in the table */            /* In the real compile, create the entry in the table, maintaining
4935              alphabetical order. Duplicate names for different numbers are
4936              permitted only if PCRE_DUPNAMES is set. Duplicate names for the same
4937              number are always OK. (An existing number can be re-used if (?|
4938              appears in the pattern.) In either event, a duplicate name results in
4939              a duplicate entry in the table, even if the number is the same. This
4940              is because the number of names, and hence the table size, is computed
4941              in the pre-compile, and it affects various numbers and pointers which
4942              would all have to be modified, and the compiled code moved down, if
4943              duplicates with the same number were omitted from the table. This
4944              doesn't seem worth the hassle. However, *different* names for the
4945              same number are not permitted. */
4946    
4947            else            else
4948              {              {
4949                BOOL dupname = FALSE;
4950              slot = cd->name_table;              slot = cd->name_table;
4951    
4952              for (i = 0; i < cd->names_found; i++)              for (i = 0; i < cd->names_found; i++)
4953                {                {
4954                int crc = memcmp(name, slot+2, namelen);                int crc = memcmp(name, slot+2, namelen);
# Line 4830  we set the flag only if there is a liter Line 4956  we set the flag only if there is a liter
4956                  {                  {
4957                  if (slot[2+namelen] == 0)                  if (slot[2+namelen] == 0)
4958                    {                    {
4959                    if ((options & PCRE_DUPNAMES) == 0)                    if (GET2(slot, 0) != cd->bracount + 1 &&
4960                          (options & PCRE_DUPNAMES) == 0)
4961                      {                      {
4962                      *errorcodeptr = ERR43;                      *errorcodeptr = ERR43;
4963                      goto FAILED;                      goto FAILED;
4964                      }                      }
4965                      else dupname = TRUE;
4966                    }                    }
4967                  else crc = -1;      /* Current name is substring */                  else crc = -1;      /* Current name is a substring */
4968                  }                  }
4969    
4970                  /* Make space in the table and break the loop for an earlier
4971                  name. For a duplicate or later name, carry on. We do this for
4972                  duplicates so that in the simple case (when (?| is not used) they
4973                  are in order of their numbers. */
4974    
4975                if (crc < 0)                if (crc < 0)
4976                  {                  {
4977                  memmove(slot + cd->name_entry_size, slot,                  memmove(slot + cd->name_entry_size, slot,
4978                    (cd->names_found - i) * cd->name_entry_size);                    (cd->names_found - i) * cd->name_entry_size);
4979                  break;                  break;
4980                  }                  }
4981    
4982                  /* Continue the loop for a later or duplicate name */
4983    
4984                slot += cd->name_entry_size;                slot += cd->name_entry_size;
4985                }                }
4986    
4987                /* For non-duplicate names, check for a duplicate number before
4988                adding the new name. */
4989    
4990                if (!dupname)
4991                  {
4992                  uschar *cslot = cd->name_table;
4993                  for (i = 0; i < cd->names_found; i++)
4994                    {
4995                    if (cslot != slot)
4996                      {
4997                      if (GET2(cslot, 0) == cd->bracount + 1)
4998                        {
4999                        *errorcodeptr = ERR65;
5000                        goto FAILED;
5001                        }
5002                      }
5003                    else i--;
5004                    cslot += cd->name_entry_size;
5005                    }
5006                  }
5007    
5008              PUT2(slot, 0, cd->bracount + 1);              PUT2(slot, 0, cd->bracount + 1);
5009              memcpy(slot + 2, name, namelen);              memcpy(slot + 2, name, namelen);
5010              slot[2+namelen] = 0;              slot[2+namelen] = 0;
5011              }              }
5012            }            }
5013    
5014          /* In both cases, count the number of names we've encountered. */          /* In both pre-compile and compile, count the number of names we've
5015            encountered. */
5016    
         ptr++;                    /* Move past > or ' */  
5017          cd->names_found++;          cd->names_found++;
5018            ptr++;                    /* Move past > or ' */
5019          goto NUMBERED_GROUP;          goto NUMBERED_GROUP;
5020    
5021    
# Line 5025  we set the flag only if there is a liter Line 5184  we set the flag only if there is a liter
5184            if (lengthptr == NULL)            if (lengthptr == NULL)
5185              {              {
5186              *code = OP_END;              *code = OP_END;
5187              if (recno != 0) called = find_bracket(cd->start_code, utf8, recno);              if (recno != 0)
5188                  called = _pcre_find_bracket(cd->start_code, utf8, recno);
5189    
5190              /* Forward reference */              /* Forward reference */
5191    
# Line 5037  we set the flag only if there is a liter Line 5197  we set the flag only if there is a liter
5197                  *errorcodeptr = ERR15;                  *errorcodeptr = ERR15;
5198                  goto FAILED;                  goto FAILED;
5199                  }                  }
5200    
5201                  /* Fudge the value of "called" so that when it is inserted as an
5202                  offset below, what is actually inserted is the reference number
5203                  of the group. */
5204    
5205                called = cd->start_code + recno;                called = cd->start_code + recno;
5206                PUTINC(cd->hwm, 0, code + 2 + LINK_SIZE - cd->start_code);                PUTINC(cd->hwm, 0, code + 2 + LINK_SIZE - cd->start_code);
5207                }                }
# Line 5046  we set the flag only if there is a liter Line 5211  we set the flag only if there is a liter
5211              recursion that could loop for ever, and diagnose that case. */              recursion that could loop for ever, and diagnose that case. */
5212    
5213              else if (GET(called, 1) == 0 &&              else if (GET(called, 1) == 0 &&
5214                       could_be_empty(called, code, bcptr, utf8))                       could_be_empty(called, code, bcptr, utf8, cd))
5215                {                {
5216                *errorcodeptr = ERR40;                *errorcodeptr = ERR40;
5217                goto FAILED;                goto FAILED;
# Line 5141  we set the flag only if there is a liter Line 5306  we set the flag only if there is a liter
5306              {              {
5307              cd->external_options = newoptions;              cd->external_options = newoptions;
5308              }              }
5309           else            else
5310              {              {
5311              if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))              if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))
5312                {                {
# Line 5478  we set the flag only if there is a liter Line 5643  we set the flag only if there is a liter
5643    
5644        if (-c >= ESC_REF)        if (-c >= ESC_REF)
5645          {          {
5646            open_capitem *oc;
5647          recno = -c - ESC_REF;          recno = -c - ESC_REF;
5648    
5649          HANDLE_REFERENCE:    /* Come here from named backref handling */          HANDLE_REFERENCE:    /* Come here from named backref handling */
# Line 5487  we set the flag only if there is a liter Line 5653  we set the flag only if there is a liter
5653          PUT2INC(code, 0, recno);          PUT2INC(code, 0, recno);
5654          cd->backref_map |= (recno < 32)? (1 << recno) : 1;          cd->backref_map |= (recno < 32)? (1 << recno) : 1;
5655          if (recno > cd->top_backref) cd->top_backref = recno;          if (recno > cd->top_backref) cd->top_backref = recno;
5656    
5657            /* Check to see if this back reference is recursive, that is, it
5658            is inside the group that it references. A flag is set so that the
5659            group can be made atomic. */
5660    
5661            for (oc = cd->open_caps; oc != NULL; oc = oc->next)
5662              {
5663              if (oc->number == recno)
5664                {
5665                oc->flag = TRUE;
5666                break;
5667                }
5668              }
5669          }          }
5670    
5671        /* So are Unicode property matches, if supported. */        /* So are Unicode property matches, if supported. */
# Line 5669  uschar *code = *codeptr; Line 5848  uschar *code = *codeptr;
5848  uschar *last_branch = code;  uschar *last_branch = code;
5849  uschar *start_bracket = code;  uschar *start_bracket = code;
5850  uschar *reverse_count = NULL;  uschar *reverse_count = NULL;
5851    open_capitem capitem;
5852    int capnumber = 0;
5853  int firstbyte, reqbyte;  int firstbyte, reqbyte;
5854  int branchfirstbyte, branchreqbyte;  int branchfirstbyte, branchreqbyte;
5855  int length;  int length;
5856  int orig_bracount;  int orig_bracount;
5857  int max_bracount;  int max_bracount;
5858    int old_external_options = cd->external_options;
5859  branch_chain bc;  branch_chain bc;
5860    
5861  bc.outer = bcptr;  bc.outer = bcptr;
5862  bc.current = code;  bc.current_branch = code;
5863    
5864  firstbyte = reqbyte = REQ_UNSET;  firstbyte = reqbyte = REQ_UNSET;
5865    
# Line 5695  the code that abstracts option settings Line 5877  the code that abstracts option settings
5877  them global. It tests the value of length for (2 + 2*LINK_SIZE) in the  them global. It tests the value of length for (2 + 2*LINK_SIZE) in the
5878  pre-compile phase to find out whether anything has yet been compiled or not. */  pre-compile phase to find out whether anything has yet been compiled or not. */
5879    
5880    /* If this is a capturing subpattern, add to the chain of open capturing items
5881    so that we can detect them if (*ACCEPT) is encountered. This is also used to
5882    detect groups that contain recursive back references to themselves. */
5883    
5884    if (*code == OP_CBRA)
5885      {
5886      capnumber = GET2(code, 1 + LINK_SIZE);
5887      capitem.number = capnumber;
5888      capitem.next = cd->open_caps;
5889      capitem.flag = FALSE;
5890      cd->open_caps = &capitem;
5891      }
5892    
5893  /* Offset is set zero to mark that this bracket is still open */  /* Offset is set zero to mark that this bracket is still open */
5894    
5895  PUT(code, 1, 0);  PUT(code, 1, 0);
# Line 5739  for (;;) Line 5934  for (;;)
5934      return FALSE;      return FALSE;
5935      }      }
5936    
5937      /* If the external options have changed during this branch, it means that we
5938      are at the top level, and a leading option setting has been encountered. We
5939      need to re-set the original option values to take account of this so that,
5940      during the pre-compile phase, we know to allow for a re-set at the start of
5941      subsequent branches. */
5942    
5943      if (old_external_options != cd->external_options)
5944        oldims = cd->external_options & PCRE_IMS;
5945    
5946    /* Keep the highest bracket count in case (?| was used and some branch    /* Keep the highest bracket count in case (?| was used and some branch
5947    has fewer than the rest. */    has fewer than the rest. */
5948    
# Line 5789  for (;;) Line 5993  for (;;)
5993    
5994      /* If lookbehind, check that this branch matches a fixed-length string, and      /* If lookbehind, check that this branch matches a fixed-length string, and
5995      put the length into the OP_REVERSE item. Temporarily mark the end of the      put the length into the OP_REVERSE item. Temporarily mark the end of the
5996      branch with OP_END. */      branch with OP_END. If the branch contains OP_RECURSE, the result is -3
5997        because there may be forward references that we can't check here. Set a
5998        flag to cause another lookbehind check at the end. Why not do it all at the
5999        end? Because common, erroneous checks are picked up here and the offset of
6000        the problem can be shown. */
6001    
6002      if (lookbehind)      if (lookbehind)
6003        {        {
6004        int fixed_length;        int fixed_length;
6005        *code = OP_END;        *code = OP_END;
6006        fixed_length = find_fixedlength(last_branch, options);        fixed_length = find_fixedlength(last_branch, options, FALSE, cd);
6007        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
6008        if (fixed_length < 0)        if (fixed_length == -3)
6009            {
6010            cd->check_lookbehind = TRUE;
6011            }
6012          else if (fixed_length < 0)
6013          {          {
6014          *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;          *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;
6015          *ptrptr = ptr;          *ptrptr = ptr;
6016          return FALSE;          return FALSE;
6017          }          }
6018        PUT(reverse_count, 0, fixed_length);        else { PUT(reverse_count, 0, fixed_length); }
6019        }        }
6020      }      }
6021    
# Line 5837  for (;;) Line 6049  for (;;)
6049      PUT(code, 1, code - start_bracket);      PUT(code, 1, code - start_bracket);
6050      code += 1 + LINK_SIZE;      code += 1 + LINK_SIZE;
6051    
6052      /* Resetting option if needed */      /* If it was a capturing subpattern, check to see if it contained any
6053        recursive back references. If so, we must wrap it in atomic brackets.
6054        In any event, remove the block from the chain. */
6055    
6056        if (capnumber > 0)
6057          {
6058          if (cd->open_caps->flag)
6059            {
6060            memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
6061              code - start_bracket);
6062            *start_bracket = OP_ONCE;
6063            code += 1 + LINK_SIZE;
6064            PUT(start_bracket, 1, code - start_bracket);
6065            *code = OP_KET;
6066            PUT(code, 1, code - start_bracket);
6067            code += 1 + LINK_SIZE;
6068            length += 2 + 2*LINK_SIZE;
6069            }
6070          cd->open_caps = cd->open_caps->next;
6071          }
6072    
6073        /* Reset options if needed. */
6074    
6075      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
6076        {        {
# Line 5886  for (;;) Line 6119  for (;;)
6119      {      {
6120      *code = OP_ALT;      *code = OP_ALT;
6121      PUT(code, 1, code - last_branch);      PUT(code, 1, code - last_branch);
6122      bc.current = last_branch = code;      bc.current_branch = last_branch = code;
6123      code += 1 + LINK_SIZE;      code += 1 + LINK_SIZE;
6124      }      }
6125    
# Line 6033  do { Line 6266  do {
6266       switch (*scode)       switch (*scode)
6267         {         {
6268         case OP_CREF:         case OP_CREF:
6269           case OP_NCREF:
6270         case OP_RREF:         case OP_RREF:
6271           case OP_NRREF:
6272         case OP_DEF:         case OP_DEF:
6273         return FALSE;         return FALSE;
6274    
# Line 6202  int length = 1;  /* For final END opcode Line 6437  int length = 1;  /* For final END opcode
6437  int firstbyte, reqbyte, newline;  int firstbyte, reqbyte, newline;
6438  int errorcode = 0;  int errorcode = 0;
6439  int skipatstart = 0;  int skipatstart = 0;
6440  #ifdef SUPPORT_UTF8  BOOL utf8 = (options & PCRE_UTF8) != 0;
 BOOL utf8;  
 #endif  
6441  size_t size;  size_t size;
6442  uschar *code;  uschar *code;
6443  const uschar *codestart;  const uschar *codestart;
# Line 6301  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 6534  while (ptr[skipatstart] == CHAR_LEFT_PAR
6534  /* Can't support UTF8 unless PCRE has been compiled to include the code. */  /* Can't support UTF8 unless PCRE has been compiled to include the code. */
6535    
6536  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
 utf8 = (options & PCRE_UTF8) != 0;  
6537  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
6538       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)       (*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1)) >= 0)
6539    {    {
6540    errorcode = ERR44;    errorcode = ERR44;
6541    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
6542    }    }
6543  #else  #else
6544  if ((options & PCRE_UTF8) != 0)  if (utf8)
6545    {    {
6546    errorcode = ERR32;    errorcode = ERR32;
6547    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN;
# Line 6398  cd->end_pattern = (const uschar *)(patte Line 6630  cd->end_pattern = (const uschar *)(patte
6630  cd->req_varyopt = 0;  cd->req_varyopt = 0;
6631  cd->external_options = options;  cd->external_options = options;
6632  cd->external_flags = 0;  cd->external_flags = 0;
6633    cd->open_caps = NULL;
6634    
6635  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we
6636  don't need to look at the result of the function here. The initial options have  don't need to look at the result of the function here. The initial options have
# Line 6472  cd->start_code = codestart; Line 6705  cd->start_code = codestart;
6705  cd->hwm = cworkspace;  cd->hwm = cworkspace;
6706  cd->req_varyopt = 0;  cd->req_varyopt = 0;
6707  cd->had_accept = FALSE;  cd->had_accept = FALSE;
6708    cd->check_lookbehind = FALSE;
6709    cd->open_caps = NULL;
6710    
6711  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
6712  error, errorcode will be set non-zero, so we don't need to look at the result  error, errorcode will be set non-zero, so we don't need to look at the result
# Line 6497  if debugging, leave the test till after Line 6732  if debugging, leave the test till after
6732    
6733  *code++ = OP_END;  *code++ = OP_END;
6734    
6735  #ifndef DEBUG  #ifndef PCRE_DEBUG
6736  if (code - codestart > length) errorcode = ERR23;  if (code - codestart > length) errorcode = ERR23;
6737  #endif  #endif
6738    
# Line 6510  while (errorcode == 0 && cd->hwm > cwork Line 6745  while (errorcode == 0 && cd->hwm > cwork
6745    cd->hwm -= LINK_SIZE;    cd->hwm -= LINK_SIZE;
6746    offset = GET(cd->hwm, 0);    offset = GET(cd->hwm, 0);
6747    recno = GET(codestart, offset);    recno = GET(codestart, offset);
6748    groupptr = find_bracket(codestart, (re->options & PCRE_UTF8) != 0, recno);    groupptr = _pcre_find_bracket(codestart, utf8, recno);
6749    if (groupptr == NULL) errorcode = ERR53;    if (groupptr == NULL) errorcode = ERR53;
6750      else PUT(((uschar *)codestart), offset, groupptr - codestart);      else PUT(((uschar *)codestart), offset, groupptr - codestart);
6751    }    }
# Line 6520  subpattern. */ Line 6755  subpattern. */
6755    
6756  if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;  if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
6757    
6758    /* If there were any lookbehind assertions that contained OP_RECURSE
6759    (recursions or subroutine calls), a flag is set for them to be checked here,
6760    because they may contain forward references. Actual recursions can't be fixed
6761    length, but subroutine calls can. It is done like this so that those without
6762    OP_RECURSE that are not fixed length get a diagnostic with a useful offset. The
6763    exceptional ones forgo this. We scan the pattern to check that they are fixed
6764    length, and set their lengths. */
6765    
6766    if (cd->check_lookbehind)
6767      {
6768      uschar *cc = (uschar *)codestart;
6769    
6770      /* Loop, searching for OP_REVERSE items, and process those that do not have
6771      their length set. (Actually, it will also re-process any that have a length
6772      of zero, but that is a pathological case, and it does no harm.) When we find
6773      one, we temporarily terminate the branch it is in while we scan it. */
6774    
6775      for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);
6776           cc != NULL;
6777           cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))
6778        {
6779        if (GET(cc, 1) == 0)
6780          {
6781          int fixed_length;
6782          uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
6783          int end_op = *be;
6784          *be = OP_END;
6785          fixed_length = find_fixedlength(cc, re->options, TRUE, cd);
6786          *be = end_op;
6787          DPRINTF(("fixed length = %d\n", fixed_length));
6788          if (fixed_length < 0)
6789            {
6790            errorcode = (fixed_length == -2)? ERR36 : ERR25;
6791            break;
6792            }
6793          PUT(cc, 1, fixed_length);
6794          }
6795        cc += 1 + LINK_SIZE;
6796        }
6797      }
6798    
6799  /* Failed to compile, or error while post-processing */  /* Failed to compile, or error while post-processing */
6800    
6801  if (errorcode != 0)  if (errorcode != 0)
# Line 6580  if (reqbyte >= 0 && Line 6856  if (reqbyte >= 0 &&
6856  /* Print out the compiled data if debugging is enabled. This is never the  /* Print out the compiled data if debugging is enabled. This is never the
6857  case when building a production library. */  case when building a production library. */
6858    
6859  #ifdef DEBUG  #ifdef PCRE_DEBUG
   
6860  printf("Length = %d top_bracket = %d top_backref = %d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
6861    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
6862    
# Line 6618  if (code - codestart > length) Line 6893  if (code - codestart > length)
6893    if (errorcodeptr != NULL) *errorcodeptr = ERR23;    if (errorcodeptr != NULL) *errorcodeptr = ERR23;
6894    return NULL;    return NULL;
6895    }    }
6896  #endif   /* DEBUG */  #endif   /* PCRE_DEBUG */
6897    
6898  return (pcre *)re;  return (pcre *)re;
6899  }  }

Legend:
Removed from v.438  
changed lines
  Added in v.507

  ViewVC Help
Powered by ViewVC 1.1.5