/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 496 by ph10, Tue Mar 2 19:11:17 2010 UTC | revision 504 by ph10, Mon Mar 8 08:57:04 2010 UTC
# Line 263  the number of relocations needed when a Line 263  the number of relocations needed when a
263  it is now one long string. We cannot use a table of offsets, because the  it is now one long string. We cannot use a table of offsets, because the
264  lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we  lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
265  simply count through to the one we want - this isn't a performance issue  simply count through to the one we want - this isn't a performance issue
266  because these strings are used only when there is a compilation error. */  because these strings are used only when there is a compilation error.
267    
268    Each substring ends with \0 to insert a null character. This includes the final
269    substring, so that the whole string ends with \0\0, which can be detected when
270    counting through. */
271    
272  static const char error_texts[] =  static const char error_texts[] =
273    "no error\0"    "no error\0"
# Line 344  static const char error_texts[] = Line 348  static const char error_texts[] =
348    "digit expected after (?+\0"    "digit expected after (?+\0"
349    "] is an invalid data character in JavaScript compatibility mode\0"    "] is an invalid data character in JavaScript compatibility mode\0"
350    /* 65 */    /* 65 */
351    "different names for subpatterns of the same number are not allowed";    "different names for subpatterns of the same number are not allowed\0";
   
352    
353  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
354  patterns. Note that the tables in chartables are dependent on the locale, and  patterns. Note that the tables in chartables are dependent on the locale, and
# Line 503  static const char * Line 506  static const char *
506  find_error_text(int n)  find_error_text(int n)
507  {  {
508  const char *s = error_texts;  const char *s = error_texts;
509  for (; n > 0; n--) while (*s++ != 0) {};  for (; n > 0; n--)
510      {
511      while (*s++ != 0) {};
512      if (*s == 0) return "Error text not found (please report)";
513      }
514  return s;  return s;
515  }  }
516    
# Line 1443  for (;;) Line 1450  for (;;)
1450      case OP_CALLOUT:      case OP_CALLOUT:
1451      case OP_SOD:      case OP_SOD:
1452      case OP_SOM:      case OP_SOM:
1453        case OP_SET_SOM:
1454      case OP_EOD:      case OP_EOD:
1455      case OP_EODN:      case OP_EODN:
1456      case OP_CIRC:      case OP_CIRC:
# Line 1777  Arguments: Line 1785  Arguments:
1785    code        points to start of search    code        points to start of search
1786    endcode     points to where to stop    endcode     points to where to stop
1787    utf8        TRUE if in UTF8 mode    utf8        TRUE if in UTF8 mode
1788      cd          contains pointers to tables etc.
1789    
1790  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
1791  */  */
1792    
1793  static BOOL  static BOOL
1794  could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8)  could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,
1795      compile_data *cd)
1796  {  {
1797  register int c;  register int c;
1798  for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);  for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);
# Line 1792  for (code = first_significant_code(code Line 1802  for (code = first_significant_code(code
1802    const uschar *ccode;    const uschar *ccode;
1803    
1804    c = *code;    c = *code;
1805    
1806    /* Skip over forward assertions; the other assertions are skipped by    /* Skip over forward assertions; the other assertions are skipped by
1807    first_significant_code() with a TRUE final argument. */    first_significant_code() with a TRUE final argument. */
1808    
# Line 1812  for (code = first_significant_code(code Line 1822  for (code = first_significant_code(code
1822      c = *code;      c = *code;
1823      continue;      continue;
1824      }      }
1825    
1826      /* For a recursion/subroutine call, if its end has been reached, which
1827      implies a subroutine call, we can scan it. */
1828    
1829      if (c == OP_RECURSE)
1830        {
1831        BOOL empty_branch = FALSE;
1832        const uschar *scode = cd->start_code + GET(code, 1);
1833        if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
1834        do
1835          {
1836          if (could_be_empty_branch(scode, endcode, utf8, cd))
1837            {
1838            empty_branch = TRUE;
1839            break;
1840            }
1841          scode += GET(scode, 1);
1842          }
1843        while (*scode == OP_ALT);
1844        if (!empty_branch) return FALSE;  /* All branches are non-empty */
1845        continue;
1846        }
1847    
1848    /* For other groups, scan the branches. */    /* For other groups, scan the branches. */
1849    
# Line 1831  for (code = first_significant_code(code Line 1863  for (code = first_significant_code(code
1863        empty_branch = FALSE;        empty_branch = FALSE;
1864        do        do
1865          {          {
1866          if (!empty_branch && could_be_empty_branch(code, endcode, utf8))          if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd))
1867            empty_branch = TRUE;            empty_branch = TRUE;
1868          code += GET(code, 1);          code += GET(code, 1);
1869          }          }
# Line 1965  for (code = first_significant_code(code Line 1997  for (code = first_significant_code(code
1997      if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];      if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
1998      break;      break;
1999  #endif  #endif
2000    
2001        /* None of the remaining opcodes are required to match a character. */
2002    
2003        default:
2004        break;
2005      }      }
2006    }    }
2007    
# Line 1987  Arguments: Line 2024  Arguments:
2024    endcode     points to where to stop (current RECURSE item)    endcode     points to where to stop (current RECURSE item)
2025    bcptr       points to the chain of current (unclosed) branch starts    bcptr       points to the chain of current (unclosed) branch starts
2026    utf8        TRUE if in UTF-8 mode    utf8        TRUE if in UTF-8 mode
2027      cd          pointers to tables etc
2028    
2029  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
2030  */  */
2031    
2032  static BOOL  static BOOL
2033  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
2034    BOOL utf8)    BOOL utf8, compile_data *cd)
2035  {  {
2036  while (bcptr != NULL && bcptr->current_branch >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
2037    {    {
2038    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8, cd))
2039      return FALSE;      return FALSE;
2040    bcptr = bcptr->outer;    bcptr = bcptr->outer;
2041    }    }
# Line 4355  we set the flag only if there is a liter Line 4393  we set the flag only if there is a liter
4393            uschar *scode = bracode;            uschar *scode = bracode;
4394            do            do
4395              {              {
4396              if (could_be_empty_branch(scode, ketcode, utf8))              if (could_be_empty_branch(scode, ketcode, utf8, cd))
4397                {                {
4398                *bracode += OP_SBRA - OP_BRA;                *bracode += OP_SBRA - OP_BRA;
4399                break;                break;
# Line 5168  we set the flag only if there is a liter Line 5206  we set the flag only if there is a liter
5206              recursion that could loop for ever, and diagnose that case. */              recursion that could loop for ever, and diagnose that case. */
5207    
5208              else if (GET(called, 1) == 0 &&              else if (GET(called, 1) == 0 &&
5209                       could_be_empty(called, code, bcptr, utf8))                       could_be_empty(called, code, bcptr, utf8, cd))
5210                {                {
5211                *errorcodeptr = ERR40;                *errorcodeptr = ERR40;
5212                goto FAILED;                goto FAILED;

Legend:
Removed from v.496  
Changed lines
  Added in v.504

  ViewVC Help
Powered by ViewVC 1.1.5