/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 488 by ph10, Mon Jan 11 15:29:42 2010 UTC revision 505 by ph10, Tue Mar 9 16:50:47 2010 UTC
# Line 92  is 4 there is plenty of room. */ Line 92  is 4 there is plenty of room. */
92    
93  #define COMPILE_WORK_SIZE (4096)  #define COMPILE_WORK_SIZE (4096)
94    
95    /* The overrun tests check for a slightly smaller size so that they detect the
96    overrun before it actually does run off the end of the data block. */
97    
98    #define WORK_SIZE_CHECK (COMPILE_WORK_SIZE - 100)
99    
100    
101  /* Table for handling escaped characters in the range '0'-'z'. Positive returns  /* Table for handling escaped characters in the range '0'-'z'. Positive returns
102  are simple data values; negative values are for special things like \d and so  are simple data values; negative values are for special things like \d and so
# Line 263  the number of relocations needed when a Line 268  the number of relocations needed when a
268  it is now one long string. We cannot use a table of offsets, because the  it is now one long string. We cannot use a table of offsets, because the
269  lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we  lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
270  simply count through to the one we want - this isn't a performance issue  simply count through to the one we want - this isn't a performance issue
271  because these strings are used only when there is a compilation error. */  because these strings are used only when there is a compilation error.
272    
273    Each substring ends with \0 to insert a null character. This includes the final
274    substring, so that the whole string ends with \0\0, which can be detected when
275    counting through. */
276    
277  static const char error_texts[] =  static const char error_texts[] =
278    "no error\0"    "no error\0"
# Line 344  static const char error_texts[] = Line 353  static const char error_texts[] =
353    "digit expected after (?+\0"    "digit expected after (?+\0"
354    "] is an invalid data character in JavaScript compatibility mode\0"    "] is an invalid data character in JavaScript compatibility mode\0"
355    /* 65 */    /* 65 */
356    "different names for subpatterns of the same number are not allowed";    "different names for subpatterns of the same number are not allowed\0";
   
357    
358  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
359  patterns. Note that the tables in chartables are dependent on the locale, and  patterns. Note that the tables in chartables are dependent on the locale, and
# Line 503  static const char * Line 511  static const char *
511  find_error_text(int n)  find_error_text(int n)
512  {  {
513  const char *s = error_texts;  const char *s = error_texts;
514  for (; n > 0; n--) while (*s++ != 0) {};  for (; n > 0; n--)
515      {
516      while (*s++ != 0) {};
517      if (*s == 0) return "Error text not found (please report)";
518      }
519  return s;  return s;
520  }  }
521    
# Line 1443  for (;;) Line 1455  for (;;)
1455      case OP_CALLOUT:      case OP_CALLOUT:
1456      case OP_SOD:      case OP_SOD:
1457      case OP_SOM:      case OP_SOM:
1458        case OP_SET_SOM:
1459      case OP_EOD:      case OP_EOD:
1460      case OP_EODN:      case OP_EODN:
1461      case OP_CIRC:      case OP_CIRC:
# Line 1777  Arguments: Line 1790  Arguments:
1790    code        points to start of search    code        points to start of search
1791    endcode     points to where to stop    endcode     points to where to stop
1792    utf8        TRUE if in UTF8 mode    utf8        TRUE if in UTF8 mode
1793      cd          contains pointers to tables etc.
1794    
1795  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
1796  */  */
1797    
1798  static BOOL  static BOOL
1799  could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8)  could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,
1800      compile_data *cd)
1801  {  {
1802  register int c;  register int c;
1803  for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);  for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);
# Line 1792  for (code = first_significant_code(code Line 1807  for (code = first_significant_code(code
1807    const uschar *ccode;    const uschar *ccode;
1808    
1809    c = *code;    c = *code;
1810    
1811    /* Skip over forward assertions; the other assertions are skipped by    /* Skip over forward assertions; the other assertions are skipped by
1812    first_significant_code() with a TRUE final argument. */    first_significant_code() with a TRUE final argument. */
1813    
# Line 1812  for (code = first_significant_code(code Line 1827  for (code = first_significant_code(code
1827      c = *code;      c = *code;
1828      continue;      continue;
1829      }      }
1830    
1831      /* For a recursion/subroutine call, if its end has been reached, which
1832      implies a subroutine call, we can scan it. */
1833    
1834      if (c == OP_RECURSE)
1835        {
1836        BOOL empty_branch = FALSE;
1837        const uschar *scode = cd->start_code + GET(code, 1);
1838        if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
1839        do
1840          {
1841          if (could_be_empty_branch(scode, endcode, utf8, cd))
1842            {
1843            empty_branch = TRUE;
1844            break;
1845            }
1846          scode += GET(scode, 1);
1847          }
1848        while (*scode == OP_ALT);
1849        if (!empty_branch) return FALSE;  /* All branches are non-empty */
1850        continue;
1851        }
1852    
1853    /* For other groups, scan the branches. */    /* For other groups, scan the branches. */
1854    
# Line 1831  for (code = first_significant_code(code Line 1868  for (code = first_significant_code(code
1868        empty_branch = FALSE;        empty_branch = FALSE;
1869        do        do
1870          {          {
1871          if (!empty_branch && could_be_empty_branch(code, endcode, utf8))          if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd))
1872            empty_branch = TRUE;            empty_branch = TRUE;
1873          code += GET(code, 1);          code += GET(code, 1);
1874          }          }
# Line 1965  for (code = first_significant_code(code Line 2002  for (code = first_significant_code(code
2002      if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];      if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
2003      break;      break;
2004  #endif  #endif
2005    
2006        /* None of the remaining opcodes are required to match a character. */
2007    
2008        default:
2009        break;
2010      }      }
2011    }    }
2012    
# Line 1987  Arguments: Line 2029  Arguments:
2029    endcode     points to where to stop (current RECURSE item)    endcode     points to where to stop (current RECURSE item)
2030    bcptr       points to the chain of current (unclosed) branch starts    bcptr       points to the chain of current (unclosed) branch starts
2031    utf8        TRUE if in UTF-8 mode    utf8        TRUE if in UTF-8 mode
2032      cd          pointers to tables etc
2033    
2034  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
2035  */  */
2036    
2037  static BOOL  static BOOL
2038  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
2039    BOOL utf8)    BOOL utf8, compile_data *cd)
2040  {  {
2041  while (bcptr != NULL && bcptr->current_branch >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
2042    {    {
2043    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8, cd))
2044      return FALSE;      return FALSE;
2045    bcptr = bcptr->outer;    bcptr = bcptr->outer;
2046    }    }
# Line 2722  for (;; ptr++) Line 2765  for (;; ptr++)
2765  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
2766      if (code > cd->hwm) cd->hwm = code;                 /* High water info */      if (code > cd->hwm) cd->hwm = code;                 /* High water info */
2767  #endif  #endif
2768      if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */      if (code > cd->start_workspace + WORK_SIZE_CHECK)   /* Check for overrun */
2769        {        {
2770        *errorcodeptr = ERR52;        *errorcodeptr = ERR52;
2771        goto FAILED;        goto FAILED;
# Line 2771  for (;; ptr++) Line 2814  for (;; ptr++)
2814    /* In the real compile phase, just check the workspace used by the forward    /* In the real compile phase, just check the workspace used by the forward
2815    reference list. */    reference list. */
2816    
2817    else if (cd->hwm > cd->start_workspace + COMPILE_WORK_SIZE)    else if (cd->hwm > cd->start_workspace + WORK_SIZE_CHECK)
2818      {      {
2819      *errorcodeptr = ERR52;      *errorcodeptr = ERR52;
2820      goto FAILED;      goto FAILED;
# Line 4355  we set the flag only if there is a liter Line 4398  we set the flag only if there is a liter
4398            uschar *scode = bracode;            uschar *scode = bracode;
4399            do            do
4400              {              {
4401              if (could_be_empty_branch(scode, ketcode, utf8))              if (could_be_empty_branch(scode, ketcode, utf8, cd))
4402                {                {
4403                *bracode += OP_SBRA - OP_BRA;                *bracode += OP_SBRA - OP_BRA;
4404                break;                break;
# Line 4430  we set the flag only if there is a liter Line 4473  we set the flag only if there is a liter
4473          case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break;          case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break;
4474          case OP_NOTUPTO:  *tempcode = OP_NOTPOSUPTO; break;          case OP_NOTUPTO:  *tempcode = OP_NOTPOSUPTO; break;
4475    
4476            /* Because we are moving code along, we must ensure that any
4477            pending recursive references are updated. */
4478    
4479          default:          default:
4480            *code = OP_END;
4481            adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm);
4482          memmove(tempcode + 1+LINK_SIZE, tempcode, len);          memmove(tempcode + 1+LINK_SIZE, tempcode, len);
4483          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
4484          len += 1 + LINK_SIZE;          len += 1 + LINK_SIZE;
# Line 5149  we set the flag only if there is a liter Line 5197  we set the flag only if there is a liter
5197                  *errorcodeptr = ERR15;                  *errorcodeptr = ERR15;
5198                  goto FAILED;                  goto FAILED;
5199                  }                  }
5200    
5201                  /* Fudge the value of "called" so that when it is inserted as an
5202                  offset below, what is actually inserted is the reference number
5203                  of the group. */
5204    
5205                called = cd->start_code + recno;                called = cd->start_code + recno;
5206                PUTINC(cd->hwm, 0, code + 2 + LINK_SIZE - cd->start_code);                PUTINC(cd->hwm, 0, code + 2 + LINK_SIZE - cd->start_code);
5207                }                }
# Line 5158  we set the flag only if there is a liter Line 5211  we set the flag only if there is a liter
5211              recursion that could loop for ever, and diagnose that case. */              recursion that could loop for ever, and diagnose that case. */
5212    
5213              else if (GET(called, 1) == 0 &&              else if (GET(called, 1) == 0 &&
5214                       could_be_empty(called, code, bcptr, utf8))                       could_be_empty(called, code, bcptr, utf8, cd))
5215                {                {
5216                *errorcodeptr = ERR40;                *errorcodeptr = ERR40;
5217                goto FAILED;                goto FAILED;
# Line 6804  if (reqbyte >= 0 && Line 6857  if (reqbyte >= 0 &&
6857  case when building a production library. */  case when building a production library. */
6858    
6859  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
   
6860  printf("Length = %d top_bracket = %d top_backref = %d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
6861    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
6862    

Legend:
Removed from v.488  
changed lines
  Added in v.505

  ViewVC Help
Powered by ViewVC 1.1.5