/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 286 by ph10, Mon Dec 17 14:46:11 2007 UTC revision 323 by ph10, Wed Mar 5 17:23:42 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 1737  return TRUE; Line 1737  return TRUE;
1737  *************************************************/  *************************************************/
1738    
1739  /* This function is called when the sequence "[:" or "[." or "[=" is  /* This function is called when the sequence "[:" or "[." or "[=" is
1740  encountered in a character class. It checks whether this is followed by an  encountered in a character class. It checks whether this is followed by a
1741  optional ^ and then a sequence of letters, terminated by a matching ":]" or  sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
1742  ".]" or "=]".  reach an unescaped ']' without the special preceding character, return FALSE.
1743    
1744    Originally, this function only recognized a sequence of letters between the
1745    terminators, but it seems that Perl recognizes any sequence of characters,
1746    though of course unknown POSIX names are subsequently rejected. Perl gives an
1747    "Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
1748    didn't consider this to be a POSIX class. Likewise for [:1234:].
1749    
1750    The problem in trying to be exactly like Perl is in the handling of escapes. We
1751    have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
1752    class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
1753    below handles the special case of \], but does not try to do any other escape
1754    processing. This makes it different from Perl for cases such as [:l\ower:]
1755    where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
1756    "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
1757    I think.
1758    
1759  Argument:  Arguments:
1760    ptr      pointer to the initial [    ptr      pointer to the initial [
1761    endptr   where to return the end pointer    endptr   where to return the end pointer
   cd       pointer to compile data  
1762    
1763  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
1764  */  */
1765    
1766  static BOOL  static BOOL
1767  check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)  check_posix_syntax(const uschar *ptr, const uschar **endptr)
1768  {  {
1769  int terminator;          /* Don't combine these lines; the Solaris cc */  int terminator;          /* Don't combine these lines; the Solaris cc */
1770  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
1771  if (*(++ptr) == '^') ptr++;  for (++ptr; *ptr != 0; ptr++)
 while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;  
 if (*ptr == terminator && ptr[1] == ']')  
1772    {    {
1773    *endptr = ptr;    if (*ptr == '\\' && ptr[1] == ']') ptr++; else
1774    return TRUE;      {
1775        if (*ptr == ']') return FALSE;
1776        if (*ptr == terminator && ptr[1] == ']')
1777          {
1778          *endptr = ptr;
1779          return TRUE;
1780          }
1781        }
1782    }    }
1783  return FALSE;  return FALSE;
1784  }  }
# Line 2094  if (next >= 0) switch(op_code) Line 2113  if (next >= 0) switch(op_code)
2113    /* For OP_NOT, "item" must be a single-byte character. */    /* For OP_NOT, "item" must be a single-byte character. */
2114    
2115    case OP_NOT:    case OP_NOT:
   if (next < 0) return FALSE;  /* Not a character */  
2116    if (item == next) return TRUE;    if (item == next) return TRUE;
2117    if ((options & PCRE_CASELESS) == 0) return FALSE;    if ((options & PCRE_CASELESS) == 0) return FALSE;
2118  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2357  uschar classbits[32]; Line 2375  uschar classbits[32];
2375  BOOL class_utf8;  BOOL class_utf8;
2376  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
2377  uschar *class_utf8data;  uschar *class_utf8data;
2378    uschar *class_utf8data_base;
2379  uschar utf8_char[6];  uschar utf8_char[6];
2380  #else  #else
2381  BOOL utf8 = FALSE;  BOOL utf8 = FALSE;
# Line 2620  for (;; ptr++) Line 2639  for (;; ptr++)
2639      they are encountered at the top level, so we'll do that too. */      they are encountered at the top level, so we'll do that too. */
2640    
2641      if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&      if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
2642          check_posix_syntax(ptr, &tempptr, cd))          check_posix_syntax(ptr, &tempptr))
2643        {        {
2644        *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;        *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;
2645        goto FAILED;        goto FAILED;
# Line 2668  for (;; ptr++) Line 2687  for (;; ptr++)
2687  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2688      class_utf8 = FALSE;                       /* No chars >= 256 */      class_utf8 = FALSE;                       /* No chars >= 256 */
2689      class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */      class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */
2690        class_utf8data_base = class_utf8data;     /* For resetting in pass 1 */
2691  #endif  #endif
2692    
2693      /* Process characters until ] is reached. By writing this as a "do" it      /* Process characters until ] is reached. By writing this as a "do" it
# Line 2683  for (;; ptr++) Line 2703  for (;; ptr++)
2703          {                           /* Braces are required because the */          {                           /* Braces are required because the */
2704          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
2705          }          }
2706    
2707          /* In the pre-compile phase, accumulate the length of any UTF-8 extra
2708          data and reset the pointer. This is so that very large classes that
2709          contain a zillion UTF-8 characters no longer overwrite the work space
2710          (which is on the stack). */
2711    
2712          if (lengthptr != NULL)
2713            {
2714            *lengthptr += class_utf8data - class_utf8data_base;
2715            class_utf8data = class_utf8data_base;
2716            }
2717    
2718  #endif  #endif
2719    
2720        /* Inside \Q...\E everything is literal except \E */        /* Inside \Q...\E everything is literal except \E */
# Line 2706  for (;; ptr++) Line 2738  for (;; ptr++)
2738    
2739        if (c == '[' &&        if (c == '[' &&
2740            (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&            (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
2741            check_posix_syntax(ptr, &tempptr, cd))            check_posix_syntax(ptr, &tempptr))
2742          {          {
2743          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
2744          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
# Line 5788  to fill in forward references to subpatt Line 5820  to fill in forward references to subpatt
5820    
5821  uschar cworkspace[COMPILE_WORK_SIZE];  uschar cworkspace[COMPILE_WORK_SIZE];
5822    
   
5823  /* Set this early so that early errors get offset 0. */  /* Set this early so that early errors get offset 0. */
5824    
5825  ptr = (const uschar *)pattern;  ptr = (const uschar *)pattern;

Legend:
Removed from v.286  
changed lines
  Added in v.323

  ViewVC Help
Powered by ViewVC 1.1.5