/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 295 by ph10, Mon Dec 31 17:00:24 2007 UTC revision 305 by ph10, Sun Jan 20 20:07:32 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 1738  return TRUE; Line 1738  return TRUE;
1738    
1739  /* This function is called when the sequence "[:" or "[." or "[=" is  /* This function is called when the sequence "[:" or "[." or "[=" is
1740  encountered in a character class. It checks whether this is followed by a  encountered in a character class. It checks whether this is followed by a
1741  sequence of characters terminated by a matching ":]" or ".]" or "=]". If we  sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
1742  reach an unescaped ']' without the special preceding character, return FALSE.  reach an unescaped ']' without the special preceding character, return FALSE.
1743    
1744  Originally, this function only recognized a sequence of letters between the  Originally, this function only recognized a sequence of letters between the
1745  terminators, but it seems that Perl recognizes any sequence of characters,  terminators, but it seems that Perl recognizes any sequence of characters,
1746  though of course unknown POSIX names are subsequently rejected. Perl gives an  though of course unknown POSIX names are subsequently rejected. Perl gives an
1747  "Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE  "Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
1748  didn't consider this to be a POSIX class. Likewise for [:1234:].  didn't consider this to be a POSIX class. Likewise for [:1234:].
1749    
1750  The problem in trying to be exactly like Perl is in the handling of escapes. We  The problem in trying to be exactly like Perl is in the handling of escapes. We
1751  have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX  have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
1752  class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code  class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
1753  below handles the special case of \], but does not try to do any other escape  below handles the special case of \], but does not try to do any other escape
1754  processing. This makes it different from Perl for cases such as [:l\ower:]  processing. This makes it different from Perl for cases such as [:l\ower:]
1755  where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize  where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
1756  "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,  "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
1757  I think.  I think.
1758    
1759  Arguments:  Arguments:
# Line 1771  terminator = *(++ptr);   /* compiler war Line 1771  terminator = *(++ptr);   /* compiler war
1771  for (++ptr; *ptr != 0; ptr++)  for (++ptr; *ptr != 0; ptr++)
1772    {    {
1773    if (*ptr == '\\' && ptr[1] == ']') ptr++; else    if (*ptr == '\\' && ptr[1] == ']') ptr++; else
1774      {      {
1775      if (*ptr == ']') return FALSE;      if (*ptr == ']') return FALSE;
1776      if (*ptr == terminator && ptr[1] == ']')      if (*ptr == terminator && ptr[1] == ']')
1777        {        {
1778        *endptr = ptr;        *endptr = ptr;
1779        return TRUE;        return TRUE;
1780        }        }
1781      }      }
1782    }    }
1783  return FALSE;  return FALSE;
1784  }  }
1785    
# Line 2376  uschar classbits[32]; Line 2376  uschar classbits[32];
2376  BOOL class_utf8;  BOOL class_utf8;
2377  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
2378  uschar *class_utf8data;  uschar *class_utf8data;
2379    uschar *class_utf8data_base;
2380  uschar utf8_char[6];  uschar utf8_char[6];
2381  #else  #else
2382  BOOL utf8 = FALSE;  BOOL utf8 = FALSE;
# Line 2687  for (;; ptr++) Line 2688  for (;; ptr++)
2688  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2689      class_utf8 = FALSE;                       /* No chars >= 256 */      class_utf8 = FALSE;                       /* No chars >= 256 */
2690      class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */      class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */
2691        class_utf8data_base = class_utf8data;     /* For resetting in pass 1 */
2692  #endif  #endif
2693    
2694      /* Process characters until ] is reached. By writing this as a "do" it      /* Process characters until ] is reached. By writing this as a "do" it
# Line 2702  for (;; ptr++) Line 2704  for (;; ptr++)
2704          {                           /* Braces are required because the */          {                           /* Braces are required because the */
2705          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
2706          }          }
2707    
2708          /* In the pre-compile phase, accumulate the length of any UTF-8 extra
2709          data and reset the pointer. This is so that very large classes that
2710          contain a zillion UTF-8 characters no longer overwrite the work space
2711          (which is on the stack). */
2712    
2713          if (lengthptr != NULL)
2714            {
2715            *lengthptr += class_utf8data - class_utf8data_base;
2716            class_utf8data = class_utf8data_base;
2717            }
2718    
2719  #endif  #endif
2720    
2721        /* Inside \Q...\E everything is literal except \E */        /* Inside \Q...\E everything is literal except \E */
# Line 5807  to fill in forward references to subpatt Line 5821  to fill in forward references to subpatt
5821    
5822  uschar cworkspace[COMPILE_WORK_SIZE];  uschar cworkspace[COMPILE_WORK_SIZE];
5823    
   
5824  /* Set this early so that early errors get offset 0. */  /* Set this early so that early errors get offset 0. */
5825    
5826  ptr = (const uschar *)pattern;  ptr = (const uschar *)pattern;

Legend:
Removed from v.295  
changed lines
  Added in v.305

  ViewVC Help
Powered by ViewVC 1.1.5