/[pcre]/code/trunk/pcre_xclass.c
ViewVC logotype

Diff of /code/trunk/pcre_xclass.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 236 by ph10, Tue Sep 11 12:57:06 2007 UTC revision 1376 by ph10, Sat Oct 12 18:02:11 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains an internal function that is used to match an extended  /* This module contains an internal function that is used to match an extended
42  class (one that contains characters whose values are > 255). It is used by both  class. It is used by both pcre_exec() and pcre_dfa_exec(). */
 pcre_exec() and pcre_dfa_exec(). */  
43    
44    
45  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
# Line 55  pcre_exec() and pcre_dfa_exec(). */ Line 54  pcre_exec() and pcre_dfa_exec(). */
54  *************************************************/  *************************************************/
55    
56  /* This function is called to match a character against an extended class that  /* This function is called to match a character against an extended class that
57  might contain values > 255.  might contain values > 255 and/or Unicode properties.
58    
59  Arguments:  Arguments:
60    c           the character    c           the character
# Line 65  Returns:      TRUE if character matches, Line 64  Returns:      TRUE if character matches,
64  */  */
65    
66  BOOL  BOOL
67  _pcre_xclass(int c, const uschar *data)  PRIV(xclass)(pcre_uint32 c, const pcre_uchar *data, BOOL utf)
68  {  {
69  int t;  pcre_uchar t;
70  BOOL negated = (*data & XCL_NOT) != 0;  BOOL negated = (*data & XCL_NOT) != 0;
71    
72    (void)utf;
73    #ifdef COMPILE_PCRE8
74    /* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
75    utf = TRUE;
76    #endif
77    
78  /* Character values < 256 are matched against a bitmap, if one is present. If  /* Character values < 256 are matched against a bitmap, if one is present. If
79  not, we still carry on, because there may be ranges that start below 256 in the  not, we still carry on, because there may be ranges that start below 256 in the
80  additional data. */  additional data. */
81    
82  if (c < 256)  if (c < 256)
83    {    {
84    if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)    if ((*data & XCL_MAP) != 0 &&
85      return !negated;   /* char found */      (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
86        return !negated; /* char found */
87    }    }
88    
89  /* First skip the bit map if present. Then match against the list of Unicode  /* First skip the bit map if present. Then match against the list of Unicode
90  properties or large chars or ranges that end with a large char. We won't ever  properties or large chars or ranges that end with a large char. We won't ever
91  encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */  encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
92    
93  if ((*data++ & XCL_MAP) != 0) data += 32;  if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar);
94    
95  while ((t = *data++) != XCL_END)  while ((t = *data++) != XCL_END)
96    {    {
97    int x, y;    pcre_uint32 x, y;
98    if (t == XCL_SINGLE)    if (t == XCL_SINGLE)
99      {      {
100      GETCHARINC(x, data);  #ifdef SUPPORT_UTF
101        if (utf)
102          {
103          GETCHARINC(x, data); /* macro generates multiple statements */
104          }
105        else
106    #endif
107          x = *data++;
108      if (c == x) return !negated;      if (c == x) return !negated;
109      }      }
110    else if (t == XCL_RANGE)    else if (t == XCL_RANGE)
111      {      {
112      GETCHARINC(x, data);  #ifdef SUPPORT_UTF
113      GETCHARINC(y, data);      if (utf)
114          {
115          GETCHARINC(x, data); /* macro generates multiple statements */
116          GETCHARINC(y, data); /* macro generates multiple statements */
117          }
118        else
119    #endif
120          {
121          x = *data++;
122          y = *data++;
123          }
124      if (c >= x && c <= y) return !negated;      if (c >= x && c <= y) return !negated;
125      }      }
126    
127  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
128    else  /* XCL_PROP & XCL_NOTPROP */    else  /* XCL_PROP & XCL_NOTPROP */
129      {      {
130      int chartype, script;      const ucd_record *prop = GET_UCD(c);
     int category = _pcre_ucp_findprop(c, &chartype, &script);  
131    
132      switch(*data)      switch(*data)
133        {        {
# Line 114  while ((t = *data++) != XCL_END) Line 136  while ((t = *data++) != XCL_END)
136        break;        break;
137    
138        case PT_LAMP:        case PT_LAMP:
139        if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==        if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
140            (t == XCL_PROP)) return !negated;             prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
141        break;        break;
142    
143        case PT_GC:        case PT_GC:
144        if ((data[1] == category) == (t == XCL_PROP)) return !negated;        if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
145            return !negated;
146        break;        break;
147    
148        case PT_PC:        case PT_PC:
149        if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;        if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
150        break;        break;
151    
152        case PT_SC:        case PT_SC:
153        if ((data[1] == script) == (t == XCL_PROP)) return !negated;        if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
154          break;
155    
156          case PT_ALNUM:
157          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
158               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
159            return !negated;
160          break;
161    
162          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
163          which means that Perl space and POSIX space are now identical. PCRE
164          was changed at release 8.34. */
165    
166          case PT_SPACE:    /* Perl space */
167          case PT_PXSPACE:  /* POSIX space */
168          switch(c)
169            {
170            HSPACE_CASES:
171            VSPACE_CASES:
172            if (t == XCL_PROP) return !negated;
173            break;
174    
175            default:
176            if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == (t == XCL_PROP))
177              return !negated;
178            break;
179            }
180          break;
181    
182          case PT_WORD:
183          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
184               PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
185                 == (t == XCL_PROP))
186            return !negated;
187          break;
188    
189          case PT_UCNC:
190          if (c < 0xa0)
191            {
192            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
193                 c == CHAR_GRAVE_ACCENT) == (t == XCL_PROP))
194              return !negated;
195            }
196          else
197            {
198            if ((c < 0xd800 || c > 0xdfff) == (t == XCL_PROP))
199              return !negated;
200            }
201        break;        break;
202    
203        /* This should never occur, but compilers may mutter if there is no        /* This should never occur, but compilers may mutter if there is no

Legend:
Removed from v.236  
changed lines
  Added in v.1376

  ViewVC Help
Powered by ViewVC 1.1.5