8 |
|
|
9 |
Written by: Philip Hazel <ph10@cam.ac.uk> |
Written by: Philip Hazel <ph10@cam.ac.uk> |
10 |
|
|
11 |
Copyright (c) 1997-1999 University of Cambridge |
Copyright (c) 1997-2003 University of Cambridge |
12 |
|
|
13 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
14 |
Permission is granted to anyone to use this software for any purpose on any |
Permission is granted to anyone to use this software for any purpose on any |
58 |
Returns: pointer to the contiguous block of data |
Returns: pointer to the contiguous block of data |
59 |
*/ |
*/ |
60 |
|
|
61 |
unsigned const char * |
const unsigned char * |
62 |
pcre_maketables(void) |
pcre_maketables(void) |
63 |
{ |
{ |
64 |
unsigned char *yield, *p; |
unsigned char *yield, *p; |
81 |
|
|
82 |
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i); |
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i); |
83 |
|
|
84 |
/* Then the character class tables */ |
/* Then the character class tables. Don't try to be clever and save effort |
85 |
|
on exclusive ones - in some locales things may be different. Note that the |
86 |
|
table for "space" includes everything "isspace" gives, including VT in the |
87 |
|
default locale. This makes it work for the POSIX class [:space:]. */ |
88 |
|
|
89 |
memset(p, 0, cbit_length); |
memset(p, 0, cbit_length); |
90 |
for (i = 0; i < 256; i++) |
for (i = 0; i < 256; i++) |
91 |
{ |
{ |
92 |
if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7); |
if (isdigit(i)) |
93 |
if (isalnum(i) || i == '_') |
{ |
94 |
p[cbit_word + i/8] |= 1 << (i&7); |
p[cbit_digit + i/8] |= 1 << (i&7); |
95 |
|
p[cbit_word + i/8] |= 1 << (i&7); |
96 |
|
} |
97 |
|
if (isupper(i)) |
98 |
|
{ |
99 |
|
p[cbit_upper + i/8] |= 1 << (i&7); |
100 |
|
p[cbit_word + i/8] |= 1 << (i&7); |
101 |
|
} |
102 |
|
if (islower(i)) |
103 |
|
{ |
104 |
|
p[cbit_lower + i/8] |= 1 << (i&7); |
105 |
|
p[cbit_word + i/8] |= 1 << (i&7); |
106 |
|
} |
107 |
|
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7); |
108 |
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7); |
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7); |
109 |
|
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7); |
110 |
|
if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7); |
111 |
|
if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7); |
112 |
|
if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7); |
113 |
|
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7); |
114 |
} |
} |
115 |
p += cbit_length; |
p += cbit_length; |
116 |
|
|
117 |
/* Finally, the character type table */ |
/* Finally, the character type table. In this, we exclude VT from the white |
118 |
|
space chars, because Perl doesn't recognize it as such for \s and for comments |
119 |
|
within regexes. */ |
120 |
|
|
121 |
for (i = 0; i < 256; i++) |
for (i = 0; i < 256; i++) |
122 |
{ |
{ |
123 |
int x = 0; |
int x = 0; |
124 |
if (isspace(i)) x += ctype_space; |
if (i != 0x0b && isspace(i)) x += ctype_space; |
125 |
if (isalpha(i)) x += ctype_letter; |
if (isalpha(i)) x += ctype_letter; |
126 |
if (isdigit(i)) x += ctype_digit; |
if (isdigit(i)) x += ctype_digit; |
127 |
if (isxdigit(i)) x += ctype_xdigit; |
if (isxdigit(i)) x += ctype_xdigit; |