/[pcre]/code/trunk/pcre_byte_order.c
ViewVC logotype

Contents of /code/trunk/pcre_byte_order.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1055 - (show annotations)
Tue Oct 16 15:53:30 2012 UTC (7 years, 2 months ago) by chpe
File MIME type: text/plain
File size: 9106 byte(s)
pcre32: Add 32-bit library

Create libpcre32 that operates on 32-bit characters (UTF-32).

This turned out to be surprisingly simple after the UTF-16 support
was introduced; mostly just extra ifdefs and adjusting and adding
some tests.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains an internal function that tests a compiled pattern to
42 see if it was compiled with the opposite endianness. If so, it uses an
43 auxiliary local function to flip the appropriate bytes. */
44
45
46 #ifdef HAVE_CONFIG_H
47 #include "config.h"
48 #endif
49
50 #include "pcre_internal.h"
51
52
53 /*************************************************
54 * Swap byte functions *
55 *************************************************/
56
57 /* The following functions swap the bytes of a pcre_uint16
58 and pcre_uint32 value.
59
60 Arguments:
61 value any number
62
63 Returns: the byte swapped value
64 */
65
66 static pcre_uint32
67 swap_uint32(pcre_uint32 value)
68 {
69 return ((value & 0x000000ff) << 24) |
70 ((value & 0x0000ff00) << 8) |
71 ((value & 0x00ff0000) >> 8) |
72 (value >> 24);
73 }
74
75 static pcre_uint16
76 swap_uint16(pcre_uint16 value)
77 {
78 return (value >> 8) | (value << 8);
79 }
80
81
82 /*************************************************
83 * Test for a byte-flipped compiled regex *
84 *************************************************/
85
86 /* This function swaps the bytes of a compiled pattern usually
87 loaded from the disk. It also sets the tables pointer, which
88 is likely an invalid pointer after reload.
89
90 Arguments:
91 argument_re points to the compiled expression
92 extra_data points to extra data or is NULL
93 tables points to the character tables or NULL
94
95 Returns: 0 if the swap is successful, negative on error
96 */
97
98 #if defined COMPILE_PCRE8
99 PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *argument_re,
100 pcre_extra *extra_data, const unsigned char *tables)
101 #elif defined COMPILE_PCRE16
102 PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *argument_re,
103 pcre16_extra *extra_data, const unsigned char *tables)
104 #elif defined COMPILE_PCRE32
105 PCRE_EXP_DECL int pcre32_pattern_to_host_byte_order(pcre32 *argument_re,
106 pcre32_extra *extra_data, const unsigned char *tables)
107 #endif
108 {
109 REAL_PCRE *re = (REAL_PCRE *)argument_re;
110 pcre_study_data *study;
111 #ifndef COMPILE_PCRE8
112 pcre_uchar *ptr;
113 int length;
114 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
115 BOOL utf;
116 BOOL utf16_char;
117 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
118 #endif /* !COMPILE_PCRE8 */
119
120 if (re == NULL) return PCRE_ERROR_NULL;
121 if (re->magic_number == MAGIC_NUMBER)
122 {
123 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
124 re->tables = tables;
125 return 0;
126 }
127
128 if (re->magic_number != REVERSED_MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
129 if ((swap_uint16(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
130
131 re->magic_number = MAGIC_NUMBER;
132 re->size = swap_uint32(re->size);
133 re->options = swap_uint32(re->options);
134 re->flags = swap_uint16(re->flags);
135 re->top_bracket = swap_uint16(re->top_bracket);
136 re->top_backref = swap_uint16(re->top_backref);
137 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
138 re->first_char = swap_uint16(re->first_char);
139 re->req_char = swap_uint16(re->req_char);
140 #elif defined COMPILE_PCRE32
141 re->first_char = swap_uint32(re->first_char);
142 re->req_char = swap_uint32(re->req_char);
143 #endif
144 re->name_table_offset = swap_uint16(re->name_table_offset);
145 re->name_entry_size = swap_uint16(re->name_entry_size);
146 re->name_count = swap_uint16(re->name_count);
147 re->ref_count = swap_uint16(re->ref_count);
148 re->tables = tables;
149 #ifdef COMPILE_PCRE32
150 re->dummy1 = swap_uint16(re->dummy1);
151 re->dummy2 = swap_uint16(re->dummy2);
152 #endif
153
154 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
155 {
156 study = (pcre_study_data *)extra_data->study_data;
157 study->size = swap_uint32(study->size);
158 study->flags = swap_uint32(study->flags);
159 study->minlength = swap_uint32(study->minlength);
160 }
161
162 #ifndef COMPILE_PCRE8
163 ptr = (pcre_uchar *)re + re->name_table_offset;
164 length = re->name_count * re->name_entry_size;
165 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
166 utf = (re->options & PCRE_UTF16) != 0;
167 utf16_char = FALSE;
168 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
169
170 while(TRUE)
171 {
172 /* Swap previous characters. */
173 while (length-- > 0)
174 {
175 #if defined COMPILE_PCRE16
176 *ptr = swap_uint16(*ptr);
177 #elif defined COMPILE_PCRE32
178 *ptr = swap_uint32(*ptr);
179 #endif
180 ptr++;
181 }
182 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
183 if (utf16_char)
184 {
185 if (HAS_EXTRALEN(ptr[-1]))
186 {
187 /* We know that there is only one extra character in UTF-16. */
188 *ptr = swap_uint16(*ptr);
189 ptr++;
190 }
191 }
192 utf16_char = FALSE;
193 #endif /* SUPPORT_UTF */
194
195 /* Get next opcode. */
196 length = 0;
197 #if defined COMPILE_PCRE16
198 *ptr = swap_uint16(*ptr);
199 #elif defined COMPILE_PCRE32
200 *ptr = swap_uint32(*ptr);
201 #endif
202 switch (*ptr)
203 {
204 case OP_END:
205 return 0;
206
207 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
208 case OP_CHAR:
209 case OP_CHARI:
210 case OP_NOT:
211 case OP_NOTI:
212 case OP_STAR:
213 case OP_MINSTAR:
214 case OP_PLUS:
215 case OP_MINPLUS:
216 case OP_QUERY:
217 case OP_MINQUERY:
218 case OP_UPTO:
219 case OP_MINUPTO:
220 case OP_EXACT:
221 case OP_POSSTAR:
222 case OP_POSPLUS:
223 case OP_POSQUERY:
224 case OP_POSUPTO:
225 case OP_STARI:
226 case OP_MINSTARI:
227 case OP_PLUSI:
228 case OP_MINPLUSI:
229 case OP_QUERYI:
230 case OP_MINQUERYI:
231 case OP_UPTOI:
232 case OP_MINUPTOI:
233 case OP_EXACTI:
234 case OP_POSSTARI:
235 case OP_POSPLUSI:
236 case OP_POSQUERYI:
237 case OP_POSUPTOI:
238 case OP_NOTSTAR:
239 case OP_NOTMINSTAR:
240 case OP_NOTPLUS:
241 case OP_NOTMINPLUS:
242 case OP_NOTQUERY:
243 case OP_NOTMINQUERY:
244 case OP_NOTUPTO:
245 case OP_NOTMINUPTO:
246 case OP_NOTEXACT:
247 case OP_NOTPOSSTAR:
248 case OP_NOTPOSPLUS:
249 case OP_NOTPOSQUERY:
250 case OP_NOTPOSUPTO:
251 case OP_NOTSTARI:
252 case OP_NOTMINSTARI:
253 case OP_NOTPLUSI:
254 case OP_NOTMINPLUSI:
255 case OP_NOTQUERYI:
256 case OP_NOTMINQUERYI:
257 case OP_NOTUPTOI:
258 case OP_NOTMINUPTOI:
259 case OP_NOTEXACTI:
260 case OP_NOTPOSSTARI:
261 case OP_NOTPOSPLUSI:
262 case OP_NOTPOSQUERYI:
263 case OP_NOTPOSUPTOI:
264 if (utf) utf16_char = TRUE;
265 #endif
266 /* Fall through. */
267
268 default:
269 length = PRIV(OP_lengths)[*ptr] - 1;
270 break;
271
272 case OP_CLASS:
273 case OP_NCLASS:
274 /* Skip the character bit map. */
275 ptr += 32/sizeof(pcre_uchar);
276 length = 0;
277 break;
278
279 case OP_XCLASS:
280 /* Reverse the size of the XCLASS instance. */
281 ptr++;
282 #if defined COMPILE_PCRE16
283 *ptr = swap_uint16(*ptr);
284 #elif defined COMPILE_PCRE32
285 *ptr = swap_uint32(*ptr);
286 #endif
287 #ifndef COMPILE_PCRE32
288 if (LINK_SIZE > 1)
289 {
290 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
291 ptr++;
292 *ptr = swap_uint16(*ptr);
293 }
294 #endif
295 ptr++;
296 length = (GET(ptr, -LINK_SIZE)) - (1 + LINK_SIZE + 1);
297 #if defined COMPILE_PCRE16
298 *ptr = swap_uint16(*ptr);
299 #elif defined COMPILE_PCRE32
300 *ptr = swap_uint32(*ptr);
301 #endif
302 if ((*ptr & XCL_MAP) != 0)
303 {
304 /* Skip the character bit map. */
305 ptr += 32/sizeof(pcre_uchar);
306 length -= 32/sizeof(pcre_uchar);
307 }
308 break;
309 }
310 ptr++;
311 }
312 /* Control should never reach here in 16/32 bit mode. */
313 #endif /* !COMPILE_PCRE8 */
314
315 return 0;
316 }
317
318 /* End of pcre_byte_order.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5