Parent Directory
|
Revision Log
|
Patch
revision 200 by ph10, Wed Aug 1 09:10:40 2007 UTC | revision 1117 by chpe, Tue Oct 16 15:57:27 2012 UTC | |
---|---|---|
# | Line 4 | Line 4 |
4 | ||
5 | /* This program was hacked up as a tester for PCRE. I really should have | /* This program was hacked up as a tester for PCRE. I really should have |
6 | written it more tidily in the first place. Will I ever learn? It has grown and | written it more tidily in the first place. Will I ever learn? It has grown and |
7 | been extended and consequently is now rather, er, *very* untidy in places. | been extended and consequently is now rather, er, *very* untidy in places. The |
8 | addition of 16-bit support has made it even worse. :-( | |
9 | ||
10 | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
11 | Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without |
# | Line 35 POSSIBILITY OF SUCH DAMAGE. | Line 36 POSSIBILITY OF SUCH DAMAGE. |
36 | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
37 | */ | */ |
38 | ||
39 | /* This program now supports the testing of both the 8-bit and 16-bit PCRE | |
40 | libraries in a single program. This is different from the modules such as | |
41 | pcre_compile.c in the library itself, which are compiled separately for each | |
42 | mode. If both modes are enabled, for example, pcre_compile.c is compiled twice | |
43 | (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is | |
44 | compiled only once. Therefore, it must not make use of any of the macros from | |
45 | pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does, | |
46 | however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls | |
47 | only supported library functions. */ | |
48 | ||
49 | #ifdef HAVE_CONFIG_H | #ifdef HAVE_CONFIG_H |
50 | #include <config.h> | #include "config.h" |
51 | #endif | #endif |
52 | ||
53 | #include <ctype.h> | #include <ctype.h> |
# | Line 48 POSSIBILITY OF SUCH DAMAGE. | Line 58 POSSIBILITY OF SUCH DAMAGE. |
58 | #include <locale.h> | #include <locale.h> |
59 | #include <errno.h> | #include <errno.h> |
60 | ||
61 | /* Both libreadline and libedit are optionally supported. The user-supplied | |
62 | original patch uses readline/readline.h for libedit, but in at least one system | |
63 | it is installed as editline/readline.h, so the configuration code now looks for | |
64 | that first, falling back to readline/readline.h. */ | |
65 | ||
66 | #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) | |
67 | #ifdef HAVE_UNISTD_H | |
68 | #include <unistd.h> | |
69 | #endif | |
70 | #if defined(SUPPORT_LIBREADLINE) | |
71 | #include <readline/readline.h> | |
72 | #include <readline/history.h> | |
73 | #else | |
74 | #if defined(HAVE_EDITLINE_READLINE_H) | |
75 | #include <editline/readline.h> | |
76 | #else | |
77 | #include <readline/readline.h> | |
78 | #endif | |
79 | #endif | |
80 | #endif | |
81 | ||
82 | /* A number of things vary for Windows builds. Originally, pcretest opened its | /* A number of things vary for Windows builds. Originally, pcretest opened its |
83 | input and output without "b"; then I was told that "b" was needed in some | input and output without "b"; then I was told that "b" was needed in some |
# | Line 63 input mode under Windows. */ | Line 93 input mode under Windows. */ |
93 | #define INPUT_MODE "r" | #define INPUT_MODE "r" |
94 | #define OUTPUT_MODE "wb" | #define OUTPUT_MODE "wb" |
95 | ||
96 | #ifndef isatty | |
97 | #define isatty _isatty /* This is what Windows calls them, I'm told, */ | |
98 | #endif /* though in some environments they seem to */ | |
99 | /* be already defined, hence the #ifndefs. */ | |
100 | #ifndef fileno | |
101 | #define fileno _fileno | |
102 | #endif | |
103 | ||
104 | /* A user sent this fix for Borland Builder 5 under Windows. */ | |
105 | ||
106 | #ifdef __BORLANDC__ | |
107 | #define _setmode(handle, mode) setmode(handle, mode) | |
108 | #endif | |
109 | ||
110 | /* Not Windows */ | |
111 | ||
112 | #else | #else |
113 | #include <sys/time.h> /* These two includes are needed */ | #include <sys/time.h> /* These two includes are needed */ |
114 | #include <sys/resource.h> /* for setrlimit(). */ | #include <sys/resource.h> /* for setrlimit(). */ |
115 | #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */ | |
116 | #define INPUT_MODE "r" | |
117 | #define OUTPUT_MODE "w" | |
118 | #else | |
119 | #define INPUT_MODE "rb" | #define INPUT_MODE "rb" |
120 | #define OUTPUT_MODE "wb" | #define OUTPUT_MODE "wb" |
121 | #endif | #endif |
122 | #endif | |
123 | ||
124 | #define PRIV(name) name | |
125 | ||
126 | /* We have to include pcre_internal.h because we need the internal info for | /* We have to include pcre_internal.h because we need the internal info for |
127 | displaying the results of pcre_study() and we also need to know about the | displaying the results of pcre_study() and we also need to know about the |
# | Line 81 here before pcre_internal.h so that the | Line 133 here before pcre_internal.h so that the |
133 | appropriately for an application, not for building PCRE. */ | appropriately for an application, not for building PCRE. */ |
134 | ||
135 | #include "pcre.h" | #include "pcre.h" |
136 | ||
137 | #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 | |
138 | /* Configure internal macros to 32 bit mode. */ | |
139 | #define COMPILE_PCRE32 | |
140 | #endif | |
141 | #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32 | |
142 | /* Configure internal macros to 16 bit mode. */ | |
143 | #define COMPILE_PCRE16 | |
144 | #endif | |
145 | #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32 | |
146 | /* Configure internal macros to 8 bit mode. */ | |
147 | #define COMPILE_PCRE8 | |
148 | #endif | |
149 | ||
150 | #include "pcre_internal.h" | #include "pcre_internal.h" |
151 | ||
152 | /* We need access to the data tables that PCRE uses. So as not to have to keep | /* The pcre_printint() function, which prints the internal form of a compiled |
153 | two copies, we include the source file here, changing the names of the external | regex, is held in a separate file so that (a) it can be compiled in either |
154 | symbols to prevent clashes. */ | 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c |
155 | when that is compiled in debug mode. */ | |
#define _pcre_utf8_table1 utf8_table1 | ||
#define _pcre_utf8_table1_size utf8_table1_size | ||
#define _pcre_utf8_table2 utf8_table2 | ||
#define _pcre_utf8_table3 utf8_table3 | ||
#define _pcre_utf8_table4 utf8_table4 | ||
#define _pcre_utt utt | ||
#define _pcre_utt_size utt_size | ||
#define _pcre_OP_lengths OP_lengths | ||
156 | ||
157 | #include "pcre_tables.c" | #ifdef SUPPORT_PCRE8 |
158 | void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths); | |
159 | #endif | |
160 | #ifdef SUPPORT_PCRE16 | |
161 | void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths); | |
162 | #endif | |
163 | #ifdef SUPPORT_PCRE32 | |
164 | void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths); | |
165 | #endif | |
166 | ||
167 | /* We also need the pcre_printint() function for printing out compiled | /* We need access to some of the data tables that PCRE uses. So as not to have |
168 | patterns. This function is in a separate file so that it can be included in | to keep two copies, we include the source files here, changing the names of the |
169 | pcre_compile.c when that module is compiled with debugging enabled. | external symbols to prevent clashes. */ |
170 | ||
171 | The definition of the macro PRINTABLE, which determines whether to print an | #define PCRE_INCLUDED |
172 | ||
173 | #include "pcre_tables.c" | |
174 | #include "pcre_ucd.c" | |
175 | ||
176 | /* The definition of the macro PRINTABLE, which determines whether to print an | |
177 | output character as-is or as a hex value when showing compiled patterns, is | output character as-is or as a hex value when showing compiled patterns, is |
178 | contained in this file. We use it here also, in cases when the locale has not | the same as in the printint.src file. We use it here in cases when the locale |
179 | been explicitly changed, so as to get consistent output from systems that | has not been explicitly changed, so as to get consistent output from systems |
180 | differ in their output from isprint() even in the "C" locale. */ | that differ in their output from isprint() even in the "C" locale. */ |
181 | ||
182 | #include "pcre_printint.src" | #ifdef EBCDIC |
183 | #define PRINTABLE(c) ((c) >= 64 && (c) < 255) | |
184 | #else | |
185 | #define PRINTABLE(c) ((c) >= 32 && (c) < 127) | |
186 | #endif | |
187 | ||
188 | #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c)) | #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c)) |
189 | ||
190 | /* Posix support is disabled in 16 or 32 bit only mode. */ | |
191 | #if !defined SUPPORT_PCRE8 && !defined NOPOSIX | |
192 | #define NOPOSIX | |
193 | #endif | |
194 | ||
195 | /* It is possible to compile this test program without including support for | /* It is possible to compile this test program without including support for |
196 | testing the POSIX interface, though this is not available via the standard | testing the POSIX interface, though this is not available via the standard |
# | Line 121 Makefile. */ | Line 200 Makefile. */ |
200 | #include "pcreposix.h" | #include "pcreposix.h" |
201 | #endif | #endif |
202 | ||
203 | /* It is also possible, for the benefit of the version currently imported into | /* It is also possible, originally for the benefit of a version that was |
204 | Exim, to build pcretest without support for UTF8 (define NOUTF8), without the | imported into Exim, to build pcretest without support for UTF8 or UTF16 (define |
205 | interface to the DFA matcher (NODFA), and without the doublecheck of the old | NOUTF), and without the interface to the DFA matcher (NODFA). In fact, we |
206 | "info" function (define NOINFOCHECK). In fact, we automatically cut out the | automatically cut out the UTF support if PCRE is built without it. */ |
207 | UTF8 support if PCRE is built without it. */ | |
208 | #ifndef SUPPORT_UTF | |
209 | #ifndef SUPPORT_UTF8 | #ifndef NOUTF |
210 | #ifndef NOUTF8 | #define NOUTF |
#define NOUTF8 | ||
211 | #endif | #endif |
212 | #endif | #endif |
213 | ||
214 | /* To make the code a bit tidier for 8/16/32-bit support, we define macros | |
215 | for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called | |
216 | only from one place and is handled differently). I couldn't dream up any way of | |
217 | using a single macro to do this in a generic way, because of the many different | |
218 | argument requirements. We know that at least one of SUPPORT_PCRE8, | |
219 | SUPPORT_PCRE16 and SUPPORT_PCRE32 must be set. First define macros for each individual mode; then | |
220 | use these in the definitions of generic macros. | |
221 | ||
222 | **** Special note about the PCHARSxxx macros: the address of the string to be | |
223 | printed is always given as two arguments: a base address followed by an offset. | |
224 | The base address is cast to the correct data size for 8, 16 or 32 bit data; the | |
225 | offset is in units of this size. If the string were given as base+offset in one | |
226 | argument, the casting might be incorrectly applied. */ | |
227 | ||
228 | #ifdef SUPPORT_PCRE8 | |
229 | ||
230 | #define PCHARS8(lv, p, offset, len, f) \ | |
231 | lv = pchars((pcre_uint8 *)(p) + offset, len, f) | |
232 | ||
233 | #define PCHARSV8(p, offset, len, f) \ | |
234 | (void)pchars((pcre_uint8 *)(p) + offset, len, f) | |
235 | ||
236 | #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \ | |
237 | p = read_capture_name8(p, cn8, re) | |
238 | ||
239 | #define STRLEN8(p) ((int)strlen((char *)p)) | |
240 | ||
241 | #define SET_PCRE_CALLOUT8(callout) \ | |
242 | pcre_callout = callout | |
243 | ||
244 | #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \ | |
245 | pcre_assign_jit_stack(extra, callback, userdata) | |
246 | ||
247 | #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \ | |
248 | re = pcre_compile((char *)pat, options, error, erroffset, tables) | |
249 | ||
250 | #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ | |
251 | namesptr, cbuffer, size) \ | |
252 | rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \ | |
253 | (char *)namesptr, cbuffer, size) | |
254 | ||
255 | #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \ | |
256 | rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size) | |
257 | ||
258 | #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \ | |
259 | offsets, size_offsets, workspace, size_workspace) \ | |
260 | count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \ | |
261 | offsets, size_offsets, workspace, size_workspace) | |
262 | ||
263 | #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \ | |
264 | offsets, size_offsets) \ | |
265 | count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \ | |
266 | offsets, size_offsets) | |
267 | ||
268 | #define PCRE_FREE_STUDY8(extra) \ | |
269 | pcre_free_study(extra) | |
270 | ||
271 | #define PCRE_FREE_SUBSTRING8(substring) \ | |
272 | pcre_free_substring(substring) | |
273 | ||
274 | #define PCRE_FREE_SUBSTRING_LIST8(listptr) \ | |
275 | pcre_free_substring_list(listptr) | |
276 | ||
277 | #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ | |
278 | getnamesptr, subsptr) \ | |
279 | rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \ | |
280 | (char *)getnamesptr, subsptr) | |
281 | ||
282 | #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \ | |
283 | n = pcre_get_stringnumber(re, (char *)ptr) /* Stores the result of pcre_get_stringnumber() in n. NOTE(review): the body reads `re` from the expansion site and never uses the `rc` parameter - confirm this is intended. */ | |
284 | ||
285 | #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \ | |
286 | rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr) | |
287 | ||
288 | #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \ | |
289 | rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr) | |
290 | ||
291 | #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \ | |
292 | rc = pcre_pattern_to_host_byte_order(re, extra, tables) | |
293 | ||
294 | #define PCRE_PRINTINT8(re, outfile, debug_lengths) \ | |
295 | pcre_printint(re, outfile, debug_lengths) | |
296 | ||
297 | #define PCRE_STUDY8(extra, re, options, error) \ | |
298 | extra = pcre_study(re, options, error) | |
299 | ||
300 | #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \ | |
301 | pcre_jit_stack_alloc(startsize, maxsize) | |
302 | ||
303 | #define PCRE_JIT_STACK_FREE8(stack) \ | |
304 | pcre_jit_stack_free(stack) | |
305 | ||
306 | #endif /* SUPPORT_PCRE8 */ | |
307 | ||
308 | /* -----------------------------------------------------------*/ | |
309 | ||
310 | #ifdef SUPPORT_PCRE16 | |
311 | ||
312 | #define PCHARS16(lv, p, offset, len, f) \ | |
313 | lv = pchars16((PCRE_SPTR16)(p) + offset, len, f) | |
314 | ||
315 | #define PCHARSV16(p, offset, len, f) \ | |
316 | (void)pchars16((PCRE_SPTR16)(p) + offset, len, f) | |
317 | ||
318 | #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \ | |
319 | p = read_capture_name16(p, cn16, re) | |
320 | ||
321 | #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p)) | |
322 | ||
323 | #define SET_PCRE_CALLOUT16(callout) \ | |
324 | pcre16_callout = (int (*)(pcre16_callout_block *))callout | |
325 | ||
326 | #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \ | |
327 | pcre16_assign_jit_stack((pcre16_extra *)extra, \ | |
328 | (pcre16_jit_callback)callback, userdata) | |
329 | ||
330 | #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \ | |
331 | re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \ | |
332 | tables) | |
333 | ||
334 | #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ | |
335 | namesptr, cbuffer, size) \ | |
336 | rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \ | |
337 | count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2) | |
338 | ||
339 | #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \ | |
340 | rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \ | |
341 | (PCRE_UCHAR16 *)cbuffer, size/2) | |
342 | ||
343 | #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \ | |
344 | offsets, size_offsets, workspace, size_workspace) \ | |
345 | count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \ | |
346 | (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \ | |
347 | workspace, size_workspace) | |
348 | ||
349 | #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \ | |
350 | offsets, size_offsets) \ | |
351 | count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \ | |
352 | len, start_offset, options, offsets, size_offsets) | |
353 | ||
354 | #define PCRE_FREE_STUDY16(extra) \ | |
355 | pcre16_free_study((pcre16_extra *)extra) | |
356 | ||
357 | #define PCRE_FREE_SUBSTRING16(substring) \ | |
358 | pcre16_free_substring((PCRE_SPTR16)substring) | |
359 | ||
360 | #define PCRE_FREE_SUBSTRING_LIST16(listptr) \ | |
361 | pcre16_free_substring_list((PCRE_SPTR16 *)listptr) | |
362 | ||
363 | #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ | |
364 | getnamesptr, subsptr) \ | |
365 | rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \ | |
366 | count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr) | |
367 | ||
368 | #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \ | |
369 | n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr) /* Stores the result of pcre16_get_stringnumber() in n. NOTE(review): the body reads `re` from the expansion site (without the (pcre16 *) cast used by the sibling macros) and never uses the `rc` parameter - confirm this is intended. */ | |
370 | ||
371 | #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \ | |
372 | rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \ | |
373 | (PCRE_SPTR16 *)(void*)subsptr) | |
374 | ||
375 | #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \ | |
376 | rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \ | |
377 | (PCRE_SPTR16 **)(void*)listptr) | |
378 | ||
379 | #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \ | |
380 | rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \ | |
381 | tables) | |
382 | ||
383 | #define PCRE_PRINTINT16(re, outfile, debug_lengths) \ | |
384 | pcre16_printint(re, outfile, debug_lengths) | |
385 | ||
386 | #define PCRE_STUDY16(extra, re, options, error) \ | |
387 | extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error) | |
388 | ||
389 | #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \ | |
390 | (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize) | |
391 | ||
392 | #define PCRE_JIT_STACK_FREE16(stack) \ | |
393 | pcre16_jit_stack_free((pcre16_jit_stack *)stack) | |
394 | ||
395 | #endif /* SUPPORT_PCRE16 */ | |
396 | ||
397 | /* -----------------------------------------------------------*/ | |
398 | ||
399 | #ifdef SUPPORT_PCRE32 | |
400 | ||
401 | #define PCHARS32(lv, p, offset, len, f) \ | |
402 | lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f) | |
403 | ||
404 | #define PCHARSV32(p, offset, len, f) \ | |
405 | (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f) | |
406 | ||
407 | #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \ | |
408 | p = read_capture_name32(p, cn32, re) | |
409 | ||
410 | #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p)) | |
411 | ||
412 | #define SET_PCRE_CALLOUT32(callout) \ | |
413 | pcre32_callout = (int (*)(pcre32_callout_block *))callout | |
414 | ||
415 | #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \ | |
416 | pcre32_assign_jit_stack((pcre32_extra *)extra, \ | |
417 | (pcre32_jit_callback)callback, userdata) | |
418 | ||
419 | #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \ | |
420 | re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \ | |
421 | tables) | |
422 | ||
423 | #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \ | |
424 | namesptr, cbuffer, size) \ | |
425 | rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \ | |
426 | count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4) /* Copies a named substring into cbuffer and stores the result code in rc. size is taken to be a byte count (the 16-bit variant passes size/2), so the capacity in 32-bit units is size/4, not size/2. */ | |
427 | ||
428 | #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \ | |
429 | rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \ | |
430 | (PCRE_UCHAR32 *)cbuffer, (size)/4) /* Copies substring i into cbuffer and stores the result code in rc. size is taken to be a byte count (the 16-bit variant passes size/2), so the capacity in 32-bit units is size/4, not size/2. */ | |
431 | ||
432 | #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \ | |
433 | offsets, size_offsets, workspace, size_workspace) \ | |
434 | count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \ | |
435 | (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \ | |
436 | workspace, size_workspace) | |
437 | ||
438 | #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \ | |
439 | offsets, size_offsets) \ | |
440 | count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \ | |
441 | len, start_offset, options, offsets, size_offsets) | |
442 | ||
443 | #define PCRE_FREE_STUDY32(extra) \ | |
444 | pcre32_free_study((pcre32_extra *)extra) | |
445 | ||
446 | #define PCRE_FREE_SUBSTRING32(substring) \ | |
447 | pcre32_free_substring((PCRE_SPTR32)substring) | |
448 | ||
449 | #define PCRE_FREE_SUBSTRING_LIST32(listptr) \ | |
450 | pcre32_free_substring_list((PCRE_SPTR32 *)listptr) | |
451 | ||
452 | #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \ | |
453 | getnamesptr, subsptr) \ | |
454 | rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \ | |
455 | count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr) | |
456 | ||
457 | #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \ | |
458 | n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr) /* Stores the result of pcre32_get_stringnumber() in n. NOTE(review): the body reads `re` from the expansion site (without the (pcre32 *) cast used by the sibling macros) and never uses the `rc` parameter - confirm this is intended. */ | |
459 | ||
460 | #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \ | |
461 | rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \ | |
462 | (PCRE_SPTR32 *)(void*)subsptr) | |
463 | ||
464 | #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \ | |
465 | rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \ | |
466 | (PCRE_SPTR32 **)(void*)listptr) | |
467 | ||
468 | #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \ | |
469 | rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \ | |
470 | tables) | |
471 | ||
472 | #define PCRE_PRINTINT32(re, outfile, debug_lengths) \ | |
473 | pcre32_printint(re, outfile, debug_lengths) | |
474 | ||
475 | #define PCRE_STUDY32(extra, re, options, error) \ | |
476 | extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error) | |
477 | ||
478 | #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \ | |
479 | (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize) | |
480 | ||
481 | #define PCRE_JIT_STACK_FREE32(stack) \ | |
482 | pcre32_jit_stack_free((pcre32_jit_stack *)stack) | |
483 | ||
484 | #endif /* SUPPORT_PCRE32 */ | |
485 | ||
486 | ||
487 | /* ----- More than one mode is supported; a runtime test is needed, except for | |
488 | pcre_config(), and the JIT stack functions, when it doesn't matter which | |
489 | version is called. ----- */ | |
490 | ||
491 | enum { /* Values compared against pcre_mode by the generic dispatch macros. */ | |
492 | PCRE8_MODE, /* 0 - the multi-mode CHAR_SIZE, (1 << pcre_mode), yields 1 */ | |
493 | PCRE16_MODE, /* 1 - CHAR_SIZE yields 2 */ | |
494 | PCRE32_MODE /* 2 - CHAR_SIZE yields 4; do not reorder these enumerators */ | |
495 | }; | |
496 | ||
497 | #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2 | |
498 | ||
499 | #define CHAR_SIZE (1 << pcre_mode) | |
500 | ||
501 | #define PCHARS(lv, p, offset, len, f) \ | |
502 | if (pcre_mode == PCRE32_MODE) \ | |
503 | PCHARS32(lv, p, offset, len, f); \ | |
504 | else if (pcre_mode == PCRE16_MODE) \ | |
505 | PCHARS16(lv, p, offset, len, f); \ | |
506 | else \ | |
507 | PCHARS8(lv, p, offset, len, f) | |
508 | ||
509 | #define PCHARSV(p, offset, len, f) \ | |
510 | if (pcre_mode == PCRE32_MODE) \ | |
511 | PCHARSV32(p, offset, len, f); \ | |
512 | else if (pcre_mode == PCRE16_MODE) \ | |
513 | PCHARSV16(p, offset, len, f); \ | |
514 | else \ | |
515 | PCHARSV8(p, offset, len, f) | |
516 | ||
517 | #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \ | |
518 | if (pcre_mode == PCRE32_MODE) \ | |
519 | READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \ | |
520 | else if (pcre_mode == PCRE16_MODE) \ | |
521 | READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \ | |
522 | else \ | |
523 | READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) | |
524 | ||
525 | #define SET_PCRE_CALLOUT(callout) \ | |
526 | if (pcre_mode == PCRE32_MODE) \ | |
527 | SET_PCRE_CALLOUT32(callout); \ | |
528 | else if (pcre_mode == PCRE16_MODE) \ | |
529 | SET_PCRE_CALLOUT16(callout); \ | |
530 | else \ | |
531 | SET_PCRE_CALLOUT8(callout) | |
532 | ||
533 | #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p)) | |
534 | ||
535 | #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \ | |
536 | if (pcre_mode == PCRE32_MODE) \ | |
537 | PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \ | |
538 | else if (pcre_mode == PCRE16_MODE) \ | |
539 | PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \ | |
540 | else \ | |
541 | PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) | |
542 | ||
543 | #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \ | |
544 | if (pcre_mode == PCRE32_MODE) \ | |
545 | PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \ | |
546 | else if (pcre_mode == PCRE16_MODE) \ | |
547 | PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \ | |
548 | else \ | |
549 | PCRE_COMPILE8(re, pat, options, error, erroffset, tables) | |
550 | ||
551 | #define PCRE_CONFIG pcre_config | |
552 | ||
553 | #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \ | |
554 | namesptr, cbuffer, size) \ | |
555 | if (pcre_mode == PCRE32_MODE) \ | |
556 | PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \ | |
557 | namesptr, cbuffer, size); \ | |
558 | else if (pcre_mode == PCRE16_MODE) \ | |
559 | PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ | |
560 | namesptr, cbuffer, size); \ | |
561 | else \ | |
562 | PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ | |
563 | namesptr, cbuffer, size) | |
564 | ||
565 | #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \ | |
566 | if (pcre_mode == PCRE32_MODE) \ | |
567 | PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \ | |
568 | else if (pcre_mode == PCRE16_MODE) \ | |
569 | PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \ | |
570 | else \ | |
571 | PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) | |
572 | ||
573 | #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \ | |
574 | offsets, size_offsets, workspace, size_workspace) \ | |
575 | if (pcre_mode == PCRE32_MODE) \ | |
576 | PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \ | |
577 | offsets, size_offsets, workspace, size_workspace); \ | |
578 | else if (pcre_mode == PCRE16_MODE) \ | |
579 | PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \ | |
580 | offsets, size_offsets, workspace, size_workspace); \ | |
581 | else \ | |
582 | PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \ | |
583 | offsets, size_offsets, workspace, size_workspace) | |
584 | ||
585 | #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \ | |
586 | offsets, size_offsets) \ | |
587 | if (pcre_mode == PCRE32_MODE) \ | |
588 | PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \ | |
589 | offsets, size_offsets); \ | |
590 | else if (pcre_mode == PCRE16_MODE) \ | |
591 | PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \ | |
592 | offsets, size_offsets); \ | |
593 | else \ | |
594 | PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \ | |
595 | offsets, size_offsets) | |
596 | ||
597 | #define PCRE_FREE_STUDY(extra) \ | |
598 | if (pcre_mode == PCRE32_MODE) \ | |
599 | PCRE_FREE_STUDY32(extra); \ | |
600 | else if (pcre_mode == PCRE16_MODE) \ | |
601 | PCRE_FREE_STUDY16(extra); \ | |
602 | else \ | |
603 | PCRE_FREE_STUDY8(extra) | |
604 | ||
605 | #define PCRE_FREE_SUBSTRING(substring) \ | |
606 | if (pcre_mode == PCRE32_MODE) \ | |
607 | PCRE_FREE_SUBSTRING32(substring); \ | |
608 | else if (pcre_mode == PCRE16_MODE) \ | |
609 | PCRE_FREE_SUBSTRING16(substring); \ | |
610 | else \ | |
611 | PCRE_FREE_SUBSTRING8(substring) | |
612 | ||
613 | #define PCRE_FREE_SUBSTRING_LIST(listptr) \ | |
614 | if (pcre_mode == PCRE32_MODE) \ | |
615 | PCRE_FREE_SUBSTRING_LIST32(listptr); \ | |
616 | else if (pcre_mode == PCRE16_MODE) \ | |
617 | PCRE_FREE_SUBSTRING_LIST16(listptr); \ | |
618 | else \ | |
619 | PCRE_FREE_SUBSTRING_LIST8(listptr) | |
620 | ||
621 | #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \ | |
622 | getnamesptr, subsptr) \ | |
623 | if (pcre_mode == PCRE32_MODE) \ | |
624 | PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \ | |
625 | getnamesptr, subsptr); \ | |
626 | else if (pcre_mode == PCRE16_MODE) \ | |
627 | PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ | |
628 | getnamesptr, subsptr); \ | |
629 | else \ | |
630 | PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ | |
631 | getnamesptr, subsptr) | |
632 | ||
633 | #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \ | |
634 | if (pcre_mode == PCRE32_MODE) \ | |
635 | PCRE_GET_STRINGNUMBER32(n, rc, ptr); \ | |
636 | else if (pcre_mode == PCRE16_MODE) \ | |
637 | PCRE_GET_STRINGNUMBER16(n, rc, ptr); \ | |
638 | else \ | |
639 | PCRE_GET_STRINGNUMBER8(n, rc, ptr) | |
640 | ||
641 | #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \ | |
642 | if (pcre_mode == PCRE32_MODE) \ | |
643 | PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \ | |
644 | else if (pcre_mode == PCRE16_MODE) \ | |
645 | PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \ | |
646 | else \ | |
647 | PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr) | |
648 | ||
649 | #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \ | |
650 | if (pcre_mode == PCRE32_MODE) \ | |
651 | PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \ | |
652 | else if (pcre_mode == PCRE16_MODE) \ | |
653 | PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \ | |
654 | else \ | |
655 | PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) | |
656 | ||
657 | #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \ | |
658 | (pcre_mode == PCRE32_MODE ? \ | |
659 | PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \ | |
660 | : pcre_mode == PCRE16_MODE ? \ | |
661 | PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \ | |
662 | : PCRE_JIT_STACK_ALLOC8(startsize, maxsize)) | |
663 | ||
664 | #define PCRE_JIT_STACK_FREE(stack) \ | |
665 | if (pcre_mode == PCRE32_MODE) \ | |
666 | PCRE_JIT_STACK_FREE32(stack); \ | |
667 | else if (pcre_mode == PCRE16_MODE) \ | |
668 | PCRE_JIT_STACK_FREE16(stack); \ | |
669 | else \ | |
670 | PCRE_JIT_STACK_FREE8(stack) | |
671 | ||
672 | #define PCRE_MAKETABLES \ | |
673 | (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables()) | |
674 | ||
675 | #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \ | |
676 | if (pcre_mode == PCRE32_MODE) \ | |
677 | PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \ | |
678 | else if (pcre_mode == PCRE16_MODE) \ | |
679 | PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \ | |
680 | else \ | |
681 | PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) | |
682 | ||
683 | #define PCRE_PRINTINT(re, outfile, debug_lengths) \ | |
684 | if (pcre_mode == PCRE32_MODE) \ | |
685 | PCRE_PRINTINT32(re, outfile, debug_lengths); \ | |
686 | else if (pcre_mode == PCRE16_MODE) \ | |
687 | PCRE_PRINTINT16(re, outfile, debug_lengths); \ | |
688 | else \ | |
689 | PCRE_PRINTINT8(re, outfile, debug_lengths) | |
690 | ||
691 | #define PCRE_STUDY(extra, re, options, error) \ | |
692 | if (pcre_mode == PCRE32_MODE) \ | |
693 | PCRE_STUDY32(extra, re, options, error); \ | |
694 | else if (pcre_mode == PCRE16_MODE) \ | |
695 | PCRE_STUDY16(extra, re, options, error); \ | |
696 | else \ | |
697 | PCRE_STUDY8(extra, re, options, error) | |
698 | ||
699 | /* ----- Only 8-bit mode is supported ----- */ | |
700 | ||
701 | #elif defined SUPPORT_PCRE8 | |
702 | #define CHAR_SIZE 1 | |
703 | #define PCHARS PCHARS8 | |
704 | #define PCHARSV PCHARSV8 | |
705 | #define READ_CAPTURE_NAME READ_CAPTURE_NAME8 | |
706 | #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8 | |
707 | #define STRLEN STRLEN8 | |
708 | #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8 | |
709 | #define PCRE_COMPILE PCRE_COMPILE8 | |
710 | #define PCRE_CONFIG pcre_config | |
711 | #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8 | |
712 | #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8 | |
713 | #define PCRE_DFA_EXEC PCRE_DFA_EXEC8 | |
714 | #define PCRE_EXEC PCRE_EXEC8 | |
715 | #define PCRE_FREE_STUDY PCRE_FREE_STUDY8 | |
716 | #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8 | |
717 | #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8 | |
718 | #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8 | |
719 | #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8 | |
720 | #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8 | |
721 | #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8 | |
722 | #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8 | |
723 | #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8 | |
724 | #define PCRE_MAKETABLES pcre_maketables() | |
725 | #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8 | |
726 | #define PCRE_PRINTINT PCRE_PRINTINT8 | |
727 | #define PCRE_STUDY PCRE_STUDY8 | |
728 | ||
729 | /* ----- Only 16-bit mode is supported ----- */ | |
730 | ||
731 | #elif defined SUPPORT_PCRE16 | |
732 | #define CHAR_SIZE 2 | |
733 | #define PCHARS PCHARS16 | |
734 | #define PCHARSV PCHARSV16 | |
735 | #define READ_CAPTURE_NAME READ_CAPTURE_NAME16 | |
736 | #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16 | |
737 | #define STRLEN STRLEN16 | |
738 | #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16 | |
739 | #define PCRE_COMPILE PCRE_COMPILE16 | |
740 | #define PCRE_CONFIG pcre16_config | |
741 | #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16 | |
742 | #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16 | |
743 | #define PCRE_DFA_EXEC PCRE_DFA_EXEC16 | |
744 | #define PCRE_EXEC PCRE_EXEC16 | |
745 | #define PCRE_FREE_STUDY PCRE_FREE_STUDY16 | |
746 | #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16 | |
747 | #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16 | |
748 | #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16 | |
749 | #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16 | |
750 | #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16 | |
751 | #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16 | |
752 | #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16 | |
753 | #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16 | |
754 | #define PCRE_MAKETABLES pcre16_maketables() | |
755 | #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16 | |
756 | #define PCRE_PRINTINT PCRE_PRINTINT16 | |
757 | #define PCRE_STUDY PCRE_STUDY16 | |
758 | ||
759 | /* ----- Only 32-bit mode is supported ----- */ | |
760 | ||
761 | #elif defined SUPPORT_PCRE32 | |
762 | #define CHAR_SIZE 4 | |
763 | #define PCHARS PCHARS32 | |
764 | #define PCHARSV PCHARSV32 | |
765 | #define READ_CAPTURE_NAME READ_CAPTURE_NAME32 | |
766 | #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32 | |
767 | #define STRLEN STRLEN32 | |
768 | #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32 | |
769 | #define PCRE_COMPILE PCRE_COMPILE32 | |
770 | #define PCRE_CONFIG pcre32_config | |
771 | #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32 | |
772 | #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32 | |
773 | #define PCRE_DFA_EXEC PCRE_DFA_EXEC32 | |
774 | #define PCRE_EXEC PCRE_EXEC32 | |
775 | #define PCRE_FREE_STUDY PCRE_FREE_STUDY32 | |
776 | #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32 | |
777 | #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32 | |
778 | #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32 | |
779 | #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32 | |
780 | #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32 | |
781 | #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32 | |
782 | #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32 | |
783 | #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32 | |
784 | #define PCRE_MAKETABLES pcre32_maketables() | |
785 | #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32 | |
786 | #define PCRE_PRINTINT PCRE_PRINTINT32 | |
787 | #define PCRE_STUDY PCRE_STUDY32 | |
788 | ||
789 | #endif | |
790 | ||
791 | /* ----- End of mode-specific function call macros ----- */ | |
792 | ||
793 | ||
794 | /* Other parameters */ | /* Other parameters */ |
795 | ||
# | Line 144 UTF8 support if PCRE is built without it | Line 801 UTF8 support if PCRE is built without it |
801 | #endif | #endif |
802 | #endif | #endif |
803 | ||
804 | #if !defined NODFA | |
805 | #define DFA_WS_DIMENSION 1000 | |
806 | #endif | |
807 | ||
808 | /* This is the default loop count for timing. */ | /* This is the default loop count for timing. */ |
809 | ||
810 | #define LOOPREPEAT 500000 | #define LOOPREPEAT 500000 |
# | Line 156 static int callout_count; | Line 817 static int callout_count; |
817 | static int callout_extra; | static int callout_extra; |
818 | static int callout_fail_count; | static int callout_fail_count; |
819 | static int callout_fail_id; | static int callout_fail_id; |
820 | static int debug_lengths; | |
821 | static int first_callout; | static int first_callout; |
822 | static int jit_was_used; | |
823 | static int locale_set = 0; | static int locale_set = 0; |
824 | static int show_malloc; | static int show_malloc; |
825 | static int use_utf8; | static int use_utf; |
826 | static size_t gotten_store; | static size_t gotten_store; |
827 | static size_t first_gotten_store = 0; | |
828 | static const unsigned char *last_callout_mark = NULL; | |
829 | ||
830 | /* The buffers grow automatically if very long input lines are encountered. */ | /* The buffers grow automatically if very long input lines are encountered. */ |
831 | ||
832 | static int buffer_size = 50000; | static int buffer_size = 50000; |
833 | static uschar *buffer = NULL; | static pcre_uint8 *buffer = NULL; |
834 | static uschar *dbuffer = NULL; | static pcre_uint8 *pbuffer = NULL; |
static uschar *pbuffer = NULL; | ||
835 | ||
836 | /* Another buffer is needed for translation to 16/32-bit character strings. It will | |
837 | be obtained and extended as required. */ | |
838 | ||
839 | #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32) | |
840 | ||
841 | /************************************************* | /* We need the table of operator lengths that is used for 16/32-bit compiling, in |
842 | * Read or extend an input line * | order to swap bytes in a pattern for saving/reloading testing. Luckily, the |
843 | *************************************************/ | data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted |
844 | appropriately for the 16/32-bit world. Just as a safety check, make sure that | |
845 | COMPILE_PCRE[16|32] is *not* set. */ | |
846 | ||
847 | /* Input lines are read into buffer, but both patterns and data lines can be | #ifdef COMPILE_PCRE16 |
848 | continued over multiple input lines. In addition, if the buffer fills up, we | #error COMPILE_PCRE16 must not be set when compiling pcretest.c |
849 | want to automatically expand it so as to be able to handle extremely large | #endif |
lines that are needed for certain stress tests. When the input buffer is | ||
expanded, the other two buffers must also be expanded likewise, and the | ||
contents of pbuffer, which are a copy of the input for callouts, must be | ||
preserved (for when expansion happens for a data line). This is not the most | ||
optimal way of handling this, but hey, this is just a test program! | ||
850 | ||
851 | Arguments: | #ifdef COMPILE_PCRE32 |
852 | f the file to read | #error COMPILE_PCRE32 must not be set when compiling pcretest.c |
853 | start where in buffer to start (this *must* be within buffer) | #endif |
854 | ||
855 | Returns: pointer to the start of new data | #if LINK_SIZE == 2 |
856 | could be a copy of start, or could be moved | #undef LINK_SIZE |
857 | NULL if no data read and EOF reached | #define LINK_SIZE 1 |
858 | */ | #elif LINK_SIZE == 3 || LINK_SIZE == 4 |
859 | #undef LINK_SIZE | |
860 | #define LINK_SIZE 2 | |
861 | #else | |
862 | #error LINK_SIZE must be either 2, 3, or 4 | |
863 | #endif | |
864 | ||
865 | static uschar * | #undef IMM2_SIZE |
866 | extend_inputline(FILE *f, uschar *start) | #define IMM2_SIZE 1 |
{ | ||
uschar *here = start; | ||
867 | ||
868 | for (;;) | #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */ |
{ | ||
int rlen = buffer_size - (here - buffer); | ||
869 | ||
870 | if (rlen > 1000) | #ifdef SUPPORT_PCRE16 |
871 | { | static int buffer16_size = 0; |
872 | int dlen; | static pcre_uint16 *buffer16 = NULL; |
873 | if (fgets((char *)here, rlen, f) == NULL) | static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS }; |
874 | return (here == start)? NULL : start; | #endif /* SUPPORT_PCRE16 */ |
875 | dlen = (int)strlen((char *)here); | |
876 | if (dlen > 0 && here[dlen - 1] == '\n') return start; | #ifdef SUPPORT_PCRE32 |
877 | here += dlen; | static int buffer32_size = 0; |
878 | } | static pcre_uint32 *buffer32 = NULL; |
879 | static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS }; | |
880 | #endif /* SUPPORT_PCRE32 */ | |
881 | ||
882 | /* If we have 8-bit support, default to it; if there is also | |
883 | 16- or 32-bit support, it can be changed by an option. If there is no 8-bit support, | |
884 | there must be 16- or 32-bit support, so default to 16-bit mode if available, otherwise 32-bit mode. */ | |
885 | ||
886 | #if defined SUPPORT_PCRE8 | |
887 | static int pcre_mode = PCRE8_MODE; | |
888 | #elif defined SUPPORT_PCRE16 | |
889 | static int pcre_mode = PCRE16_MODE; | |
890 | #elif defined SUPPORT_PCRE32 | |
891 | static int pcre_mode = PCRE32_MODE; | |
892 | #endif | |
893 | ||
894 | else | /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */ |
{ | ||
int new_buffer_size = 2*buffer_size; | ||
uschar *new_buffer = (unsigned char *)malloc(new_buffer_size); | ||
uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size); | ||
uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size); | ||
895 | ||
896 | if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL) | static int jit_study_bits[] = |
897 | { | { |
898 | fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size); | PCRE_STUDY_JIT_COMPILE, |
899 | exit(1); | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, |
900 | } | PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, |
901 | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, | |
902 | PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, | |
903 | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, | |
904 | PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + | |
905 | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE | |
906 | }; | |
907 | ||
908 | #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \ | |
909 | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) | |
910 | ||
911 | /* Textual explanations for runtime error codes */ | |
912 | ||
913 | static const char *errtexts[] = { | |
914 | NULL, /* 0 is no error */ | |
915 | NULL, /* NOMATCH is handled specially */ | |
916 | "NULL argument passed", | |
917 | "bad option value", | |
918 | "magic number missing", | |
919 | "unknown opcode - pattern overwritten?", | |
920 | "no more memory", | |
921 | NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */ | |
922 | "match limit exceeded", | |
923 | "callout error code", | |
924 | NULL, /* BADUTF8/16 is handled specially */ | |
925 | NULL, /* BADUTF8/16 offset is handled specially */ | |
926 | NULL, /* PARTIAL is handled specially */ | |
927 | "not used - internal error", | |
928 | "internal error - pattern overwritten?", | |
929 | "bad count value", | |
930 | "item unsupported for DFA matching", | |
931 | "backreference condition or recursion test not supported for DFA matching", | |
932 | "match limit not supported for DFA matching", | |
933 | "workspace size exceeded in DFA matching", | |
934 | "too much recursion for DFA matching", | |
935 | "recursion limit exceeded", | |
936 | "not used - internal error", | |
937 | "invalid combination of newline options", | |
938 | "bad offset value", | |
939 | NULL, /* SHORTUTF8/16 is handled specially */ | |
940 | "nested recursion at the same subject position", | |
941 | "JIT stack limit reached", | |
942 | "pattern compiled in wrong mode: 8-bit/16-bit error", | |
943 | "pattern compiled with other endianness", | |
944 | "invalid data in workspace for DFA restart" | |
945 | }; | |
946 | ||
memcpy(new_buffer, buffer, buffer_size); | ||
memcpy(new_pbuffer, pbuffer, buffer_size); | ||
947 | ||
948 | buffer_size = new_buffer_size; | /************************************************* |
949 | * Alternate character tables * | |
950 | *************************************************/ | |
951 | ||
952 | start = new_buffer + (start - buffer); | /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby |
953 | here = new_buffer + (here - buffer); | using the default tables of the library. However, the T option can be used to |
954 | select alternate sets of tables, for different kinds of testing. Note also that | |
955 | the L (locale) option also adjusts the tables. */ | |
956 | ||
957 | /* This is the set of tables distributed as default with PCRE. It recognizes | |
958 | only ASCII characters. */ | |
959 | ||
960 | static const pcre_uint8 tables0[] = { | |
961 | ||
962 | /* This table is a lower casing table. */ | |
963 | ||
964 | 0, 1, 2, 3, 4, 5, 6, 7, | |
965 | 8, 9, 10, 11, 12, 13, 14, 15, | |
966 | 16, 17, 18, 19, 20, 21, 22, 23, | |
967 | 24, 25, 26, 27, 28, 29, 30, 31, | |
968 | 32, 33, 34, 35, 36, 37, 38, 39, | |
969 | 40, 41, 42, 43, 44, 45, 46, 47, | |
970 | 48, 49, 50, 51, 52, 53, 54, 55, | |
971 | 56, 57, 58, 59, 60, 61, 62, 63, | |
972 | 64, 97, 98, 99,100,101,102,103, | |
973 | 104,105,106,107,108,109,110,111, | |
974 | 112,113,114,115,116,117,118,119, | |
975 | 120,121,122, 91, 92, 93, 94, 95, | |
976 | 96, 97, 98, 99,100,101,102,103, | |
977 | 104,105,106,107,108,109,110,111, | |
978 | 112,113,114,115,116,117,118,119, | |
979 | 120,121,122,123,124,125,126,127, | |
980 | 128,129,130,131,132,133,134,135, | |
981 | 136,137,138,139,140,141,142,143, | |
982 | 144,145,146,147,148,149,150,151, | |
983 | 152,153,154,155,156,157,158,159, | |
984 | 160,161,162,163,164,165,166,167, | |
985 | 168,169,170,171,172,173,174,175, | |
986 | 176,177,178,179,180,181,182,183, | |
987 | 184,185,186,187,188,189,190,191, | |
988 | 192,193,194,195,196,197,198,199, | |
989 | 200,201,202,203,204,205,206,207, | |
990 | 208,209,210,211,212,213,214,215, | |
991 | 216,217,218,219,220,221,222,223, | |
992 | 224,225,226,227,228,229,230,231, | |
993 | 232,233,234,235,236,237,238,239, | |
994 | 240,241,242,243,244,245,246,247, | |
995 | 248,249,250,251,252,253,254,255, | |
996 | ||
997 | /* This table is a case flipping table. */ | |
998 | ||
999 | 0, 1, 2, 3, 4, 5, 6, 7, | |
1000 | 8, 9, 10, 11, 12, 13, 14, 15, | |
1001 | 16, 17, 18, 19, 20, 21, 22, 23, | |
1002 | 24, 25, 26, 27, 28, 29, 30, 31, | |
1003 | 32, 33, 34, 35, 36, 37, 38, 39, | |
1004 | 40, 41, 42, 43, 44, 45, 46, 47, | |
1005 | 48, 49, 50, 51, 52, 53, 54, 55, | |
1006 | 56, 57, 58, 59, 60, 61, 62, 63, | |
1007 | 64, 97, 98, 99,100,101,102,103, | |
1008 | 104,105,106,107,108,109,110,111, | |
1009 | 112,113,114,115,116,117,118,119, | |
1010 | 120,121,122, 91, 92, 93, 94, 95, | |
1011 | 96, 65, 66, 67, 68, 69, 70, 71, | |
1012 | 72, 73, 74, 75, 76, 77, 78, 79, | |
1013 | 80, 81, 82, 83, 84, 85, 86, 87, | |
1014 | 88, 89, 90,123,124,125,126,127, | |
1015 | 128,129,130,131,132,133,134,135, | |
1016 | 136,137,138,139,140,141,142,143, | |
1017 | 144,145,146,147,148,149,150,151, | |
1018 | 152,153,154,155,156,157,158,159, | |
1019 | 160,161,162,163,164,165,166,167, | |
1020 | 168,169,170,171,172,173,174,175, | |
1021 | 176,177,178,179,180,181,182,183, | |
1022 | 184,185,186,187,188,189,190,191, | |
1023 | 192,193,194,195,196,197,198,199, | |
1024 | 200,201,202,203,204,205,206,207, | |
1025 | 208,209,210,211,212,213,214,215, | |
1026 | 216,217,218,219,220,221,222,223, | |
1027 | 224,225,226,227,228,229,230,231, | |
1028 | 232,233,234,235,236,237,238,239, | |
1029 | 240,241,242,243,244,245,246,247, | |
1030 | 248,249,250,251,252,253,254,255, | |
1031 | ||
1032 | /* This table contains bit maps for various character classes. Each map is 32 | |
1033 | bytes long and the bits run from the least significant end of each byte. The | |
1034 | classes that have their own maps are: space, xdigit, digit, upper, lower, word, | |
1035 | graph, print, punct, and cntrl. Other classes are built from combinations. */ | |
1036 | ||
1037 | 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, | |
1038 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1039 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1040 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1041 | ||
1042 | 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, | |
1043 | 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, | |
1044 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1045 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1046 | ||
1047 | 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, | |
1048 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1049 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1050 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1051 | ||
1052 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1053 | 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00, | |
1054 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1055 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1056 | ||
1057 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1058 | 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07, | |
1059 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1060 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1061 | ||
1062 | 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, | |
1063 | 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07, | |
1064 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1065 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1066 | ||
1067 | 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, | |
1068 | 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, | |
1069 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1070 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1071 | ||
1072 | 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, | |
1073 | 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, | |
1074 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1075 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1076 | ||
1077 | 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, | |
1078 | 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78, | |
1079 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1080 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1081 | ||
1082 | 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, | |
1083 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80, | |
1084 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1085 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
1086 | ||
1087 | /* This table identifies various classes of character by individual bits: | |
1088 | 0x01 white space character | |
1089 | 0x02 letter | |
1090 | 0x04 decimal digit | |
1091 | 0x08 hexadecimal digit | |
1092 | 0x10 alphanumeric or '_' | |
1093 | 0x80 regular expression metacharacter or binary zero | |
1094 | */ | |
1095 | ||
1096 | free(buffer); | 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ |
1097 | free(dbuffer); | 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */ |
1098 | free(pbuffer); | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ |
1099 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ | |
1100 | 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ | |
1101 | 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ | |
1102 | 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ | |
1103 | 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ | |
1104 | 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ | |
1105 | 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ | |
1106 | 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ | |
1107 | 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */ | |
1108 | 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ | |
1109 | 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ | |
1110 | 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ | |
1111 | 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ | |
1112 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ | |
1113 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ | |
1114 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ | |
1115 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ | |
1116 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ | |
1117 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ | |
1118 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ | |
1119 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ | |
1120 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ | |
1121 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ | |
1122 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ | |
1123 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ | |
1124 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ | |
1125 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ | |
1126 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ | |
1127 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ | |
1128 | ||
1129 | /* This is a set of tables that came originally from a Windows user. It seems to | |
1130 | be at least an approximation of ISO 8859. In particular, there are characters | |
1131 | greater than 128 that are marked as spaces, letters, etc. */ | |
1132 | ||
1133 | static const pcre_uint8 tables1[] = { | |
1134 | 0,1,2,3,4,5,6,7, | |
1135 | 8,9,10,11,12,13,14,15, | |
1136 | 16,17,18,19,20,21,22,23, | |
1137 | 24,25,26,27,28,29,30,31, | |
1138 | 32,33,34,35,36,37,38,39, | |
1139 | 40,41,42,43,44,45,46,47, | |
1140 | 48,49,50,51,52,53,54,55, | |
1141 | 56,57,58,59,60,61,62,63, | |
1142 | 64,97,98,99,100,101,102,103, | |
1143 | 104,105,106,107,108,109,110,111, | |
1144 | 112,113,114,115,116,117,118,119, | |
1145 | 120,121,122,91,92,93,94,95, | |
1146 | 96,97,98,99,100,101,102,103, | |
1147 | 104,105,106,107,108,109,110,111, | |
1148 | 112,113,114,115,116,117,118,119, | |
1149 | 120,121,122,123,124,125,126,127, | |
1150 | 128,129,130,131,132,133,134,135, | |
1151 | 136,137,138,139,140,141,142,143, | |
1152 | 144,145,146,147,148,149,150,151, | |
1153 | 152,153,154,155,156,157,158,159, | |
1154 | 160,161,162,163,164,165,166,167, | |
1155 | 168,169,170,171,172,173,174,175, | |
1156 | 176,177,178,179,180,181,182,183, | |
1157 | 184,185,186,187,188,189,190,191, | |
1158 | 224,225,226,227,228,229,230,231, | |
1159 | 232,233,234,235,236,237,238,239, | |
1160 | 240,241,242,243,244,245,246,215, | |
1161 | 248,249,250,251,252,253,254,223, | |
1162 | 224,225,226,227,228,229,230,231, | |
1163 | 232,233,234,235,236,237,238,239, | |
1164 | 240,241,242,243,244,245,246,247, | |
1165 | 248,249,250,251,252,253,254,255, | |
1166 | 0,1,2,3,4,5,6,7, | |
1167 | 8,9,10,11,12,13,14,15, | |
1168 | 16,17,18,19,20,21,22,23, | |
1169 | 24,25,26,27,28,29,30,31, | |
1170 | 32,33,34,35,36,37,38,39, | |
1171 | 40,41,42,43,44,45,46,47, | |
1172 | 48,49,50,51,52,53,54,55, | |
1173 | 56,57,58,59,60,61,62,63, | |
1174 | 64,97,98,99,100,101,102,103, | |
1175 | 104,105,106,107,108,109,110,111, | |
1176 | 112,113,114,115,116,117,118,119, | |
1177 | 120,121,122,91,92,93,94,95, | |
1178 | 96,65,66,67,68,69,70,71, | |
1179 | 72,73,74,75,76,77,78,79, | |
1180 | 80,81,82,83,84,85,86,87, | |
1181 | 88,89,90,123,124,125,126,127, | |
1182 | 128,129,130,131,132,133,134,135, | |
1183 | 136,137,138,139,140,141,142,143, | |
1184 | 144,145,146,147,148,149,150,151, | |
1185 | 152,153,154,155,156,157,158,159, | |
1186 | 160,161,162,163,164,165,166,167, | |
1187 | 168,169,170,171,172,173,174,175, | |
1188 | 176,177,178,179,180,181,182,183, | |
1189 | 184,185,186,187,188,189,190,191, | |
1190 | 224,225,226,227,228,229,230,231, | |
1191 | 232,233,234,235,236,237,238,239, | |
1192 | 240,241,242,243,244,245,246,215, | |
1193 | 248,249,250,251,252,253,254,223, | |
1194 | 192,193,194,195,196,197,198,199, | |
1195 | 200,201,202,203,204,205,206,207, | |
1196 | 208,209,210,211,212,213,214,247, | |
1197 | 216,217,218,219,220,221,222,255, | |
1198 | 0,62,0,0,1,0,0,0, | |
1199 | 0,0,0,0,0,0,0,0, | |
1200 | 32,0,0,0,1,0,0,0, | |
1201 | 0,0,0,0,0,0,0,0, | |
1202 | 0,0,0,0,0,0,255,3, | |
1203 | 126,0,0,0,126,0,0,0, | |
1204 | 0,0,0,0,0,0,0,0, | |
1205 | 0,0,0,0,0,0,0,0, | |
1206 | 0,0,0,0,0,0,255,3, | |
1207 | 0,0,0,0,0,0,0,0, | |
1208 | 0,0,0,0,0,0,12,2, | |
1209 | 0,0,0,0,0,0,0,0, | |
1210 | 0,0,0,0,0,0,0,0, | |
1211 | 254,255,255,7,0,0,0,0, | |
1212 | 0,0,0,0,0,0,0,0, | |
1213 | 255,255,127,127,0,0,0,0, | |
1214 | 0,0,0,0,0,0,0,0, | |
1215 | 0,0,0,0,254,255,255,7, | |
1216 | 0,0,0,0,0,4,32,4, | |
1217 | 0,0,0,128,255,255,127,255, | |
1218 | 0,0,0,0,0,0,255,3, | |
1219 | 254,255,255,135,254,255,255,7, | |
1220 | 0,0,0,0,0,4,44,6, | |
1221 | 255,255,127,255,255,255,127,255, | |
1222 | 0,0,0,0,254,255,255,255, | |
1223 | 255,255,255,255,255,255,255,127, | |
1224 | 0,0,0,0,254,255,255,255, | |
1225 | 255,255,255,255,255,255,255,255, | |
1226 | 0,2,0,0,255,255,255,255, | |
1227 | 255,255,255,255,255,255,255,127, | |
1228 | 0,0,0,0,255,255,255,255, | |
1229 | 255,255,255,255,255,255,255,255, | |
1230 | 0,0,0,0,254,255,0,252, | |
1231 | 1,0,0,248,1,0,0,120, | |
1232 | 0,0,0,0,254,255,255,255, | |
1233 | 0,0,128,0,0,0,128,0, | |
1234 | 255,255,255,255,0,0,0,0, | |
1235 | 0,0,0,0,0,0,0,128, | |
1236 | 255,255,255,255,0,0,0,0, | |
1237 | 0,0,0,0,0,0,0,0, | |
1238 | 128,0,0,0,0,0,0,0, | |
1239 | 0,1,1,0,1,1,0,0, | |
1240 | 0,0,0,0,0,0,0,0, | |
1241 | 0,0,0,0,0,0,0,0, | |
1242 | 1,0,0,0,128,0,0,0, | |
1243 | 128,128,128,128,0,0,128,0, | |
1244 | 28,28,28,28,28,28,28,28, | |
1245 | 28,28,0,0,0,0,0,128, | |
1246 | 0,26,26,26,26,26,26,18, | |
1247 | 18,18,18,18,18,18,18,18, | |
1248 | 18,18,18,18,18,18,18,18, | |
1249 | 18,18,18,128,128,0,128,16, | |
1250 | 0,26,26,26,26,26,26,18, | |
1251 | 18,18,18,18,18,18,18,18, | |
1252 | 18,18,18,18,18,18,18,18, | |
1253 | 18,18,18,128,128,0,0,0, | |
1254 | 0,0,0,0,0,1,0,0, | |
1255 | 0,0,0,0,0,0,0,0, | |
1256 | 0,0,0,0,0,0,0,0, | |
1257 | 0,0,0,0,0,0,0,0, | |
1258 | 1,0,0,0,0,0,0,0, | |
1259 | 0,0,18,0,0,0,0,0, | |
1260 | 0,0,20,20,0,18,0,0, | |
1261 | 0,20,18,0,0,0,0,0, | |
1262 | 18,18,18,18,18,18,18,18, | |
1263 | 18,18,18,18,18,18,18,18, | |
1264 | 18,18,18,18,18,18,18,0, | |
1265 | 18,18,18,18,18,18,18,18, | |
1266 | 18,18,18,18,18,18,18,18, | |
1267 | 18,18,18,18,18,18,18,18, | |
1268 | 18,18,18,18,18,18,18,0, | |
1269 | 18,18,18,18,18,18,18,18 | |
1270 | }; | |
1271 | ||
buffer = new_buffer; | ||
dbuffer = new_dbuffer; | ||
pbuffer = new_pbuffer; | ||
} | ||
} | ||
1272 | ||
return NULL; /* Control never gets here */ | ||
} | ||
1273 | ||
1274 | ||
1275 | #ifndef HAVE_STRERROR | |
1276 | /************************************************* | |
1277 | * Provide strerror() for non-ANSI libraries * | |
1278 | *************************************************/ | |
1279 | ||
1280 | /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror() | |
1281 | in their libraries, but can provide the same facility by this simple | |
1282 | alternative function. */ | |
1283 | ||
1284 | extern int sys_nerr; | |
1285 | extern char *sys_errlist[]; | |
1286 | ||
1287 | char * | |
1288 | strerror(int n) | |
1289 | { | |
1290 | if (n < 0 || n >= sys_nerr) return "unknown error number"; | |
1291 | return sys_errlist[n]; | |
1292 | } | |
1293 | #endif /* HAVE_STRERROR */ | |
1294 | ||
1295 | ||
1296 | ||
1297 | /************************************************* | /************************************************* |
1298 | * Read number from string * | * Print newline configuration * |
1299 | *************************************************/ | *************************************************/ |
1300 | ||
1301 | /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess | /* |
1302 | around with conditional compilation, just do the job by hand. It is only used | Arguments: |
1303 | for unpicking arguments, so just keep it simple. | rc the return code from PCRE_CONFIG_NEWLINE |
1304 | isc TRUE if called from "-C newline" | |
1305 | Arguments: | Returns: nothing |
str string to be converted | ||
endptr where to put the end pointer | ||
Returns: the unsigned long | ||
1306 | */ | */ |
1307 | ||
1308 | static int | static void |
1309 | get_value(unsigned char *str, unsigned char **endptr) | print_newline_config(int rc, BOOL isc) |
1310 | { | { |
1311 | int result = 0; | const char *s = NULL; |
1312 | while(*str != 0 && isspace(*str)) str++; | if (!isc) printf(" Newline sequence is "); |
1313 | while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0'); | switch(rc) |
1314 | *endptr = str; | { |
1315 | return(result); | case CHAR_CR: s = "CR"; break; |
1316 | case CHAR_LF: s = "LF"; break; | |
1317 | case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break; | |
1318 | case -1: s = "ANY"; break; | |
1319 | case -2: s = "ANYCRLF"; break; | |
1320 | ||
1321 | default: | |
1322 | printf("a non-standard value: 0x%04x\n", rc); | |
1323 | return; | |
1324 | } | |
1325 | ||
1326 | printf("%s\n", s); | |
1327 | } | } |
1328 | ||
1329 | ||
1330 | ||
1331 | /************************************************* | |
1332 | * JIT memory callback * | |
1333 | *************************************************/ | |
1334 | ||
1335 | static pcre_jit_stack* jit_callback(void *arg) | |
1336 | { | |
1337 | jit_was_used = TRUE; | |
1338 | return (pcre_jit_stack *)arg; | |
1339 | } | |
1340 | ||
1341 | ||
1342 | #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32 | |
1343 | /************************************************* | /************************************************* |
1344 | * Convert UTF-8 string to value * | * Convert UTF-8 string to value * |
1345 | *************************************************/ | *************************************************/ |
# | Line 295 Returns: > 0 => the number of byte | Line 1355 Returns: > 0 => the number of byte |
1355 | -6 to 0 => malformed UTF-8 character at offset = (-return) | -6 to 0 => malformed UTF-8 character at offset = (-return) |
1356 | */ | */ |
1357 | ||
#if !defined NOUTF8 | ||
1358 | static int | static int |
1359 | utf82ord(unsigned char *utf8bytes, int *vptr) | utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr) |
1360 | { | { |
1361 | int c = *utf8bytes++; | pcre_uint32 c = *utf8bytes++; |
1362 | int d = c; | pcre_uint32 d = c; |
1363 | int i, j, s; | int i, j, s; |
1364 | ||
1365 | for (i = -1; i < 6; i++) /* i is number of additional bytes */ | for (i = -1; i < 6; i++) /* i is number of additional bytes */ |
# | Line 337 if (j != i) return -(i+1); | Line 1395 if (j != i) return -(i+1); |
1395 | *vptr = d; | *vptr = d; |
1396 | return i+1; | return i+1; |
1397 | } | } |
1398 | #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */ | |
#endif | ||
1399 | ||
1400 | ||
1401 | ||
1402 | #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32 | |
1403 | /************************************************* | /************************************************* |
1404 | * Convert character value to UTF-8 * | * Convert character value to UTF-8 * |
1405 | *************************************************/ | *************************************************/ |
# | Line 356 Arguments: | Line 1414 Arguments: |
1414 | Returns: number of characters placed in the buffer | Returns: number of characters placed in the buffer |
1415 | */ | */ |
1416 | ||
#if !defined NOUTF8 | ||
1417 | static int | static int |
1418 | ord2utf8(int cvalue, uschar *utf8bytes) | ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes) |
1419 | { | { |
1420 | register int i, j; | register int i, j; |
1421 | if (cvalue > 0x7fffffffu) | |
1422 | return -1; | |
1423 | for (i = 0; i < utf8_table1_size; i++) | for (i = 0; i < utf8_table1_size; i++) |
1424 | if (cvalue <= utf8_table1[i]) break; | if (cvalue <= utf8_table1[i]) break; |
1425 | utf8bytes += i; | utf8bytes += i; |
# | Line 373 for (j = i; j > 0; j--) | Line 1431 for (j = i; j > 0; j--) |
1431 | *utf8bytes = utf8_table2[i] | cvalue; | *utf8bytes = utf8_table2[i] | cvalue; |
1432 | return i + 1; | return i + 1; |
1433 | } | } |
1434 | #endif | #endif |
1435 | ||
1436 | ||
1437 | #ifdef SUPPORT_PCRE16 | |
1438 | /************************************************* | /************************************************* |
1439 | * Print character string * | * Convert a string to 16-bit * |
1440 | *************************************************/ | *************************************************/ |
1441 | ||
1442 | /* Character string printing function. Must handle UTF-8 strings in utf8 | /* In non-UTF mode, the space needed for a 16-bit string is exactly double the |
1443 | mode. Yields number of characters printed. If handed a NULL file, just counts | 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than |
1444 | chars without printing. */ | double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4 |
1445 | in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The | |
1446 | result is always left in buffer16. | |
1447 | ||
1448 | Note that this function does not object to surrogate values. This is | |
1449 | deliberate; it makes it possible to construct UTF-16 strings that are invalid, | |
1450 | for the purpose of testing that they are correctly faulted. | |
1451 | ||
1452 | Patterns to be converted are either plain ASCII or UTF-8; data lines are always | |
1453 | in UTF-8 so that values greater than 255 can be handled. | |
1454 | ||
1455 | Arguments: | |
1456 | data TRUE if converting a data line; FALSE for a regex | |
1457 | p points to a byte string | |
1458 | utf true if UTF-8 (to be converted to UTF-16) | |
1459 | len number of bytes in the string (excluding trailing zero) | |
1460 | ||
1461 | Returns: number of 16-bit data items used (excluding trailing zero) | |
1462 | OR -1 if a UTF-8 string is malformed | |
1463 | OR -2 if a value > 0x10ffff is encountered | |
1464 | OR -3 if a value > 0xffff is encountered when not in UTF mode | |
1465 | */ | |
1466 | ||
1467 | static int pchars(unsigned char *p, int length, FILE *f) | static int |
1468 | to16(int data, pcre_uint8 *p, int utf, int len) | |
1469 | { | { |
1470 | int c = 0; | pcre_uint16 *pp; |
int yield = 0; | ||
1471 | ||
1472 | while (length-- > 0) | if (buffer16_size < 2*len + 2) |
1473 | { | { |
1474 | #if !defined NOUTF8 | if (buffer16 != NULL) free(buffer16); |
1475 | if (use_utf8) | buffer16_size = 2*len + 2; |
1476 | buffer16 = (pcre_uint16 *)malloc(buffer16_size); | |
1477 | if (buffer16 == NULL) | |
1478 | { | { |
1479 | int rc = utf82ord(p, &c); | fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size); |
1480 | exit(1); | |
if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */ | ||
{ | ||
length -= rc - 1; | ||
p += rc; | ||
if (PRINTHEX(c)) | ||
{ | ||
if (f != NULL) fprintf(f, "%c", c); | ||
yield++; | ||
} | ||
else | ||
{ | ||
int n = 4; | ||
if (f != NULL) fprintf(f, "\\x{%02x}", c); | ||
yield += (n <= 0x000000ff)? 2 : | ||
(n <= 0x00000fff)? 3 : | ||
(n <= 0x0000ffff)? 4 : | ||
(n <= 0x000fffff)? 5 : 6; | ||
} | ||
continue; | ||
} | ||
1481 | } | } |
1482 | #endif | } |
1483 | ||
1484 | /* Not UTF-8, or malformed UTF-8 */ | pp = buffer16; |
1485 | ||
1486 | c = *p++; | if (!utf && !data) |
1487 | if (PRINTHEX(c)) | { |
1488 | { | while (len-- > 0) *pp++ = *p++; |
1489 | if (f != NULL) fprintf(f, "%c", c); | } |
1490 | yield++; | |
1491 | } | else |
1492 | else | { |
1493 | pcre_uint32 c = 0; | |
1494 | while (len > 0) | |
1495 | { | { |
1496 | if (f != NULL) fprintf(f, "\\x%02x", c); | int chlen = utf82ord(p, &c); |
1497 | yield += 4; | if (chlen <= 0) return -1; |
1498 | if (c > 0x10ffff) return -2; | |
1499 | p += chlen; | |
1500 | len -= chlen; | |
1501 | if (c < 0x10000) *pp++ = c; else | |
1502 | { | |
1503 | if (!utf) return -3; | |
1504 | c -= 0x10000; | |
1505 | *pp++ = 0xD800 | (c >> 10); | |
1506 | *pp++ = 0xDC00 | (c & 0x3ff); | |
1507 | } | |
1508 | } | } |
1509 | } | } |
1510 | ||
1511 | return yield; | *pp = 0; |
1512 | return pp - buffer16; | |
1513 | } | } |
1514 | #endif | |
1515 | ||
1516 | #ifdef SUPPORT_PCRE32 | |
1517 | /************************************************* | /************************************************* |
1518 | * Callout function * | * Convert a string to 32-bit * |
1519 | *************************************************/ | *************************************************/ |
1520 | ||
1521 | /* Called from PCRE as a result of the (?C) item. We print out where we are in | /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the |
1522 | the match. Yield zero unless more callouts than the fail count, or the callout | 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four |
1523 | data is not zero. */ | times, because every character uses at least 1 byte in UTF-8 and exactly 4 |
1524 | bytes in UTF-32, regardless of its value. The | |
1525 | result is always left in buffer32. | |
1526 | ||
1527 | Note that this function does not object to surrogate values. This is | |
1528 | deliberate; it makes it possible to construct UTF-32 strings that are invalid, | |
1529 | for the purpose of testing that they are correctly faulted. | |
1530 | ||
1531 | static int callout(pcre_callout_block *cb) | Patterns to be converted are either plain ASCII or UTF-8; data lines are always |
1532 | in UTF-8 so that values greater than 255 can be handled. | |
1533 | ||
1534 | Arguments: | |
1535 | data TRUE if converting a data line; FALSE for a regex | |
1536 | p points to a byte string | |
1537 | utf true if UTF-8 (to be converted to UTF-32) | |
1538 | len number of bytes in the string (excluding trailing zero) | |
1539 | ||
1540 | Returns: number of 32-bit data items used (excluding trailing zero) | |
1541 | OR -1 if a UTF-8 string is malformed | |
1542 | OR -2 if a value > 0x10ffff is encountered | |
1543 | OR -3 if an ill-formed value is encountered (i.e. a surrogate) | |
1544 | */ | |
1545 | ||
1546 | static int | |
1547 | to32(int data, pcre_uint8 *p, int utf, int len) | |
1548 | { | { |
1549 | FILE *f = (first_callout | callout_extra)? outfile : NULL; | pcre_uint32 *pp; |
int i, pre_start, post_start, subject_length; | ||
1550 | ||
1551 | if (callout_extra) | if (buffer32_size < 4*len + 4) |
1552 | { | { |
1553 | fprintf(f, "Callout %d: last capture = %d\n", | if (buffer32 != NULL) free(buffer32); |
1554 | cb->callout_number, cb->capture_last); | buffer32_size = 4*len + 4; |
1555 | buffer32 = (pcre_uint32 *)malloc(buffer32_size); | |
1556 | for (i = 0; i < cb->capture_top * 2; i += 2) | if (buffer32 == NULL) |
1557 | { | { |
1558 | if (cb->offset_vector[i] < 0) | fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size); |
1559 | fprintf(f, "%2d: <unset>\n", i/2); | exit(1); |
else | ||
{ | ||
fprintf(f, "%2d: ", i/2); | ||
(void)pchars((unsigned char *)cb->subject + cb->offset_vector[i], | ||
cb->offset_vector[i+1] - cb->offset_vector[i], f); | ||
fprintf(f, "\n"); | ||
} | ||
1560 | } | } |
1561 | } | } |
1562 | ||
1563 | /* Re-print the subject in canonical form, the first time or if giving full | pp = buffer32; |
details. On subsequent calls in the same match, we use pchars just to find the | ||
printed lengths of the substrings. */ | ||
if (f != NULL) fprintf(f, "--->"); | ||
pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f); | ||
post_start = pchars((unsigned char *)(cb->subject + cb->start_match), | ||
cb->current_position - cb->start_match, f); | ||
subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL); | ||
(void)pchars((unsigned char *)(cb->subject + cb->current_position), | ||
cb->subject_length - cb->current_position, f); | ||
if (f != NULL) fprintf(f, "\n"); | ||
/* Always print appropriate indicators, with callout number if not already | ||
shown. For automatic callouts, show the pattern offset. */ | ||
1564 | ||
1565 | if (cb->callout_number == 255) | if (!utf && !data) |
1566 | { | { |
1567 | fprintf(outfile, "%+3d ", cb->pattern_position); | while (len-- > 0) *pp++ = *p++; |
if (cb->pattern_position > 99) fprintf(outfile, "\n "); | ||
1568 | } | } |
1569 | ||
1570 | else | else |
1571 | { | { |
1572 | if (callout_extra) fprintf(outfile, " "); | pcre_uint32 c = 0; |
1573 | else fprintf(outfile, "%3d ", cb->callout_number); | while (len > 0) |
1574 | { | |
1575 | int chlen = utf82ord(p, &c); | |
1576 | if (chlen <= 0) return -1; | |
1577 | if (utf) | |
1578 | { | |
1579 | if (c > 0x10ffff) return -2; | |
1580 | if (!data && (c & 0xfffff800u) == 0xd800u) return -3; | |
1581 | } | |
1582 | ||
1583 | p += chlen; | |
1584 | len -= chlen; | |
1585 | *pp++ = c; | |
1586 | } | |
1587 | } | } |
1588 | ||
1589 | for (i = 0; i < pre_start; i++) fprintf(outfile, " "); | *pp = 0; |
1590 | fprintf(outfile, "^"); | return pp - buffer32; |
1591 | } | |
1592 | ||
1593 | if (post_start > 0) | /* Check that a 32-bit character string is valid UTF-32. |
{ | ||
for (i = 0; i < post_start - 1; i++) fprintf(outfile, " "); | ||
fprintf(outfile, "^"); | ||
} | ||
1594 | ||
1595 | for (i = 0; i < subject_length - pre_start - post_start + 4; i++) | Arguments: |
1596 | fprintf(outfile, " "); | string points to the string |
1597 | length length of string, or -1 if the string is zero-terminated | |
1598 | ||
1599 | fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length, | Returns: TRUE if the string is a valid UTF-32 string |
1600 | pbuffer + cb->pattern_position); | FALSE otherwise |
1601 | */ | |
1602 | ||
1603 | fprintf(outfile, "\n"); | #ifdef SUPPORT_UTF |
1604 | first_callout = 0; | static BOOL |
1605 | valid_utf32(pcre_uint32 *string, int length) | |
1606 | { | |
1607 | register pcre_uint32 *p; | |
1608 | register pcre_uint32 c; | |
1609 | ||
1610 | if (cb->callout_data != NULL) | for (p = string; length-- > 0; p++) |
1611 | { | { |
1612 | int callout_data = *((int *)(cb->callout_data)); | c = *p; |
1613 | if (callout_data != 0) | |
1614 | { | if (c > 0x10ffffu) |
1615 | fprintf(outfile, "Callout data = %d\n", callout_data); | return FALSE; |
1616 | return callout_data; | |
1617 | } | /* A surrogate */ |
1618 | if ((c & 0xfffff800u) == 0xd800u) | |
1619 | return FALSE; | |
1620 | ||
1621 | /* Non-character */ | |
1622 | if ((c & 0xfffeu) == 0xfffeu || | |
1623 | c >= 0xfdd0u && c <= 0xfdefu) | |
1624 | return FALSE; | |
1625 | } | } |
1626 | ||
1627 | return (cb->callout_number != callout_fail_id)? 0 : | return TRUE; |
(++callout_count >= callout_fail_count)? 1 : 0; | ||
1628 | } | } |
1629 | #endif /* SUPPORT_UTF */ | |
1630 | ||
1631 | #endif | |
1632 | ||
1633 | /************************************************* | /************************************************* |
1634 | * Local malloc functions * | * Read or extend an input line * |
1635 | *************************************************/ | *************************************************/ |
1636 | ||
1637 | /* Alternative malloc function, to test functionality and show the size of the | /* Input lines are read into buffer, but both patterns and data lines can be |
1638 | compiled re. */ | continued over multiple input lines. In addition, if the buffer fills up, we |
1639 | want to automatically expand it so as to be able to handle extremely large | |
1640 | static void *new_malloc(size_t size) | lines that are needed for certain stress tests. When the input buffer is |
1641 | { | expanded, the other two buffers must also be expanded likewise, and the |
1642 | void *block = malloc(size); | contents of pbuffer, which are a copy of the input for callouts, must be |
1643 | preserved (for when expansion happens for a data line). This is not the most | |
1644 | optimal way of handling this, but hey, this is just a test program! | |
1645 | ||
1646 | Arguments: | |
1647 | f the file to read | |
1648 | start where in buffer to start (this *must* be within buffer) | |
1649 | prompt for stdin or readline() | |
1650 | ||
1651 | Returns: pointer to the start of new data | |
1652 | could be a copy of start, or could be moved | |
1653 | NULL if no data read and EOF reached | |
1654 | */ | |
1655 | ||
1656 | static pcre_uint8 * | |
1657 | extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt) | |
1658 | { | |
1659 | pcre_uint8 *here = start; | |
1660 | ||
1661 | for (;;) | |
1662 | { | |
1663 | size_t rlen = (size_t)(buffer_size - (here - buffer)); | |
1664 | ||
1665 | if (rlen > 1000) | |
1666 | { | |
1667 | int dlen; | |
1668 | ||
1669 | /* If libreadline or libedit support is required, use readline() to read a | |
1670 | line if the input is a terminal. Note that readline() removes the trailing | |
1671 | newline, so we must put it back again, to be compatible with fgets(). */ | |
1672 | ||
1673 | #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) | |
1674 | if (isatty(fileno(f))) | |
1675 | { | |
1676 | size_t len; | |
1677 | char *s = readline(prompt); | |
1678 | if (s == NULL) return (here == start)? NULL : start; | |
1679 | len = strlen(s); | |
1680 | if (len > 0) add_history(s); | |
1681 | if (len > rlen - 1) len = rlen - 1; | |
1682 | memcpy(here, s, len); | |
1683 | here[len] = '\n'; | |
1684 | here[len+1] = 0; | |
1685 | free(s); | |
1686 | } | |
1687 | else | |
1688 | #endif | |
1689 | ||
1690 | /* Read the next line by normal means, prompting if the file is stdin. */ | |
1691 | ||
1692 | { | |
1693 | if (f == stdin) printf("%s", prompt); | |
1694 | if (fgets((char *)here, rlen, f) == NULL) | |
1695 | return (here == start)? NULL : start; | |
1696 | } | |
1697 | ||
1698 | dlen = (int)strlen((char *)here); | |
1699 | if (dlen > 0 && here[dlen - 1] == '\n') return start; | |
1700 | here += dlen; | |
1701 | } | |
1702 | ||
1703 | else | |
1704 | { | |
1705 | int new_buffer_size = 2*buffer_size; | |
1706 | pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size); | |
1707 | pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size); | |
1708 | ||
1709 | if (new_buffer == NULL || new_pbuffer == NULL) | |
1710 | { | |
1711 | fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size); | |
1712 | exit(1); | |
1713 | } | |
1714 | ||
1715 | memcpy(new_buffer, buffer, buffer_size); | |
1716 | memcpy(new_pbuffer, pbuffer, buffer_size); | |
1717 | ||
1718 | buffer_size = new_buffer_size; | |
1719 | ||
1720 | start = new_buffer + (start - buffer); | |
1721 | here = new_buffer + (here - buffer); | |
1722 | ||
1723 | free(buffer); | |
1724 | free(pbuffer); | |
1725 | ||
1726 | buffer = new_buffer; | |
1727 | pbuffer = new_pbuffer; | |
1728 | } | |
1729 | } | |
1730 | ||
1731 | return NULL; /* Control never gets here */ | |
1732 | } | |
1733 | ||
1734 | ||
1735 | ||
1736 | /************************************************* | |
1737 | * Read number from string * | |
1738 | *************************************************/ | |
1739 | ||
1740 | /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess | |
1741 | around with conditional compilation, just do the job by hand. It is only used | |
1742 | for unpicking arguments, so just keep it simple. | |
1743 | ||
1744 | Arguments: | |
1745 | str string to be converted | |
1746 | endptr where to put the end pointer | |
1747 | ||
1748 | Returns: the value read, as an int (0 if no digits are present) | |
1749 | */ | |
1750 | ||
1751 | static int | |
1752 | get_value(pcre_uint8 *str, pcre_uint8 **endptr) | |
1753 | { | |
1754 | int result = 0; | |
1755 | while(*str != 0 && isspace(*str)) str++; | |
1756 | while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0'); | |
1757 | *endptr = str; | |
1758 | return(result); | |
1759 | } | |
1760 | ||
1761 | ||
1762 | ||
1763 | /************************************************* | |
1764 | * Print one character * | |
1765 | *************************************************/ | |
1766 | ||
1767 | /* Print a single character either literally, or as a hex escape. */ | |
1768 | ||
1769 | static int pchar(pcre_uint32 c, FILE *f) | |
1770 | { | |
1771 | int n; | |
1772 | if (PRINTOK(c)) | |
1773 | { | |
1774 | if (f != NULL) fprintf(f, "%c", c); | |
1775 | return 1; | |
1776 | } | |
1777 | ||
1778 | if (c < 0x100) | |
1779 | { | |
1780 | if (use_utf) | |
1781 | { | |
1782 | if (f != NULL) fprintf(f, "\\x{%02x}", c); | |
1783 | return 6; | |
1784 | } | |
1785 | else | |
1786 | { | |
1787 | if (f != NULL) fprintf(f, "\\x%02x", c); | |
1788 | return 4; | |
1789 | } | |
1790 | } | |
1791 | ||
1792 | if (f != NULL) n = fprintf(f, "\\x{%02x}", c); | |
1793 | return n >= 0 ? n : 0; | |
1794 | } | |
1795 | ||
1796 | ||
1797 | ||
1798 | #ifdef SUPPORT_PCRE8 | |
1799 | /************************************************* | |
1800 | * Print 8-bit character string * | |
1801 | *************************************************/ | |
1802 | ||
1803 | /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed. | |
1804 | If handed a NULL file, just counts chars without printing. */ | |
1805 | ||
1806 | static int pchars(pcre_uint8 *p, int length, FILE *f) | |
1807 | { | |
1808 | pcre_uint32 c = 0; | |
1809 | int yield = 0; | |
1810 | ||
1811 | if (length < 0) | |
1812 | length = strlen((char *)p); | |
1813 | ||
1814 | while (length-- > 0) | |
1815 | { | |
1816 | #if !defined NOUTF | |
1817 | if (use_utf) | |
1818 | { | |
1819 | int rc = utf82ord(p, &c); | |
1820 | if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */ | |
1821 | { | |
1822 | length -= rc - 1; | |
1823 | p += rc; | |
1824 | yield += pchar(c, f); | |
1825 | continue; | |
1826 | } | |
1827 | } | |
1828 | #endif | |
1829 | c = *p++; | |
1830 | yield += pchar(c, f); | |
1831 | } | |
1832 | ||
1833 | return yield; | |
1834 | } | |
1835 | #endif | |
1836 | ||
1837 | ||
1838 | ||
1839 | #ifdef SUPPORT_PCRE16 | |
1840 | /************************************************* | |
1841 | * Find length of 0-terminated 16-bit string * | |
1842 | *************************************************/ | |
1843 | ||
1844 | static int strlen16(PCRE_SPTR16 p) | |
1845 | { | |
1846 | int len = 0; | |
1847 | while (*p++ != 0) len++; | |
1848 | return len; | |
1849 | } | |
1850 | #endif /* SUPPORT_PCRE16 */ | |
1851 | ||
1852 | ||
1853 | ||
1854 | #ifdef SUPPORT_PCRE32 | |
1855 | /************************************************* | |
1856 | * Find length of 0-terminated 32-bit string * | |
1857 | *************************************************/ | |
1858 | ||
1859 | static int strlen32(PCRE_SPTR32 p) | |
1860 | { | |
1861 | int len = 0; | |
1862 | while (*p++ != 0) len++; | |
1863 | return len; | |
1864 | } | |
1865 | #endif /* SUPPORT_PCRE32 */ | |
1866 | ||
1867 | ||
1868 | ||
1869 | #ifdef SUPPORT_PCRE16 | |
1870 | /************************************************* | |
1871 | * Print 16-bit character string * | |
1872 | *************************************************/ | |
1873 | ||
1874 | /* Must handle UTF-16 strings in utf mode. Yields number of characters printed. | |
1875 | If handed a NULL file, just counts chars without printing. */ | |
1876 | ||
1877 | static int pchars16(PCRE_SPTR16 p, int length, FILE *f) | |
1878 | { | |
1879 | int yield = 0; | |
1880 | ||
1881 | if (length < 0) | |
1882 | length = strlen16(p); | |
1883 | ||
1884 | while (length-- > 0) | |
1885 | { | |
1886 | pcre_uint32 c = *p++ & 0xffff; | |
1887 | #if !defined NOUTF | |
1888 | if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0) | |
1889 | { | |
1890 | int d = *p & 0xffff; | |
1891 | if (d >= 0xDC00 && d < 0xDFFF) | |
1892 | { | |
1893 | c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000; | |
1894 | length--; | |
1895 | p++; | |
1896 | } | |
1897 | } | |
1898 | #endif | |
1899 | yield += pchar(c, f); | |
1900 | } | |
1901 | ||
1902 | return yield; | |
1903 | } | |
1904 | #endif /* SUPPORT_PCRE16 */ | |
1905 | ||
1906 | ||
1907 | ||
1908 | #ifdef SUPPORT_PCRE32 | |
1909 | /************************************************* | |
1910 | * Print 32-bit character string * | |
1911 | *************************************************/ | |
1912 | ||
1913 | /* Must handle UTF-32 strings in utf mode. Yields number of characters printed. | |
1914 | If handed a NULL file, just counts chars without printing. */ | |
1915 | ||
1916 | #define UTF32_MASK (0x1fffffu) | |
1917 | ||
1918 | static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f) | |
1919 | { | |
1920 | int yield = 0; | |
1921 | ||
1922 | if (length < 0) | |
1923 | length = strlen32(p); | |
1924 | ||
1925 | while (length-- > 0) | |
1926 | { | |
1927 | pcre_uint32 c = *p++; | |
1928 | if (utf) c &= UTF32_MASK; | |
1929 | yield += pchar(c, f); | |
1930 | } | |
1931 | ||
1932 | return yield; | |
1933 | } | |
1934 | #endif /* SUPPORT_PCRE32 */ | |
1935 | ||
1936 | ||
1937 | ||
1938 | #ifdef SUPPORT_PCRE8 | |
1939 | /************************************************* | |
1940 | * Read a capture name (8-bit) and check it * | |
1941 | *************************************************/ | |
1942 | ||
1943 | static pcre_uint8 * | |
1944 | read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re) | |
1945 | { | |
1946 | pcre_uint8 *npp = *pp; | |
1947 | while (isalnum(*p)) *npp++ = *p++; | |
1948 | *npp++ = 0; | |
1949 | *npp = 0; | |
1950 | if (pcre_get_stringnumber(re, (char *)(*pp)) < 0) | |
1951 | { | |
1952 | fprintf(outfile, "no parentheses with name \""); | |
1953 | PCHARSV(*pp, 0, -1, outfile); | |
1954 | fprintf(outfile, "\"\n"); | |
1955 | } | |
1956 | ||
1957 | *pp = npp; | |
1958 | return p; | |
1959 | } | |
1960 | #endif /* SUPPORT_PCRE8 */ | |
1961 | ||
1962 | ||
1963 | ||
1964 | #ifdef SUPPORT_PCRE16 | |
1965 | /************************************************* | |
1966 | * Read a capture name (16-bit) and check it * | |
1967 | *************************************************/ | |
1968 | ||
1969 | /* Note that the text being read is 8-bit. */ | |
1970 | ||
1971 | static pcre_uint8 * | |
1972 | read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re) | |
1973 | { | |
1974 | pcre_uint16 *npp = *pp; | |
1975 | while (isalnum(*p)) *npp++ = *p++; | |
1976 | *npp++ = 0; | |
1977 | *npp = 0; | |
1978 | if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0) | |
1979 | { | |
1980 | fprintf(outfile, "no parentheses with name \""); | |
1981 | PCHARSV(*pp, 0, -1, outfile); | |
1982 | fprintf(outfile, "\"\n"); | |
1983 | } | |
1984 | *pp = npp; | |
1985 | return p; | |
1986 | } | |
1987 | #endif /* SUPPORT_PCRE16 */ | |
1988 | ||
1989 | ||
1990 | ||
1991 | #ifdef SUPPORT_PCRE32 | |
1992 | /************************************************* | |
1993 | * Read a capture name (32-bit) and check it * | |
1994 | *************************************************/ | |
1995 | ||
1996 | /* Note that the text being read is 8-bit. */ | |
1997 | ||
1998 | static pcre_uint8 * | |
1999 | read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re) | |
2000 | { | |
2001 | pcre_uint32 *npp = *pp; | |
2002 | while (isalnum(*p)) *npp++ = *p++; | |
2003 | *npp++ = 0; | |
2004 | *npp = 0; | |
2005 | if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0) | |
2006 | { | |
2007 | fprintf(outfile, "no parentheses with name \""); | |
2008 | PCHARSV(*pp, 0, -1, outfile); | |
2009 | fprintf(outfile, "\"\n"); | |
2010 | } | |
2011 | *pp = npp; | |
2012 | return p; | |
2013 | } | |
2014 | #endif /* SUPPORT_PCRE32 */ | |
2015 | ||
2016 | ||
2017 | ||
2018 | /************************************************* | |
2019 | * Callout function * | |
2020 | *************************************************/ | |
2021 | ||
2022 | /* Called from PCRE as a result of the (?C) item. We print out where we are in | |
2023 | the match. Yield zero unless more callouts than the fail count, or the callout | |
2024 | data is not zero. */ | |
2025 | ||
2026 | static int callout(pcre_callout_block *cb) | |
2027 | { | |
2028 | FILE *f = (first_callout | callout_extra)? outfile : NULL; | |
2029 | int i, pre_start, post_start, subject_length; | |
2030 | ||
2031 | if (callout_extra) | |
2032 | { | |
2033 | fprintf(f, "Callout %d: last capture = %d\n", | |
2034 | cb->callout_number, cb->capture_last); | |
2035 | ||
2036 | for (i = 0; i < cb->capture_top * 2; i += 2) | |
2037 | { | |
2038 | if (cb->offset_vector[i] < 0) | |
2039 | fprintf(f, "%2d: <unset>\n", i/2); | |
2040 | else | |
2041 | { | |
2042 | fprintf(f, "%2d: ", i/2); | |
2043 | PCHARSV(cb->subject, cb->offset_vector[i], | |
2044 | cb->offset_vector[i+1] - cb->offset_vector[i], f); | |
2045 | fprintf(f, "\n"); | |
2046 | } | |
2047 | } | |
2048 | } | |
2049 | ||
2050 | /* Re-print the subject in canonical form, the first time or if giving full | |
2051 | details. On subsequent calls in the same match, we use pchars just to find the | |
2052 | printed lengths of the substrings. */ | |
2053 | ||
2054 | if (f != NULL) fprintf(f, "--->"); | |
2055 | ||
2056 | PCHARS(pre_start, cb->subject, 0, cb->start_match, f); | |
2057 | PCHARS(post_start, cb->subject, cb->start_match, | |
2058 | cb->current_position - cb->start_match, f); | |
2059 | ||
2060 | PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL); | |
2061 | ||
2062 | PCHARSV(cb->subject, cb->current_position, | |
2063 | cb->subject_length - cb->current_position, f); | |
2064 | ||
2065 | if (f != NULL) fprintf(f, "\n"); | |
2066 | ||
2067 | /* Always print appropriate indicators, with callout number if not already | |
2068 | shown. For automatic callouts, show the pattern offset. */ | |
2069 | ||
2070 | if (cb->callout_number == 255) | |
2071 | { | |
2072 | fprintf(outfile, "%+3d ", cb->pattern_position); | |
2073 | if (cb->pattern_position > 99) fprintf(outfile, "\n "); | |
2074 | } | |
2075 | else | |
2076 | { | |
2077 | if (callout_extra) fprintf(outfile, " "); | |
2078 | else fprintf(outfile, "%3d ", cb->callout_number); | |
2079 | } | |
2080 | ||
2081 | for (i = 0; i < pre_start; i++) fprintf(outfile, " "); | |
2082 | fprintf(outfile, "^"); | |
2083 | ||
2084 | if (post_start > 0) | |
2085 | { | |
2086 | for (i = 0; i < post_start - 1; i++) fprintf(outfile, " "); | |
2087 | fprintf(outfile, "^"); | |
2088 | } | |
2089 | ||
2090 | for (i = 0; i < subject_length - pre_start - post_start + 4; i++) | |
2091 | fprintf(outfile, " "); | |
2092 | ||
2093 | fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length, | |
2094 | pbuffer + cb->pattern_position); | |
2095 | ||
2096 | fprintf(outfile, "\n"); | |
2097 | first_callout = 0; | |
2098 | ||
2099 | if (cb->mark != last_callout_mark) | |
2100 | { | |
2101 | if (cb->mark == NULL) | |
2102 | fprintf(outfile, "Latest Mark: <unset>\n"); | |
2103 | else | |
2104 | { | |
2105 | fprintf(outfile, "Latest Mark: "); | |
2106 | PCHARSV(cb->mark, 0, -1, outfile); | |
2107 | putc('\n', outfile); | |
2108 | } | |
2109 | last_callout_mark = cb->mark; | |
2110 | } | |
2111 | ||
2112 | if (cb->callout_data != NULL) | |
2113 | { | |
2114 | int callout_data = *((int *)(cb->callout_data)); | |
2115 | if (callout_data != 0) | |
2116 | { | |
2117 | fprintf(outfile, "Callout data = %d\n", callout_data); | |
2118 | return callout_data; | |
2119 | } | |
2120 | } | |
2121 | ||
2122 | return (cb->callout_number != callout_fail_id)? 0 : | |
2123 | (++callout_count >= callout_fail_count)? 1 : 0; | |
2124 | } | |
2125 | ||
2126 | ||
2127 | /************************************************* | |
2128 | * Local malloc functions * | |
2129 | *************************************************/ | |
2130 | ||
2131 | /* Alternative malloc function, to test functionality and save the size of a | |
2132 | compiled re, which is the first store request that pcre_compile() makes. The | |
2133 | show_malloc variable is set only during matching. */ | |
2134 | ||
2135 | static void *new_malloc(size_t size) | |
2136 | { | |
2137 | void *block = malloc(size); | |
2138 | gotten_store = size; | gotten_store = size; |
2139 | if (first_gotten_store == 0) first_gotten_store = size; | |
2140 | if (show_malloc) | if (show_malloc) |
2141 | fprintf(outfile, "malloc %3d %p\n", (int)size, block); | fprintf(outfile, "malloc %3d %p\n", (int)size, block); |
2142 | return block; | return block; |
# | Line 560 if (show_malloc) | Line 2149 if (show_malloc) |
2149 | free(block); | free(block); |
2150 | } | } |
2151 | ||
2152 | /* For recursion malloc/free, to test stacking calls */ | /* For recursion malloc/free, to test stacking calls */ |
2153 | ||
2154 | static void *stack_malloc(size_t size) | static void *stack_malloc(size_t size) |
# | Line 583 free(block); | Line 2171 free(block); |
2171 | * Call pcre_fullinfo() * | * Call pcre_fullinfo() * |
2172 | *************************************************/ | *************************************************/ |
2173 | ||
2174 | /* Get one piece of information from the pcre_fullinfo() function */ | /* Get one piece of information from the pcre_fullinfo() function. When only |
2175 | one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct | |
2176 | value, but the code is defensive. | |
2177 | ||
2178 | Arguments: | |
2179 | re compiled regex | |
2180 | study study data | |
2181 | option PCRE_INFO_xxx option | |
2182 | ptr where to put the data | |
2183 | ||
2184 | Returns: 0 when OK, < 0 on error | |
2185 | */ | |
2186 | ||
2187 | static int | |
2188 | new_info(pcre *re, pcre_extra *study, int option, void *ptr) | |
2189 | { | |
2190 | int rc; | |
2191 | ||
2192 | if (pcre_mode == PCRE32_MODE) | |
2193 | #ifdef SUPPORT_PCRE32 | |
2194 | rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr); | |
2195 | #else | |
2196 | rc = PCRE_ERROR_BADMODE; | |
2197 | #endif | |
2198 | else if (pcre_mode == PCRE16_MODE) | |
2199 | #ifdef SUPPORT_PCRE16 | |
2200 | rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr); | |
2201 | #else | |
2202 | rc = PCRE_ERROR_BADMODE; | |
2203 | #endif | |
2204 | else | |
2205 | #ifdef SUPPORT_PCRE8 | |
2206 | rc = pcre_fullinfo(re, study, option, ptr); | |
2207 | #else | |
2208 | rc = PCRE_ERROR_BADMODE; | |
2209 | #endif | |
2210 | ||
2211 | if (rc < 0) | |
2212 | { | |
2213 | fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc, | |
2214 | pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option); | |
2215 | if (rc == PCRE_ERROR_BADMODE) | |
2216 | fprintf(outfile, "Running in %d-bit mode but pattern was compiled in " | |
2217 | "%d-bit mode\n", 8 * CHAR_SIZE, | |
2218 | 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK)); | |
2219 | } | |
2220 | ||
2221 | return rc; | |
2222 | } | |
2223 | ||
2224 | ||
2225 | ||
2226 | /************************************************* | |
2227 | * Swap byte functions * | |
2228 | *************************************************/ | |
2229 | ||
2230 | /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32 | |
2231 | value, respectively. | |
2232 | ||
2233 | Arguments: | |
2234 | value any number | |
2235 | ||
2236 | Returns: the byte swapped value | |
2237 | */ | |
2238 | ||
/* swap_uint32(): return value with its four bytes in reverse order. The
   lowest byte is moved to bits 24-31, the next to bits 16-23, the third
   down to bits 8-15, and the top byte is shifted down to bits 0-7. The last
   term needs no mask because the right shift leaves only the top byte
   (assumes pcre_uint32 is an unsigned 32-bit type - TODO confirm typedef). */
2239 | static pcre_uint32 | |
2240 | swap_uint32(pcre_uint32 value) | |
2241 | { | |
2242 | return ((value & 0x000000ff) << 24) | | |
2243 | ((value & 0x0000ff00) << 8) | | |
2244 | ((value & 0x00ff0000) >> 8) | | |
2245 | (value >> 24); | |
2246 | } | |
2247 | ||
/* swap_uint16(): return value with its two bytes exchanged. The shifts and
   the OR are evaluated after integer promotion, so (value << 8) carries bits
   above bit 15; they are dropped when the result is converted back to
   pcre_uint16 on return (assumes pcre_uint16 is an unsigned 16-bit type -
   TODO confirm against its typedef). */
2248 | static pcre_uint16 | |
2249 | swap_uint16(pcre_uint16 value) | |
2250 | { | |
2251 | return (value >> 8) | (value << 8); | |
2252 | } | |
2253 | ||
2254 | ||
2255 | ||
2256 | /************************************************* | |
2257 | * Flip bytes in a compiled pattern * | |
2258 | *************************************************/ | |
2259 | ||
2260 | /* This function is called if the 'F' option was present on a pattern that is | |
2261 | to be written to a file. We flip the bytes of all the integer fields in the | |
2262 | regex data block and the study block. In 16-bit mode this also flips relevant | |
2263 | bytes in the pattern itself. This is to make it possible to test PCRE's | |
2264 | ability to reload byte-flipped patterns, e.g. those compiled on a different | |
2265 | architecture. */ | |
2266 | ||
/* regexflip8_or_16(): byte-flip, in place, the header of the compiled pattern
   ere and, when extra is not NULL, the study data it points to. When
   SUPPORT_PCRE16 is defined and pcre_mode is PCRE16_MODE it goes on to swap
   every 16-bit unit of the name table and of the opcode stream, stopping at
   OP_END. Nothing is returned; the caller's blocks are modified directly. */
2267 | #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16 | |
2268 | static void | |
2269 | regexflip8_or_16(pcre *ere, pcre_extra *extra) | |
2270 | { | |
2271 | real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere; | |
/* ptr and length are taken from name_table_offset, name_count and
   name_entry_size here, while those header fields still hold host-order
   values; the fields themselves are swapped further down. */
2272 | #ifdef SUPPORT_PCRE16 | |
2273 | int op; | |
2274 | pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset; | |
2275 | int length = re->name_count * re->name_entry_size; | |
2276 | #ifdef SUPPORT_UTF | |
2277 | BOOL utf = (re->options & PCRE_UTF16) != 0; | |
2278 | BOOL utf16_char = FALSE; | |
2279 | #endif /* SUPPORT_UTF */ | |
2280 | #endif /* SUPPORT_PCRE16 */ | |
2281 | ||
2282 | /* Always flip the bytes in the main data block and study blocks. */ | |
2283 | ||
/* magic_number is overwritten with the constant REVERSED_MAGIC_NUMBER rather
   than being passed through swap_uint32(). */
2284 | re->magic_number = REVERSED_MAGIC_NUMBER; | |
2285 | re->size = swap_uint32(re->size); | |
2286 | re->options = swap_uint32(re->options); | |
2287 | re->flags = swap_uint16(re->flags); | |
2288 | re->top_bracket = swap_uint16(re->top_bracket); | |
2289 | re->top_backref = swap_uint16(re->top_backref); | |
2290 | re->first_char = swap_uint16(re->first_char); | |
2291 | re->req_char = swap_uint16(re->req_char); | |
2292 | re->name_table_offset = swap_uint16(re->name_table_offset); | |
2293 | re->name_entry_size = swap_uint16(re->name_entry_size); | |
2294 | re->name_count = swap_uint16(re->name_count); | |
2295 | ||
/* NOTE(review): extra->study_data is dereferenced without a NULL test of its
   own - confirm callers never pass an extra block that lacks study data. */
2296 | if (extra != NULL) | |
2297 | { | |
2298 | pcre_study_data *rsd = (pcre_study_data *)(extra->study_data); | |
2299 | rsd->size = swap_uint32(rsd->size); | |
2300 | rsd->flags = swap_uint32(rsd->flags); | |
2301 | rsd->minlength = swap_uint32(rsd->minlength); | |
2302 | } | |
2303 | ||
2304 | /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes | |
2305 | in the name table, if present, and then in the pattern itself. */ | |
2306 | ||
2307 | #ifdef SUPPORT_PCRE16 | |
2308 | if (pcre_mode != PCRE16_MODE) return; | |
2309 | ||
/* Each pass first swaps `length` pending units - the whole name table on the
   first pass, the operands of the previous opcode afterwards - and then
   reads the next opcode to decide how many units follow it. */
2310 | while(TRUE) | |
2311 | { | |
2312 | /* Swap previous characters. */ | |
2313 | while (length-- > 0) | |
2314 | { | |
2315 | *ptr = swap_uint16(*ptr); | |
2316 | ptr++; | |
2317 | } | |
/* NOTE(review): ptr[-1] was byte-swapped by the loop just above, so the
   0xd800 high-surrogate test below inspects the flipped value, not the
   original code unit. Confirm whether the test should run on the unswapped
   unit; as written a surrogate pair's second unit may be taken as an opcode. */
2318 | #ifdef SUPPORT_UTF | |
2319 | if (utf16_char) | |
2320 | { | |
2321 | if ((ptr[-1] & 0xfc00) == 0xd800) | |
2322 | { | |
2323 | /* We know that there is only one extra character in UTF-16. */ | |
2324 | *ptr = swap_uint16(*ptr); | |
2325 | ptr++; | |
2326 | } | |
2327 | } | |
2328 | utf16_char = FALSE; | |
2329 | #endif /* SUPPORT_UTF */ | |
2330 | ||
2331 | /* Get next opcode. */ | |
2332 | ||
/* op keeps the unswapped opcode for the switch; only the stored copy is
   swapped. */
2333 | length = 0; | |
2334 | op = *ptr; | |
2335 | *ptr++ = swap_uint16(op); | |
2336 | ||
2337 | switch (op) | |
2338 | { | |
2339 | case OP_END: | |
2340 | return; | |
2341 | ||
/* The opcodes listed next set utf16_char when the pattern has PCRE_UTF16, so
   that the surrogate check runs after their operands have been swapped. They
   then fall into the default case for their operand count. */
2342 | #ifdef SUPPORT_UTF | |
2343 | case OP_CHAR: | |
2344 | case OP_CHARI: | |
2345 | case OP_NOT: | |
2346 | case OP_NOTI: | |
2347 | case OP_STAR: | |
2348 | case OP_MINSTAR: | |
2349 | case OP_PLUS: | |
2350 | case OP_MINPLUS: | |
2351 | case OP_QUERY: | |
2352 | case OP_MINQUERY: | |
2353 | case OP_UPTO: | |
2354 | case OP_MINUPTO: | |
2355 | case OP_EXACT: | |
2356 | case OP_POSSTAR: | |
2357 | case OP_POSPLUS: | |
2358 | case OP_POSQUERY: | |
2359 | case OP_POSUPTO: | |
2360 | case OP_STARI: | |
2361 | case OP_MINSTARI: | |
2362 | case OP_PLUSI: | |
2363 | case OP_MINPLUSI: | |
2364 | case OP_QUERYI: | |
2365 | case OP_MINQUERYI: | |
2366 | case OP_UPTOI: | |
2367 | case OP_MINUPTOI: | |
2368 | case OP_EXACTI: | |
2369 | case OP_POSSTARI: | |
2370 | case OP_POSPLUSI: | |
2371 | case OP_POSQUERYI: | |
2372 | case OP_POSUPTOI: | |
2373 | case OP_NOTSTAR: | |
2374 | case OP_NOTMINSTAR: | |
2375 | case OP_NOTPLUS: | |
2376 | case OP_NOTMINPLUS: | |
2377 | case OP_NOTQUERY: | |
2378 | case OP_NOTMINQUERY: | |
2379 | case OP_NOTUPTO: | |
2380 | case OP_NOTMINUPTO: | |
2381 | case OP_NOTEXACT: | |
2382 | case OP_NOTPOSSTAR: | |
2383 | case OP_NOTPOSPLUS: | |
2384 | case OP_NOTPOSQUERY: | |
2385 | case OP_NOTPOSUPTO: | |
2386 | case OP_NOTSTARI: | |
2387 | case OP_NOTMINSTARI: | |
2388 | case OP_NOTPLUSI: | |
2389 | case OP_NOTMINPLUSI: | |
2390 | case OP_NOTQUERYI: | |
2391 | case OP_NOTMINQUERYI: | |
2392 | case OP_NOTUPTOI: | |
2393 | case OP_NOTMINUPTOI: | |
2394 | case OP_NOTEXACTI: | |
2395 | case OP_NOTPOSSTARI: | |
2396 | case OP_NOTPOSPLUSI: | |
2397 | case OP_NOTPOSQUERYI: | |
2398 | case OP_NOTPOSUPTOI: | |
2399 | if (utf) utf16_char = TRUE; | |
2400 | #endif | |
2401 | /* Fall through. */ | |
2402 | ||
/* Operand count comes from the OP_lengths16 table, less the opcode unit that
   has already been swapped. */
2403 | default: | |
2404 | length = OP_lengths16[op] - 1; | |
2405 | break; | |
2406 | ||
/* The 32-byte bit map is stepped over without being swapped. */
2407 | case OP_CLASS: | |
2408 | case OP_NCLASS: | |
2409 | /* Skip the character bit map. */ | |
2410 | ptr += 32/sizeof(pcre_uint16); | |
2411 | length = 0; | |
2412 | break; | |
2413 | ||
/* For OP_XCLASS the total size is read from the link field while it is still
   unswapped. The (1 + LINK_SIZE + 1) deducted from it matches the opcode
   unit, the LINK_SIZE link unit(s) and the flags unit, each of which is
   swapped individually below; the remainder is left in length for the next
   pass. When the flags unit has XCL_MAP set, the bit map is skipped and its
   size is also removed from length. */
2414 | case OP_XCLASS: | |
2415 | /* LINK_SIZE can be 1 or 2 in 16 bit mode. */ | |
2416 | if (LINK_SIZE > 1) | |
2417 | length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1])) | |
2418 | - (1 + LINK_SIZE + 1)); | |
2419 | else | |
2420 | length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1)); | |
2421 | ||
2422 | /* Reverse the size of the XCLASS instance. */ | |
2423 | *ptr = swap_uint16(*ptr); | |
2424 | ptr++; | |
2425 | if (LINK_SIZE > 1) | |
2426 | { | |
2427 | *ptr = swap_uint16(*ptr); | |
2428 | ptr++; | |
2429 | } | |
2430 | ||
2431 | op = *ptr; | |
2432 | *ptr = swap_uint16(op); | |
2433 | ptr++; | |
2434 | if ((op & XCL_MAP) != 0) | |
2435 | { | |
2436 | /* Skip the character bit map. */ | |
2437 | ptr += 32/sizeof(pcre_uint16); | |
2438 | length -= 32/sizeof(pcre_uint16); | |
2439 | } | |
2440 | break; | |
2441 | } | |
2442 | } | |
2443 | /* Control should never reach here in 16 bit mode. */ | |
2444 | #endif /* SUPPORT_PCRE16 */ | |
2445 | } | |
2446 | #endif /* SUPPORT_PCRE[8|16] */ | |
2447 | ||
2448 | ||
2449 | ||
/* regexflip_32(): 32-bit counterpart of the 8/16-bit flip routine. It swaps
   the header fields of ere in place, swaps the study data when extra is not
   NULL, and then walks the name table and opcode stream swapping each 32-bit
   unit until OP_END is read. Unlike the 8/16-bit routine it does not test
   pcre_mode before walking the pattern. */
2450 | #if defined SUPPORT_PCRE32 | |
2451 | static void | |
2452 | regexflip_32(pcre *ere, pcre_extra *extra) | |
2453 | { | |
2454 | real_pcre32 *re = (real_pcre32 *)ere; | |
/* ptr and length are derived from header fields before those fields are
   swapped below. utf is assigned from PCRE_UTF32 but is not read again in
   the rows shown for this function. */
2455 | int op; | |
2456 | pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset; | |
2457 | int length = re->name_count * re->name_entry_size; | |
2458 | #ifdef SUPPORT_UTF | |
2459 | BOOL utf = (re->options & PCRE_UTF32) != 0; | |
2460 | #endif /* SUPPORT_UTF */ | |
2461 | ||
2462 | /* Always flip the bytes in the main data block and study blocks. */ | |
2463 | ||
/* first_char and req_char are swapped as 32-bit values here; flags,
   top_bracket, top_backref and the three name-table fields stay 16-bit. */
2464 | re->magic_number = REVERSED_MAGIC_NUMBER; | |
2465 | re->size = swap_uint32(re->size); | |
2466 | re->options = swap_uint32(re->options); | |
2467 | re->flags = swap_uint16(re->flags); | |
2468 | re->top_bracket = swap_uint16(re->top_bracket); | |
2469 | re->top_backref = swap_uint16(re->top_backref); | |
2470 | re->first_char = swap_uint32(re->first_char); | |
2471 | re->req_char = swap_uint32(re->req_char); | |
2472 | re->name_table_offset = swap_uint16(re->name_table_offset); | |
2473 | re->name_entry_size = swap_uint16(re->name_entry_size); | |
2474 | re->name_count = swap_uint16(re->name_count); | |
2475 | ||
2476 | if (extra != NULL) | |
2477 | { | |
2478 | pcre_study_data *rsd = (pcre_study_data *)(extra->study_data); | |
2479 | rsd->size = swap_uint32(rsd->size); | |
2480 | rsd->flags = swap_uint32(rsd->flags); | |
2481 | rsd->minlength = swap_uint32(rsd->minlength); | |
2482 | } | |
2483 | ||
2484 | /* In 32-bit mode we must swap bytes | |
2485 | in the name table, if present, and then in the pattern itself. */ | |
2486 | ||
/* Each pass swaps the units left pending by the previous pass (the name
   table first), then reads the next opcode; op keeps the unswapped value for
   the switch while the stored copy is swapped. */
2487 | while(TRUE) | |
2488 | { | |
2489 | /* Swap previous characters. */ | |
2490 | while (length-- > 0) | |
2491 | { | |
2492 | *ptr = swap_uint32(*ptr); | |
2493 | ptr++; | |
2494 | } | |
2495 | ||
2496 | /* Get next opcode. */ | |
2497 | ||
2498 | length = 0; | |
2499 | op = *ptr; | |
2500 | *ptr++ = swap_uint32(op); | |
2501 | ||
/* In the next five rows the left-hand column holds the older revision's
   void new_info(), which the diff happens to align with this switch; it is
   not part of regexflip_32(). */
2502 | static void new_info(pcre *re, pcre_extra *study, int option, void *ptr) | switch (op) |
2503 | { | { |
2504 | int rc; | case OP_END: |
2505 | if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0) | return; |
2506 | fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option); | |
2507 | default: | |
2508 | length = OP_lengths32[op] - 1; | |
2509 | break; | |
2510 | ||
2511 | case OP_CLASS: | |
2512 | case OP_NCLASS: | |
2513 | /* Skip the character bit map. */ | |
2514 | ptr += 32/sizeof(pcre_uint32); | |
2515 | length = 0; | |
2516 | break; | |
2517 | ||
/* The size is read from the single, still-unswapped link unit; the
   (1 + LINK_SIZE + 1) deducted matches the opcode, link and flags units that
   are swapped one by one here. */
2518 | case OP_XCLASS: | |
2519 | /* LINK_SIZE can only be 1 in 32-bit mode. */ | |
2520 | length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1)); | |
2521 | ||
2522 | /* Reverse the size of the XCLASS instance. */ | |
2523 | *ptr = swap_uint32(*ptr); | |
2524 | ptr++; | |
2525 | ||
2526 | op = *ptr; | |
2527 | *ptr = swap_uint32(op); | |
2528 | ptr++; | |
2529 | if ((op & XCL_MAP) != 0) | |
2530 | { | |
2531 | /* Skip the character bit map. */ | |
2532 | ptr += 32/sizeof(pcre_uint32); | |
2533 | length -= 32/sizeof(pcre_uint32); | |
2534 | } | |
2535 | break; | |
2536 | } | |
2537 | } | |
2538 | /* Control should never reach here in 32 bit mode. */ | |
2539 | } | } |
2540 | ||
2541 | #endif /* SUPPORT_PCRE32 */ | |
2542 | ||
2543 | ||
/************************************************* | ||
* Byte flipping function * | ||
*************************************************/ | ||
2544 | ||
/* These rows hold two different functions, one per diff column.
   Right-hand column, regexflip(ere, extra): calls regexflip_32() when
   REAL_PCRE_FLAGS(ere) has PCRE_MODE32 set, and regexflip8_or_16() when it
   has PCRE_MODE8 or PCRE_MODE16 set; each call is compiled in only under the
   matching SUPPORT_ macro(s). The two tests are independent ifs, not an
   if/else pair.
   Left-hand column, byteflip(value, n): for n == 2 returns value with its two
   low bytes exchanged, otherwise returns value with its four low bytes in
   reverse order. */
2545 | static unsigned long int | static void |
2546 | byteflip(unsigned long int value, int n) | regexflip(pcre *ere, pcre_extra *extra) |
2547 | { | { |
2548 | if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8); | #if defined SUPPORT_PCRE32 |
2549 | return ((value & 0x000000ff) << 24) | | if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32) |
2550 | ((value & 0x0000ff00) << 8) | | regexflip_32(ere, extra); |
2551 | ((value & 0x00ff0000) >> 8) | | #endif |
2552 | ((value & 0xff000000) >> 24); | #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16 |
2553 | if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16)) | |
2554 | regexflip8_or_16(ere, extra); | |
2555 | #endif | |
2556 | } | } |
2557 | ||
2558 | ||
2559 | ||
2560 | /************************************************* | /************************************************* |
2561 | * Check match or recursion limit * | * Check match or recursion limit * |
2562 | *************************************************/ | *************************************************/ |
2563 | ||
2564 | static int | static int |
2565 | check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len, | check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len, |
2566 | int start_offset, int options, int *use_offsets, int use_size_offsets, | int start_offset, int options, int *use_offsets, int use_size_offsets, |
2567 | int flag, unsigned long int *limit, int errnumber, const char *msg) | int flag, unsigned long int *limit, int errnumber, const char *msg) |
2568 | { | { |
# | Line 631 for (;;) | Line 2577 for (;;) |
2577 | { | { |
2578 | *limit = mid; | *limit = mid; |
2579 | ||
2580 | count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, | PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, |
2581 | use_offsets, use_size_offsets); | use_offsets, use_size_offsets); |
2582 | ||
2583 | if (count == errnumber) | if (count == errnumber) |
# | Line 663 return count; | Line 2609 return count; |
2609 | ||
2610 | ||
2611 | /************************************************* | /************************************************* |
2612 | * Case-independent strncmp() function * | |
2613 | *************************************************/ | |
2614 | ||
2615 | /* | |
2616 | Arguments: | |
2617 | s first string | |
2618 | t second string | |
2619 | n number of characters to compare | |
2620 | ||
2621 | Returns: < 0, = 0, or > 0, according to the comparison | |
2622 | */ | |
2623 | ||
/* strncmpic(): compare up to n bytes of s and t after passing each through
   tolower(). Returns the first non-zero difference of the lowered bytes, or
   0 if all n positions agree. Unlike strncmp(), the loop has no test for a
   terminating zero: it stops only on a difference or after n bytes, so both
   buffers must be readable up to that point. */
2624 | static int | |
2625 | strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n) | |
2626 | { | |
2627 | while (n--) | |
2628 | { | |
2629 | int c = tolower(*s++) - tolower(*t++); | |
2630 | if (c) return c; | |
2631 | } | |
2632 | return 0; | |
2633 | } | |
2634 | ||
2635 | ||
2636 | ||
2637 | /************************************************* | |
2638 | * Check newline indicator * | * Check newline indicator * |
2639 | *************************************************/ | *************************************************/ |
2640 | ||
2641 | /* This is used both at compile and run-time to check for <xxx> escapes, where | /* This is used both at compile and run-time to check for <xxx> escapes. Print |
2642 | xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is | a message and return 0 if there is no match. |
no match. | ||
2643 | ||
2644 | Arguments: | Arguments: |
2645 | p points after the leading '<' | p points after the leading '<' |
# | Line 678 Returns: appropriate PCRE_NEWLINE_x | Line 2649 Returns: appropriate PCRE_NEWLINE_x |
2649 | */ | */ |
2650 | ||
/* check_newline(): both diff columns define this function on the same rows.
   The text at p is tested against each keyword in the order listed and the
   matching PCRE_ constant is returned; if none matches, an "Unknown newline
   type" line is written to f and 0 is returned.
   Left-hand column: takes uschar *p and compares with strncmp(), so the
   match is case-sensitive.
   Right-hand column: takes pcre_uint8 *p, compares with strncmpic() (case-
   independent), and additionally accepts "bsr_anycrlf>" and "bsr_unicode>",
   returning PCRE_BSR_ANYCRLF and PCRE_BSR_UNICODE. */
2651 | static int | static int |
2652 | check_newline(uschar *p, FILE *f) | check_newline(pcre_uint8 *p, FILE *f) |
2653 | { | { |
2654 | if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR; | if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR; |
2655 | if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF; | if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF; |
2656 | if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF; | if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF; |
2657 | if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF; | if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF; |
2658 | if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY; | if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY; |
2659 | if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF; | |
2660 | if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE; | |
2661 | fprintf(f, "Unknown newline type at: <%s\n", p); | fprintf(f, "Unknown newline type at: <%s\n", p); |
2662 | return 0; | return 0; |
2663 | } | } |
# | Line 698 return 0; | Line 2671 return 0; |
2671 | static void | static void |
2672 | usage(void) | usage(void) |
2673 | { | { |
2674 | printf("Usage: pcretest [options] [<input> [<output>]]\n"); | printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n"); |
2675 | printf(" -b show compiled code (bytecode)\n"); | printf("Input and output default to stdin and stdout.\n"); |
2676 | #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) | |
2677 | printf("If input is a terminal, readline() is used to read from it.\n"); | |
2678 | #else | |
2679 | printf("This version of pcretest is not linked with readline().\n"); | |
2680 | #endif | |
2681 | printf("\nOptions:\n"); | |
2682 | #ifdef SUPPORT_PCRE16 | |
2683 | printf(" -16 use the 16-bit library\n"); | |
2684 | #endif | |
2685 | #ifdef SUPPORT_PCRE32 | |
2686 | printf(" -32 use the 32-bit library\n"); | |
2687 | #endif | |
2688 | printf(" -b show compiled code\n"); | |
2689 | printf(" -C show PCRE compile-time options and exit\n"); | printf(" -C show PCRE compile-time options and exit\n"); |
2690 | printf(" -C arg show a specific compile-time option\n"); | |
2691 | printf(" and exit with its value. The arg can be:\n"); | |
2692 | printf(" linksize internal link size [2, 3, 4]\n"); | |
2693 | printf(" pcre8 8 bit library support enabled [0, 1]\n"); | |
2694 | printf(" pcre16 16 bit library support enabled [0, 1]\n"); | |
2695 | printf(" pcre32 32 bit library support enabled [0, 1]\n"); | |
2696 | printf(" utf Unicode Transformation Format supported [0, 1]\n"); | |
2697 | printf(" ucp Unicode Properties supported [0, 1]\n"); | |
2698 | printf(" jit Just-in-time compiler supported [0, 1]\n"); | |
2699 | printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"); | |
2700 | printf(" -d debug: show compiled code and information (-b and -i)\n"); | printf(" -d debug: show compiled code and information (-b and -i)\n"); |
2701 | #if !defined NODFA | #if !defined NODFA |
2702 | printf(" -dfa force DFA matching for all subjects\n"); | printf(" -dfa force DFA matching for all subjects\n"); |
2703 | #endif | #endif |
2704 | printf(" -help show usage information\n"); | printf(" -help show usage information\n"); |
2705 | printf(" -i show information about compiled patterns\n" | printf(" -i show information about compiled patterns\n" |
2706 | " -M find MATCH_LIMIT minimum for each subject\n" | |
2707 | " -m output memory used information\n" | " -m output memory used information\n" |
2708 | " -o <n> set size of offsets vector to <n>\n"); | " -o <n> set size of offsets vector to <n>\n"); |
2709 | #if !defined NOPOSIX | #if !defined NOPOSIX |
# | Line 714 printf(" -p use POSIX interface\n | Line 2711 printf(" -p use POSIX interface\n |
2711 | #endif | #endif |
2712 | printf(" -q quiet: do not output PCRE version number at start\n"); | printf(" -q quiet: do not output PCRE version number at start\n"); |
2713 | printf(" -S <n> set stack size to <n> megabytes\n"); | printf(" -S <n> set stack size to <n> megabytes\n"); |
2714 | printf(" -s output store (memory) used information\n" | printf(" -s force each pattern to be studied at basic level\n" |
2715 | " -s+ force each pattern to be studied, using JIT if available\n" | |
2716 | " -s++ ditto, verifying when JIT was actually used\n" | |
2717 | " -s+n force each pattern to be studied, using JIT if available,\n" | |
2718 | " where 1 <= n <= 7 selects JIT options\n" | |
2719 | " -s++n ditto, verifying when JIT was actually used\n" | |
2720 | " -t time compilation and execution\n"); | " -t time compilation and execution\n"); |
2721 | printf(" -t <n> time compilation and execution, repeating <n> times\n"); | printf(" -t <n> time compilation and execution, repeating <n> times\n"); |
2722 | printf(" -tm time execution (matching) only\n"); | printf(" -tm time execution (matching) only\n"); |
# | Line 734 options, followed by a set of test data, | Line 2736 options, followed by a set of test data, |
2736 | int main(int argc, char **argv) | int main(int argc, char **argv) |
2737 | { | { |
2738 | FILE *infile = stdin; | FILE *infile = stdin; |
2739 | const char *version; | |
2740 | int options = 0; | int options = 0; |
2741 | int study_options = 0; | int study_options = 0; |
2742 | int default_find_match_limit = FALSE; | |
2743 | int op = 1; | int op = 1; |
2744 | int timeit = 0; | int timeit = 0; |
2745 | int timeitm = 0; | int timeitm = 0; |
2746 | int showinfo = 0; | int showinfo = 0; |
2747 | int showstore = 0; | int showstore = 0; |
2748 | int force_study = -1; | |
2749 | int force_study_options = 0; | |
2750 | int quiet = 0; | int quiet = 0; |
2751 | int size_offsets = 45; | int size_offsets = 45; |
2752 | int size_offsets_max; | int size_offsets_max; |
2753 | int *offsets = NULL; | int *offsets = NULL; |
#if !defined NOPOSIX | ||
int posix = 0; | ||
#endif | ||
2754 | int debug = 0; | int debug = 0; |
2755 | int done = 0; | int done = 0; |
2756 | int all_use_dfa = 0; | int all_use_dfa = 0; |
2757 | int verify_jit = 0; | |
2758 | int yield = 0; | int yield = 0; |
2759 | int mask_utf32 = 0; | |
2760 | int stack_size; | int stack_size; |
2761 | pcre_uint8 *dbuffer = NULL; | |
2762 | size_t dbuffer_size = 1u << 14; | |
2763 | ||
2764 | /* These vectors store, end-to-end, a list of captured substring names. Assume | #if !defined NOPOSIX |
2765 | that 1024 is plenty long enough for the few names we'll be testing. */ | int posix = 0; |
2766 | #endif | |
2767 | #if !defined NODFA | |
2768 | int *dfa_workspace = NULL; | |
2769 | #endif | |
2770 | ||
2771 | pcre_jit_stack *jit_stack = NULL; | |
2772 | ||
2773 | /* These vectors store, end-to-end, a list of zero-terminated captured | |
2774 | substring names, each list itself being terminated by an empty name. Assume | |
2775 | that 1024 is plenty long enough for the few names we'll be testing. It is | |
2776 | easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version | |
2777 | for the actual memory, to ensure alignment. */ | |
2778 | ||
2779 | pcre_uint32 copynames[1024]; | |
2780 | pcre_uint32 getnames[1024]; | |
2781 | ||
2782 | #ifdef SUPPORT_PCRE32 | |
2783 | pcre_uint32 *cn32ptr; | |
2784 | pcre_uint32 *gn32ptr; | |
2785 | #endif | |
2786 | ||
2787 | uschar copynames[1024]; | #ifdef SUPPORT_PCRE16 |
2788 | uschar getnames[1024]; | pcre_uint16 *copynames16 = (pcre_uint16 *)copynames; |
2789 | pcre_uint16 *getnames16 = (pcre_uint16 *)getnames; | |
2790 | pcre_uint16 *cn16ptr; | |
2791 | pcre_uint16 *gn16ptr; | |
2792 | #endif | |
2793 | ||
2794 | uschar *copynamesptr; | #ifdef SUPPORT_PCRE8 |
2795 | uschar *getnamesptr; | pcre_uint8 *copynames8 = (pcre_uint8 *)copynames; |
2796 | pcre_uint8 *getnames8 = (pcre_uint8 *)getnames; | |
2797 | pcre_uint8 *cn8ptr; | |
2798 | pcre_uint8 *gn8ptr; | |
2799 | #endif | |
2800 | ||
2801 | /* Get buffers from malloc() so that Electric Fence will check their misuse | /* Get buffers from malloc() so that valgrind will check their misuse when |
2802 | when I am debugging. They grow automatically when very long lines are read. */ | debugging. They grow automatically when very long lines are read. The 16- |
2803 | and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */ | |
2804 | ||
2805 | buffer = (unsigned char *)malloc(buffer_size); | buffer = (pcre_uint8 *)malloc(buffer_size); |
2806 | dbuffer = (unsigned char *)malloc(buffer_size); | pbuffer = (pcre_uint8 *)malloc(buffer_size); |
pbuffer = (unsigned char *)malloc(buffer_size); | ||
2807 | ||
2808 | /* The outfile variable is static so that new_malloc can use it. */ | /* The outfile variable is static so that new_malloc can use it. */ |
2809 | ||
# | Line 783 it set 0x8000, but then I was advised th | Line 2818 it set 0x8000, but then I was advised th |
2818 | _setmode( _fileno( stdout ), _O_BINARY ); | _setmode( _fileno( stdout ), _O_BINARY ); |
2819 | #endif | #endif |
2820 | ||
2821 | /* Get the version number: both pcre_version() and pcre16_version() give the | |
2822 | same answer. We just need to ensure that we call one that is available. */ | |
2823 | ||
2824 | #if defined SUPPORT_PCRE8 | |
2825 | version = pcre_version(); | |
2826 | #elif defined SUPPORT_PCRE16 | |
2827 | version = pcre16_version(); | |
2828 | #elif defined SUPPORT_PCRE32 | |
2829 | version = pcre32_version(); | |
2830 | #endif | |
2831 | ||
2832 | /* Scan options */ | /* Scan options */ |
2833 | ||
2834 | while (argc > 1 && argv[op][0] == '-') | while (argc > 1 && argv[op][0] == '-') |
2835 | { | { |
2836 | unsigned char *endptr; | pcre_uint8 *endptr; |
2837 | char *arg = argv[op]; | |
2838 | ||
2839 | if (strcmp(arg, "-m") == 0) showstore = 1; | |
2840 | else if (strcmp(arg, "-s") == 0) force_study = 0; | |
2841 | ||
2842 | if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0) | else if (strncmp(arg, "-s+", 3) == 0) |
2843 | showstore = 1; | { |
2844 | else if (strcmp(argv[op], "-q") == 0) quiet = 1; | arg += 3; |
2845 | else if (strcmp(argv[op], "-b") == 0) debug = 1; | if (*arg == '+') { arg++; verify_jit = TRUE; } |
2846 | else if (strcmp(argv[op], "-i") == 0) showinfo = 1; | force_study = 1; |
2847 | else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1; | if (*arg == 0) |
2848 | force_study_options = jit_study_bits[6]; | |
2849 | else if (*arg >= '1' && *arg <= '7') | |
2850 | force_study_options = jit_study_bits[*arg - '1']; | |
2851 | else goto BAD_ARG; | |
2852 | } | |
2853 | else if (strcmp(arg, "-8") == 0) | |
2854 | { | |
2855 | #ifdef SUPPORT_PCRE8 | |
2856 | pcre_mode = PCRE8_MODE; | |
2857 | #else | |
2858 | printf("** This version of PCRE was built without 8-bit support\n"); | |
2859 | exit(1); | |
2860 | #endif | |
2861 | } | |
2862 | else if (strcmp(arg, "-16") == 0) | |
2863 | { | |
2864 | #ifdef SUPPORT_PCRE16 | |
2865 | pcre_mode = PCRE16_MODE; | |
2866 | #else | |
2867 | printf("** This version of PCRE was built without 16-bit support\n"); | |
2868 | exit(1); | |
2869 | #endif | |
2870 | } | |
2871 | else if (strcmp(arg, "-32") == 0 || strcmp(arg, "-32+") == 0) | |
2872 | { | |
2873 | #ifdef SUPPORT_PCRE32 | |
2874 | pcre_mode = PCRE32_MODE; | |
2875 | mask_utf32 = (strcmp(arg, "-32+") == 0); | |
2876 | #else | |
2877 | printf("** This version of PCRE was built without 32-bit support\n"); | |
2878 | exit(1); | |
2879 | #endif | |
2880 | } | |
2881 | else if (strcmp(arg, "-q") == 0) quiet = 1; | |
2882 | else if (strcmp(arg, "-b") == 0) debug = 1; | |
2883 | else if (strcmp(arg, "-i") == 0) showinfo = 1; | |
2884 | else if (strcmp(arg, "-d") == 0) showinfo = debug = 1; | |
2885 | else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE; | |
2886 | #if !defined NODFA | #if !defined NODFA |
2887 | else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1; | else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1; |
2888 | #endif | #endif |
2889 | else if (strcmp(argv[op], "-o") == 0 && argc > 2 && | else if (strcmp(arg, "-o") == 0 && argc > 2 && |
2890 | ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)), | ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)), |
2891 | *endptr == 0)) | *endptr == 0)) |
2892 | { | { |
2893 | op++; | op++; |
2894 | argc--; | argc--; |
2895 | } | } |
2896 | else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0) | else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0) |
2897 | { | { |
2898 | int both = argv[op][2] == 0; | int both = arg[2] == 0; |
2899 | int temp; | int temp; |
2900 | if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr), | if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr), |
2901 | *endptr == 0)) | *endptr == 0)) |
2902 | { | { |
2903 | timeitm = temp; | timeitm = temp; |
# | Line 819 while (argc > 1 && argv[op][0] == '-') | Line 2907 while (argc > 1 && argv[op][0] == '-') |
2907 | else timeitm = LOOPREPEAT; | else timeitm = LOOPREPEAT; |
2908 | if (both) timeit = timeitm; | if (both) timeit = timeitm; |
2909 | } | } |
2910 | else if (strcmp(argv[op], "-S") == 0 && argc > 2 && | else if (strcmp(arg, "-S") == 0 && argc > 2 && |
2911 | ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)), | ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)), |
2912 | *endptr == 0)) | *endptr == 0)) |
2913 | { | { |
2914 | #if defined(_WIN32) || defined(WIN32) | #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) |
2915 | printf("PCRE: -S not supported on this OS\n"); | printf("PCRE: -S not supported on this OS\n"); |
2916 | exit(1); | exit(1); |
2917 | #else | #else |
# | Line 842 while (argc > 1 && argv[op][0] == '-') | Line 2930 while (argc > 1 && argv[op][0] == '-') |
2930 | #endif | #endif |
2931 | } | } |
2932 | #if !defined NOPOSIX | #if !defined NOPOSIX |
2933 | else if (strcmp(argv[op], "-p") == 0) posix = 1; | else if (strcmp(arg, "-p") == 0) posix = 1; |
2934 | #endif | #endif |
2935 | else if (strcmp(argv[op], "-C") == 0) | else if (strcmp(arg, "-C") == 0) |
2936 | { | { |
2937 | int rc; | int rc; |
2938 | printf("PCRE version %s\n", pcre_version()); | unsigned long int lrc; |
2939 | ||
2940 | if (argc > 2) | |
2941 | { | |
2942 | if (strcmp(argv[op + 1], "linksize") == 0) | |
2943 | { | |
2944 | (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc); | |
2945 | printf("%d\n", rc); | |
2946 | yield = rc; | |
2947 | } | |
2948 | else if (strcmp(argv[op + 1], "pcre8") == 0) | |
2949 | { | |
2950 | #ifdef SUPPORT_PCRE8 | |
2951 | printf("1\n"); | |
2952 | yield = 1; | |
2953 | #else | |
2954 | printf("0\n"); | |
2955 | yield = 0; | |
2956 | #endif | |
2957 | } | |
2958 | else if (strcmp(argv[op + 1], "pcre16") == 0) | |
2959 | { | |
2960 | #ifdef SUPPORT_PCRE16 | |
2961 | printf("1\n"); | |
2962 | yield = 1; | |
2963 | #else | |
2964 | printf("0\n"); | |
2965 | yield = 0; | |
2966 | #endif | |
2967 | } | |
2968 | else if (strcmp(argv[op + 1], "pcre32") == 0) | |
2969 | { | |
2970 | #ifdef SUPPORT_PCRE32 | |
2971 | printf("1\n"); | |
2972 | yield = 1; | |
2973 | #else | |
2974 | printf("0\n"); | |
2975 | yield = 0; | |
2976 | #endif | |
2977 | goto EXIT; | |
2978 | } | |
2979 | if (strcmp(argv[op + 1], "utf") == 0) | |
2980 | { | |
2981 | #ifdef SUPPORT_PCRE8 | |
2982 | if (pcre_mode == PCRE8_MODE) | |
2983 | (void)pcre_config(PCRE_CONFIG_UTF8, &rc); | |
2984 | #endif | |
2985 | #ifdef SUPPORT_PCRE16 | |
2986 | if (pcre_mode == PCRE16_MODE) | |
2987 | (void)pcre16_config(PCRE_CONFIG_UTF16, &rc); | |
2988 | #endif | |
2989 | #ifdef SUPPORT_PCRE32 | |
2990 | if (pcre_mode == PCRE32_MODE) | |
2991 | (void)pcre32_config(PCRE_CONFIG_UTF32, &rc); | |
2992 | #endif | |
2993 | printf("%d\n", rc); | |
2994 | yield = rc; | |
2995 | goto EXIT; | |
2996 | } | |
2997 | else if (strcmp(argv[op + 1], "ucp") == 0) | |
2998 | { | |
2999 | (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc); | |
3000 | printf("%d\n", rc); | |
3001 | yield = rc; | |
3002 | } | |
3003 | else if (strcmp(argv[op + 1], "jit") == 0) | |
3004 | { | |
3005 | (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc); | |
3006 | printf("%d\n", rc); | |
3007 | yield = rc; | |
3008 | } | |
3009 | else if (strcmp(argv[op + 1], "newline") == 0) | |
3010 | { | |
3011 | (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc); | |
3012 | print_newline_config(rc, TRUE); | |
3013 | } | |
3014 | else if (strcmp(argv[op + 1], "ebcdic") == 0) | |
3015 | { | |
3016 | #ifdef EBCDIC | |
3017 | printf("1\n"); | |
3018 | yield = 1; | |
3019 | #else | |
3020 | printf("0\n"); | |
3021 | #endif | |
3022 | } | |
3023 | else if (strcmp(argv[op + 1], "ebcdic-nl") == 0) | |
3024 | { | |
3025 | #ifdef EBCDIC | |
3026 | printf("0x%02x\n", CHAR_LF); | |
3027 | #else | |
3028 | printf("0\n"); | |
3029 | #endif | |
3030 | } | |
3031 | else | |
3032 | { | |
3033 | printf("Unknown -C option: %s\n", argv[op + 1]); | |
3034 | } | |
3035 | goto EXIT; | |
3036 | } | |
3037 | ||
3038 | /* No argument for -C: output all configuration information. */ | |
3039 | ||
3040 | printf("PCRE version %s\n", version); | |
3041 | printf("Compiled with\n"); | printf("Compiled with\n"); |
3042 | ||
3043 | #ifdef EBCDIC | |
3044 | printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF); | |
3045 | #endif | |
3046 | ||
3047 | /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both | |
3048 | are set, either both UTFs are supported or both are not supported. */ | |
3049 | ||
3050 | #ifdef SUPPORT_PCRE8 | |
3051 | printf(" 8-bit support\n"); | |
3052 | (void)pcre_config(PCRE_CONFIG_UTF8, &rc); | (void)pcre_config(PCRE_CONFIG_UTF8, &rc); |
3053 | printf(" %sUTF-8 support\n", rc? "" : "No "); | printf (" %sUTF-8 support\n", rc ? "" : "No "); |
3054 | (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc); | #endif |
3055 | #ifdef SUPPORT_PCRE16 | |
3056 | printf(" 16-bit support\n"); | |
3057 | (void)pcre16_config(PCRE_CONFIG_UTF16, &rc); | |
3058 | printf (" %sUTF-16 support\n", rc ? "" : "No "); | |
3059 | #endif | |
3060 | #ifdef SUPPORT_PCRE32 | |
3061 | printf(" 32-bit support\n"); | |
3062 | (void)pcre32_config(PCRE_CONFIG_UTF32, &rc); | |
3063 | printf (" %sUTF-32 support\n", rc ? "" : "No "); | |
3064 | #endif | |
3065 | ||
3066 | (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc); | |
3067 | printf(" %sUnicode properties support\n", rc? "" : "No "); | printf(" %sUnicode properties support\n", rc? "" : "No "); |
3068 | (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc); | (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc); |
3069 | printf(" Newline sequence is %s\n", (rc == '\r')? "CR" : | if (rc) |
3070 | (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" : | { |
3071 | (rc == -2)? "ANYCRLF" : | const char *arch; |
3072 | (rc == -1)? "ANY" : "???"); | (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch)); |
3073 | (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc); | printf(" Just-in-time compiler support: %s\n", arch); |
3074 | } | |
3075 | else | |
3076 | printf(" No just-in-time compiler support\n"); | |
3077 | (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc); | |
3078 | print_newline_config(rc, FALSE); | |
3079 | (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc); | |
3080 | printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" : | |
3081 | "all Unicode newlines"); | |
3082 | (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc); | |
3083 | printf(" Internal link size = %d\n", rc); | printf(" Internal link size = %d\n", rc); |
3084 | (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc); | (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc); |
3085 | printf(" POSIX malloc threshold = %d\n", rc); | printf(" POSIX malloc threshold = %d\n", rc); |
3086 | (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc); | (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc); |
3087 | printf(" Default match limit = %d\n", rc); | printf(" Default match limit = %ld\n", lrc); |
3088 | (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc); | (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc); |
3089 | printf(" Default recursion depth limit = %d\n", rc); | printf(" Default recursion depth limit = %ld\n", lrc); |
3090 | (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc); | (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc); |
3091 | printf(" Match recursion uses %s\n", rc? "stack" : "heap"); | printf(" Match recursion uses %s", rc? "stack" : "heap"); |
3092 | if (showstore) | |
3093 | { | |
3094 | PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0); | |
3095 | printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size); | |
3096 | } | |
3097 | printf("\n"); | |
3098 | goto EXIT; | goto EXIT; |
3099 | } | } |
3100 | else if (strcmp(argv[op], "-help") == 0 || | else if (strcmp(arg, "-help") == 0 || |
3101 | strcmp(argv[op], "--help") == 0) | strcmp(arg, "--help") == 0) |
3102 | { | { |
3103 | usage(); | usage(); |
3104 | goto EXIT; | goto EXIT; |
3105 | } | } |
3106 | else | else |
3107 | { | { |
3108 | printf("** Unknown or malformed option %s\n", argv[op]); | BAD_ARG: |
3109 | printf("** Unknown or malformed option %s\n", arg); | |
3110 | usage(); | usage(); |
3111 | yield = 1; | yield = 1; |
3112 | goto EXIT; | goto EXIT; |
# | Line 925 if (argc > 2) | Line 3153 if (argc > 2) |
3153 | ||
3154 | /* Set alternative malloc function */ | /* Set alternative malloc function */ |
3155 | ||
3156 | #ifdef SUPPORT_PCRE8 | |
3157 | pcre_malloc = new_malloc; | pcre_malloc = new_malloc; |
3158 | pcre_free = new_free; | pcre_free = new_free; |
3159 | pcre_stack_malloc = stack_malloc; | pcre_stack_malloc = stack_malloc; |
3160 | pcre_stack_free = stack_free; | pcre_stack_free = stack_free; |
3161 | #endif | |
3162 | ||
3163 | #ifdef SUPPORT_PCRE16 | |
3164 | pcre16_malloc = new_malloc; | |
3165 | pcre16_free = new_free; | |
3166 | pcre16_stack_malloc = stack_malloc; | |
3167 | pcre16_stack_free = stack_free; | |
3168 | #endif | |
3169 | ||
3170 | #ifdef SUPPORT_PCRE32 | |
3171 | pcre32_malloc = new_malloc; | |
3172 | pcre32_free = new_free; | |
3173 | pcre32_stack_malloc = stack_malloc; | |
3174 | pcre32_stack_free = stack_free; | |
3175 | #endif | |
3176 | ||
3177 | /* Heading line unless quiet, then prompt for first regex if stdin */ | /* Heading line unless quiet, then prompt for first regex if stdin */ |
3178 | ||
3179 | if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version()); | if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version); |
3180 | ||
3181 | /* Main loop */ | /* Main loop */ |
3182 | ||
# | Line 947 while (!done) | Line 3191 while (!done) |
3191 | #endif | #endif |
3192 | ||
3193 | const char *error; | const char *error; |
3194 | unsigned char *p, *pp, *ppp; | pcre_uint8 *markptr; |
3195 | unsigned char *to_file = NULL; | pcre_uint8 *p, *pp, *ppp; |
3196 | const unsigned char *tables = NULL; | pcre_uint8 *to_file = NULL; |
3197 | const pcre_uint8 *tables = NULL; | |
3198 | unsigned long int get_options; | |
3199 | unsigned long int true_size, true_study_size = 0; | unsigned long int true_size, true_study_size = 0; |
3200 | size_t size, regex_gotten_store; | size_t size, regex_gotten_store; |
3201 | int do_allcaps = 0; | |
3202 | int do_mark = 0; | |
3203 | int do_study = 0; | int do_study = 0; |
3204 | int no_force_study = 0; | |
3205 | int do_debug = debug; | int do_debug = debug; |
int debug_lengths = 1; | ||
3206 | int do_G = 0; | int do_G = 0; |
3207 | int do_g = 0; | int do_g = 0; |
3208 | int do_showinfo = showinfo; | int do_showinfo = showinfo; |
3209 | int do_showrest = 0; | int do_showrest = 0; |
3210 | int do_showcaprest = 0; | |
3211 | int do_flip = 0; | int do_flip = 0; |
3212 | int erroroffset, len, delimiter, poffset; | int erroroffset, len, delimiter, poffset; |
3213 | ||
3214 | use_utf8 = 0; | #if !defined NODFA |
3215 | int dfa_matched = 0; | |
3216 | #endif | |
3217 | ||
3218 | if (infile == stdin) printf(" re> "); | use_utf = 0; |
3219 | if (extend_inputline(infile, buffer) == NULL) break; | debug_lengths = 1; |
3220 | ||
3221 | if (extend_inputline(infile, buffer, " re> ") == NULL) break; | |
3222 | if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); | if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); |
3223 | fflush(outfile); | fflush(outfile); |
3224 | ||
# | Line 977 while (!done) | Line 3230 while (!done) |
3230 | ||
3231 | if (*p == '<' && strchr((char *)(p+1), '<') == NULL) | if (*p == '<' && strchr((char *)(p+1), '<') == NULL) |
3232 | { | { |
3233 | unsigned long int magic, get_options; | pcre_uint32 magic; |
3234 | uschar sbuf[8]; | pcre_uint8 sbuf[8]; |
3235 | FILE *f; | FILE *f; |
3236 | ||
3237 | p++; | p++; |
3238 | if (*p == '!') | |
3239 | { | |
3240 | do_debug = TRUE; | |
3241 | do_showinfo = TRUE; | |
3242 | p++; | |
3243 | } | |
3244 | ||
3245 | pp = p + (int)strlen((char *)p); | pp = p + (int)strlen((char *)p); |
3246 | while (isspace(pp[-1])) pp--; | while (isspace(pp[-1])) pp--; |
3247 | *pp = 0; | *pp = 0; |
# | Line 993 while (!done) | Line 3253 while (!done) |
3253 | continue; | continue; |
3254 | } | } |
3255 | ||
3256 | first_gotten_store = 0; | |
3257 | if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ; | if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ; |
3258 | ||
3259 | true_size = | true_size = |
# | Line 1000 while (!done) | Line 3261 while (!done) |
3261 | true_study_size = | true_study_size = |
3262 | (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7]; | (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7]; |
3263 | ||
3264 | re = (real_pcre *)new_malloc(true_size); | re = (pcre *)new_malloc(true_size); |
3265 | regex_gotten_store = gotten_store; | if (re == NULL) |
3266 | { | |
3267 | printf("** Failed to get %d bytes of memory for pcre object\n", | |
3268 | (int)true_size); | |
3269 | yield = 1; | |
3270 | goto EXIT; | |
3271 | } | |
3272 | regex_gotten_store = first_gotten_store; | |
3273 | ||
3274 | if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ; | if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ; |
3275 | ||
3276 | magic = ((real_pcre *)re)->magic_number; | magic = REAL_PCRE_MAGIC(re); |
3277 | if (magic != MAGIC_NUMBER) | if (magic != MAGIC_NUMBER) |
3278 | { | { |
3279 | if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER) | if (swap_uint32(magic) == MAGIC_NUMBER) |
3280 | { | { |
3281 | do_flip = 1; | do_flip = 1; |
3282 | } | } |
3283 | else | else |
3284 | { | { |
3285 | fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p); | fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p); |
3286 | new_free(re); | |
3287 | fclose(f); | fclose(f); |
3288 | continue; | continue; |
3289 | } | } |
3290 | } | } |
3291 | ||
3292 | fprintf(outfile, "Compiled regex%s loaded from %s\n", | /* We hide the byte-invert info for little and big endian tests. */ |
3293 | do_flip? " (byte-inverted)" : "", p); | fprintf(outfile, "Compiled pattern%s loaded from %s\n", |
3294 | do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p); | |
/* Need to know if UTF-8 for printing data strings */ | ||
3295 | ||
3296 | new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options); | /* Now see if there is any following study data. */ |
use_utf8 = (get_options & PCRE_UTF8) != 0; | ||
/* Now see if there is any following study data */ | ||
3297 | ||
3298 | if (true_study_size != 0) | if (true_study_size != 0) |
3299 | { | { |
# | Line 1044 while (!done) | Line 3309 while (!done) |
3309 | { | { |
3310 | FAIL_READ: | FAIL_READ: |
3311 | fprintf(outfile, "Failed to read data from %s\n", p); | fprintf(outfile, "Failed to read data from %s\n", p); |
3312 | if (extra != NULL) new_free(extra); | if (extra != NULL) |
3313 | if (re != NULL) new_free(re); | { |
3314 | PCRE_FREE_STUDY(extra); | |
3315 | } | |
3316 | new_free(re); | |
3317 | fclose(f); | fclose(f); |
3318 | continue; | continue; |
3319 | } | } |
# | Line 1054 while (!done) | Line 3322 while (!done) |
3322 | } | } |
3323 | else fprintf(outfile, "No study data\n"); | else fprintf(outfile, "No study data\n"); |
3324 | ||
3325 | /* Flip the necessary bytes. */ | |
3326 | if (do_flip) | |
3327 | { | |
3328 | int rc; | |
3329 | PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL); | |
3330 | if (rc == PCRE_ERROR_BADMODE) | |
3331 | { | |
3332 | /* Simulate the result of the function call below. */ | |
3333 | fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc, | |
3334 | pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", | |
3335 | PCRE_INFO_OPTIONS); | |
3336 | fprintf(outfile, "Running in %d-bit mode but pattern was compiled in " | |
3337 | "%d-bit mode\n", 8 * CHAR_SIZE, | |
3338 | 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK)); | |
3339 | new_free(re); | |
3340 | fclose(f); | |
3341 | continue; | |
3342 | } | |
3343 | } | |
3344 | ||
3345 | /* Need to know if UTF-8 for printing data strings. */ | |
3346 | ||
3347 | if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) | |
3348 | { | |
3349 | new_free(re); | |
3350 | fclose(f); | |
3351 | continue; | |
3352 | } | |
3353 | use_utf = (get_options & PCRE_UTF8) != 0; | |
3354 | ||
3355 | fclose(f); | fclose(f); |
3356 | goto SHOW_INFO; | goto SHOW_INFO; |
3357 | } | } |
3358 | ||
3359 | /* In-line pattern (the usual case). Get the delimiter and seek the end of | /* In-line pattern (the usual case). Get the delimiter and seek the end of |
3360 | the pattern; if is isn't complete, read more. */ | the pattern; if it isn't complete, read more. */ |
3361 | ||
3362 | delimiter = *p++; | delimiter = *p++; |
3363 | ||
3364 | if (isalnum(delimiter) || delimiter == '\\') | if (isalnum(delimiter) || delimiter == '\\') |
3365 | { | { |
3366 | fprintf(outfile, "** Delimiter must not be alphameric or \\\n"); | fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n"); |
3367 | goto SKIP_DATA; | goto SKIP_DATA; |
3368 | } | } |
3369 | ||
3370 | pp = p; | pp = p; |
3371 | poffset = p - buffer; | poffset = (int)(p - buffer); |
3372 | ||
3373 | for(;;) | for(;;) |
3374 | { | { |
# | Line 1081 while (!done) | Line 3379 while (!done) |
3379 | pp++; | pp++; |
3380 | } | } |
3381 | if (*pp != 0) break; | if (*pp != 0) break; |
3382 | if (infile == stdin) printf(" > "); | if ((pp = extend_inputline(infile, pp, " > ")) == NULL) |
if ((pp = extend_inputline(infile, pp)) == NULL) | ||
3383 | { | { |
3384 | fprintf(outfile, "** Unexpected EOF\n"); | fprintf(outfile, "** Unexpected EOF\n"); |
3385 | done = 1; | done = 1; |
# | Line 1111 while (!done) | Line 3408 while (!done) |
3408 | /* Look for options after final delimiter */ | /* Look for options after final delimiter */ |
3409 | ||
3410 | options = 0; | options = 0; |
3411 | study_options = 0; | study_options = force_study_options; |
3412 | log_store = showstore; /* default from command line */ | log_store = showstore; /* default from command line */ |
3413 | ||
3414 | while (*pp != 0) | while (*pp != 0) |
# | Line 1125 while (!done) | Line 3422 while (!done) |
3422 | case 's': options |= PCRE_DOTALL; break; | case 's': options |= PCRE_DOTALL; break; |
3423 | case 'x': options |= PCRE_EXTENDED; break; | case 'x': options |= PCRE_EXTENDED; break; |
3424 | ||
3425 | case '+': do_showrest = 1; break; | case '+': |
3426 | if (do_showrest) do_showcaprest = 1; else do_showrest = 1; | |
3427 | break; | |
3428 | ||
3429 | case '=': do_allcaps = 1; break; | |
3430 | case 'A': options |= PCRE_ANCHORED; break; | case 'A': options |= PCRE_ANCHORED; break; |
3431 | case 'B': do_debug = 1; break; | case 'B': do_debug = 1; break; |
3432 | case 'C': options |= PCRE_AUTO_CALLOUT; break; | case 'C': options |= PCRE_AUTO_CALLOUT; break; |
# | Line 1135 while (!done) | Line 3436 while (!done) |
3436 | case 'G': do_G = 1; break; | case 'G': do_G = 1; break; |
3437 | case 'I': do_showinfo = 1; break; | case 'I': do_showinfo = 1; break; |
3438 | case 'J': options |= PCRE_DUPNAMES; break; | case 'J': options |= PCRE_DUPNAMES; break; |
3439 | case 'K': do_mark = 1; break; | |
3440 | case 'M': log_store = 1; break; | case 'M': log_store = 1; break; |
3441 | case 'N': options |= PCRE_NO_AUTO_CAPTURE; break; | case 'N': options |= PCRE_NO_AUTO_CAPTURE; break; |
3442 | ||
# | Line 1142 while (!done) | Line 3444 while (!done) |
3444 | case 'P': do_posix = 1; break; | case 'P': do_posix = 1; break; |
3445 | #endif | #endif |
3446 | ||
3447 | case 'S': do_study = 1; break; | case 'S': |
3448 | do_study = 1; | |
3449 | for (;;) | |
3450 | { | |
3451 | switch (*pp++) | |
3452 | { | |
3453 | case 'S': | |
3454 | do_study = 0; | |
3455 | no_force_study = 1; | |
3456 | break; | |
3457 | ||
3458 | case '!': | |
3459 | study_options |= PCRE_STUDY_EXTRA_NEEDED; | |
3460 | break; | |
3461 | ||
3462 | case '+': | |
3463 | if (*pp == '+') | |
3464 | { | |
3465 | verify_jit = TRUE; | |
3466 | pp++; | |
3467 | } | |
3468 | if (*pp >= '1' && *pp <= '7') | |
3469 | study_options |= jit_study_bits[*pp++ - '1']; | |
3470 | else | |
3471 | study_options |= jit_study_bits[6]; | |
3472 | break; | |
3473 | ||
3474 | case '-': | |
3475 | study_options &= ~PCRE_STUDY_ALLJIT; | |
3476 | break; | |
3477 | ||
3478 | default: | |
3479 | pp--; | |
3480 | goto ENDLOOP; | |
3481 | } | |
3482 | } | |
3483 | ENDLOOP: | |
3484 | break; | |
3485 | ||
3486 | case 'U': options |= PCRE_UNGREEDY; break; | case 'U': options |= PCRE_UNGREEDY; break; |
3487 | case 'W': options |= PCRE_UCP; break; | |
3488 | case 'X': options |= PCRE_EXTRA; break; | case 'X': options |= PCRE_EXTRA; break; |
3489 | case 'Y': options |= PCRE_NO_START_OPTIMISE; break; | |
3490 | case 'Z': debug_lengths = 0; break; | case 'Z': debug_lengths = 0; break; |
3491 | case '8': options |= PCRE_UTF8; use_utf8 = 1; break; | case '8': options |= PCRE_UTF8; use_utf = 1; break; |
3492 | case '?': options |= PCRE_NO_UTF8_CHECK; break; | case '?': options |= PCRE_NO_UTF8_CHECK; break; |
3493 | ||
3494 | case 'T': | |
3495 | switch (*pp++) | |
3496 | { | |
3497 | case '0': tables = tables0; break; | |
3498 | case '1': tables = tables1; break; | |
3499 | ||
3500 | case '\r': | |
3501 | case '\n': | |
3502 | case ' ': | |
3503 | case 0: | |
3504 | fprintf(outfile, "** Missing table number after /T\n"); | |
3505 | goto SKIP_DATA; | |
3506 | ||
3507 | default: | |
3508 | fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]); | |
3509 | goto SKIP_DATA; | |
3510 | } | |
3511 | break; | |
3512 | ||
3513 | case 'L': | case 'L': |
3514 | ppp = pp; | ppp = pp; |
3515 | /* The '\r' test here is so that it works on Windows. */ | /* The '\r' test here is so that it works on Windows. */ |
# | Line 1161 while (!done) | Line 3522 while (!done) |
3522 | goto SKIP_DATA; | goto SKIP_DATA; |
3523 | } | } |
3524 | locale_set = 1; | locale_set = 1; |
3525 | tables = pcre_maketables(); | tables = PCRE_MAKETABLES; |
3526 | pp = ppp; | pp = ppp; |
3527 | break; | break; |
3528 | ||
# | Line 1174 while (!done) | Line 3535 while (!done) |
3535 | ||
3536 | case '<': | case '<': |
3537 | { | { |
3538 | int x = check_newline(pp, outfile); | if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0) |
3539 | if (x == 0) goto SKIP_DATA; | { |
3540 | options |= x; | options |= PCRE_JAVASCRIPT_COMPAT; |
3541 | while (*pp++ != '>'); | pp += 3; |
3542 | } | |
3543 | else | |
3544 | { | |
3545 | int x = check_newline(pp, outfile); | |
3546 | if (x == 0) goto SKIP_DATA; | |
3547 | options |= x; | |
3548 | while (*pp++ != '>'); | |
3549 | } | |
3550 | } | } |
3551 | break; | break; |
3552 | ||
# | Line 1194 while (!done) | Line 3563 while (!done) |
3563 | ||
3564 | /* Handle compiling via the POSIX interface, which doesn't support the | /* Handle compiling via the POSIX interface, which doesn't support the |
3565 | timing, showing, or debugging options, nor the ability to pass over | timing, showing, or debugging options, nor the ability to pass over |
3566 | local character tables. */ | local character tables. Neither does it have 16-bit support. */ |
3567 | ||
3568 | #if !defined NOPOSIX | #if !defined NOPOSIX |
3569 | if (posix || do_posix) | if (posix || do_posix) |
# | Line 1207 while (!done) | Line 3576 while (!done) |
3576 | if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL; | if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL; |
3577 | if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB; | if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB; |
3578 | if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8; | if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8; |
3579 | if ((options & PCRE_UCP) != 0) cflags |= REG_UCP; | |
3580 | if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY; | |
3581 | ||
3582 | first_gotten_store = 0; | |
3583 | rc = regcomp(&preg, (char *)p, cflags); | rc = regcomp(&preg, (char *)p, cflags); |
3584 | ||
3585 | /* Compilation failed; go back for another re, skipping to blank line | /* Compilation failed; go back for another re, skipping to blank line |
# | Line 1227 while (!done) | Line 3599 while (!done) |
3599 | #endif /* !defined NOPOSIX */ | #endif /* !defined NOPOSIX */ |
3600 | ||
3601 | { | { |
3602 | /* In 16- or 32-bit mode, convert the input. */ | |
3603 | ||
3604 | #ifdef SUPPORT_PCRE16 | |
3605 | if (pcre_mode == PCRE16_MODE) | |
3606 | { | |
3607 | switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p))) | |
3608 | { | |
3609 | case -1: | |
3610 | fprintf(outfile, "**Failed: invalid UTF-8 string cannot be " | |
3611 | "converted to UTF-16\n"); | |
3612 | goto SKIP_DATA; | |
3613 | ||
3614 | case -2: | |
3615 | fprintf(outfile, "**Failed: character value greater than 0x10ffff " | |
3616 | "cannot be converted to UTF-16\n"); | |
3617 | goto SKIP_DATA; | |
3618 | ||
3619 | case -3: /* "Impossible error" when to16 is called arg1 FALSE */ | |
3620 | fprintf(outfile, "**Failed: character value greater than 0xffff " | |
3621 | "cannot be converted to 16-bit in non-UTF mode\n"); | |
3622 | goto SKIP_DATA; | |
3623 | ||
3624 | default: | |
3625 | break; | |
3626 | } | |
3627 | p = (pcre_uint8 *)buffer16; | |
3628 | } | |
3629 | #endif | |
3630 | ||
3631 | #ifdef SUPPORT_PCRE32 | |
3632 | if (pcre_mode == PCRE32_MODE) | |
3633 | { | |
3634 | switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p))) | |
3635 | { | |
3636 | case -1: | |
3637 | fprintf(outfile, "**Failed: invalid UTF-8 string cannot be " | |
3638 | "converted to UTF-32\n"); | |
3639 | goto SKIP_DATA; | |
3640 | ||
3641 | case -2: | |
3642 | fprintf(outfile, "**Failed: character value greater than 0x10ffff " | |
3643 | "cannot be converted to UTF-32\n"); | |
3644 | goto SKIP_DATA; | |
3645 | ||
3646 | case -3: | |
3647 | fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n"); | |
3648 | goto SKIP_DATA; | |
3649 | ||
3650 | default: | |
3651 | break; | |
3652 | } | |
3653 | p = (pcre_uint8 *)buffer32; | |
3654 | } | |
3655 | #endif | |
3656 | ||
3657 | /* Compile many times when timing */ | |
3658 | ||
3659 | if (timeit > 0) | if (timeit > 0) |
3660 | { | { |
3661 | register int i; | register int i; |
# | Line 1234 while (!done) | Line 3663 while (!done) |
3663 | clock_t start_time = clock(); | clock_t start_time = clock(); |
3664 | for (i = 0; i < timeit; i++) | for (i = 0; i < timeit; i++) |
3665 | { | { |
3666 | re = pcre_compile((char *)p, options, &error, &erroroffset, tables); | PCRE_COMPILE(re, p, options, &error, &erroroffset, tables); |
3667 | if (re != NULL) free(re); | if (re != NULL) free(re); |
3668 | } | } |
3669 | time_taken = clock() - start_time; | time_taken = clock() - start_time; |
# | Line 1243 while (!done) | Line 3672 while (!done) |
3672 | (double)CLOCKS_PER_SEC); | (double)CLOCKS_PER_SEC); |
3673 | } | } |
3674 | ||
3675 | re = pcre_compile((char *)p, options, &error, &erroroffset, tables); | first_gotten_store = 0; |
3676 | PCRE_COMPILE(re, p, options, &error, &erroroffset, tables); | |
3677 | ||
3678 | /* Compilation failed; go back for another re, skipping to blank line | /* Compilation failed; go back for another re, skipping to blank line |
3679 | if non-interactive. */ | if non-interactive. */ |
# | Line 1256 while (!done) | Line 3686 while (!done) |
3686 | { | { |
3687 | for (;;) | for (;;) |
3688 | { | { |
3689 | if (extend_inputline(infile, buffer) == NULL) | if (extend_inputline(infile, buffer, NULL) == NULL) |
3690 | { | { |
3691 | done = 1; | done = 1; |
3692 | goto CONTINUE; | goto CONTINUE; |
# | Line 1270 while (!done) | Line 3700 while (!done) |
3700 | goto CONTINUE; | goto CONTINUE; |
3701 | } | } |
3702 | ||
3703 | /* Compilation succeeded; print data if required. There are now two | /* Compilation succeeded. It is now possible to set the UTF-8 option from |
3704 | info-returning functions. The old one has a limited interface and | within the regex; check for this so that we know how to process the data |
3705 | returns only limited data. Check that it agrees with the newer one. */ | lines. */ |
3706 | ||
3707 | if (log_store) | if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) |
3708 | fprintf(outfile, "Memory allocation (code space): %d\n", | goto SKIP_DATA; |
3709 | (int)(gotten_store - | if ((get_options & PCRE_UTF8) != 0) use_utf = 1; |
sizeof(real_pcre) - | ||
((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size)); | ||
3710 | ||
3711 | /* Extract the size for possible writing before possibly flipping it, | /* Extract the size for possible writing before possibly flipping it, |
3712 | and remember the store that was got. */ | and remember the store that was got. */ |
3713 | ||
3714 | true_size = ((real_pcre *)re)->size; | true_size = REAL_PCRE_SIZE(re); |
3715 | regex_gotten_store = gotten_store; | regex_gotten_store = first_gotten_store; |
3716 | ||
3717 | /* Output code size information if requested */ | |
3718 | ||
3719 | if (log_store) | |
3720 | { | |
3721 | int name_count, name_entry_size, real_pcre_size; | |
3722 | ||
3723 | new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count); | |
3724 | new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size); | |
3725 | #ifdef SUPPORT_PCRE8 | |
3726 | if (REAL_PCRE_FLAGS(re) & PCRE_MODE8) | |
3727 | real_pcre_size = sizeof(real_pcre); | |
3728 | #endif | |
3729 | #ifdef SUPPORT_PCRE16 | |
3730 | if (REAL_PCRE_FLAGS(re) & PCRE_MODE16) | |
3731 | real_pcre_size = sizeof(real_pcre16); | |
3732 | #endif | |
3733 | #ifdef SUPPORT_PCRE32 | |
3734 | if (REAL_PCRE_FLAGS(re) & PCRE_MODE32) | |
3735 | real_pcre_size = sizeof(real_pcre32); | |
3736 | #endif | |
3737 | fprintf(outfile, "Memory allocation (code space): %d\n", | |
3738 | (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size)); | |
3739 | } | |
3740 | ||
3741 | /* If /S was present, study the regexp to generate additional info to | /* If -s or /S was present, study the regex to generate additional info to |
3742 | help with the matching. */ | help with the matching, unless the pattern has the SS option, which |
3743 | suppresses the effect of /S (used for a few test patterns where studying is | |
3744 | never sensible). */ | |
3745 | ||
3746 | if (do_study) | if (do_study || (force_study >= 0 && !no_force_study)) |
3747 | { | { |
3748 | if (timeit > 0) | if (timeit > 0) |
3749 | { | { |
# | Line 1297 while (!done) | Line 3751 while (!done) |
3751 | clock_t time_taken; | clock_t time_taken; |
3752 | clock_t start_time = clock(); | clock_t start_time = clock(); |
3753 | for (i = 0; i < timeit; i++) | for (i = 0; i < timeit; i++) |
3754 | extra = pcre_study(re, study_options, &error); | { |
3755 | PCRE_STUDY(extra, re, study_options, &error); | |
3756 | } | |
3757 | time_taken = clock() - start_time; | time_taken = clock() - start_time; |
3758 | if (extra != NULL) free(extra); | if (extra != NULL) |
3759 | { | |
3760 | PCRE_FREE_STUDY(extra); | |
3761 | } | |
3762 | fprintf(outfile, " Study time %.4f milliseconds\n", | fprintf(outfile, " Study time %.4f milliseconds\n", |
3763 | (((double)time_taken * 1000.0) / (double)timeit) / | (((double)time_taken * 1000.0) / (double)timeit) / |
3764 | (double)CLOCKS_PER_SEC); | (double)CLOCKS_PER_SEC); |
3765 | } | } |
3766 | extra = pcre_study(re, study_options, &error); | PCRE_STUDY(extra, re, study_options, &error); |
3767 | if (error != NULL) | if (error != NULL) |
3768 | fprintf(outfile, "Failed to study: %s\n", error); | fprintf(outfile, "Failed to study: %s\n", error); |
3769 | else if (extra != NULL) | else if (extra != NULL) |
3770 | { | |
3771 | true_study_size = ((pcre_study_data *)(extra->study_data))->size; | true_study_size = ((pcre_study_data *)(extra->study_data))->size; |
3772 | if (log_store) | |
3773 | { | |
3774 | size_t jitsize; | |
3775 | if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 && | |
3776 | jitsize != 0) | |
3777 | fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize); | |
3778 | } | |
3779 | } | |
3780 | } | } |
3781 | ||
3782 | /* If the 'F' option was present, we flip the bytes of all the integer | /* If /K was present, we set up for handling MARK data. */ |
fields in the regex data block and the study block. This is to make it | ||
possible to test PCRE's handling of byte-flipped patterns, e.g. those | ||
compiled on a different architecture. */ | ||
3783 | ||
3784 | if (do_flip) | if (do_mark) |
3785 | { | { |
3786 | real_pcre *rre = (real_pcre *)re; | if (extra == NULL) |
rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number)); | ||
rre->size = byteflip(rre->size, sizeof(rre->size)); | ||
rre->options = byteflip(rre->options, sizeof(rre->options)); | ||
rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket)); | ||
rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref)); | ||
rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte)); | ||
rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte)); | ||
rre->name_table_offset = byteflip(rre->name_table_offset, | ||
sizeof(rre->name_table_offset)); | ||
rre->name_entry_size = byteflip(rre->name_entry_size, | ||
sizeof(rre->name_entry_size)); | ||
rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count)); | ||
if (extra != NULL) | ||
3787 | { | { |
3788 | pcre_study_data *rsd = (pcre_study_data *)(extra->study_data); | extra = (pcre_extra *)malloc(sizeof(pcre_extra)); |
3789 | rsd->size = byteflip(rsd->size, sizeof(rsd->size)); | extra->flags = 0; |
rsd->options = byteflip(rsd->options, sizeof(rsd->options)); | ||
3790 | } | } |
3791 | extra->mark = &markptr; | |
3792 | extra->flags |= PCRE_EXTRA_MARK; | |
3793 | } | } |
3794 | ||
3795 | /* Extract information from the compiled data if required */ | /* Extract and display information from the compiled data if required. */ |
3796 | ||
3797 | SHOW_INFO: | SHOW_INFO: |
3798 |