/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1122 - (show annotations)
Wed Oct 17 17:31:19 2012 UTC (6 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 173136 byte(s)
Error occurred while calculating annotation data.
Fix pcretest compiler warnings and make it compile with only 2 out of 3 bit 
sizes.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123 /* PRIV(name) expands to name unchanged. NOTE(review): presumably defined before pcre_internal.h to override its private-name prefixing - confirm there. */
124 #define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136
137 #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138 /* Configure internal macros to 32 bit mode. */
139 #define COMPILE_PCRE32
140 #endif
141 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142 /* Configure internal macros to 16 bit mode. */
143 #define COMPILE_PCRE16
144 #endif
145 #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146 /* Configure internal macros to 8 bit mode. */
147 #define COMPILE_PCRE8
148 #endif
149
150 #include "pcre_internal.h"
151
152 /* The pcre_printint() function, which prints the internal form of a compiled
153 regex, is held in a separate file so that (a) it can be compiled in either
154 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155 when that is compiled in debug mode. */
156
157 #ifdef SUPPORT_PCRE8
158 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159 #endif /* SUPPORT_PCRE8 */
160 #ifdef SUPPORT_PCRE16
161 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162 #endif /* SUPPORT_PCRE16 */
163 #ifdef SUPPORT_PCRE32
164 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165 #endif /* SUPPORT_PCRE32 */
166
167 /* We need access to some of the data tables that PCRE uses. So as not to have
168 to keep two copies, we include the source files here, changing the names of the
169 external symbols to prevent clashes. */
170
171 #define PCRE_INCLUDED
172
173 #include "pcre_tables.c"
174 #include "pcre_ucd.c"
175
176 /* The definition of the macro PRINTABLE, which determines whether to print an
177 output character as-is or as a hex value when showing compiled patterns, is
178 the same as in the printint.src file. We use it here in cases when the locale
179 has not been explicitly changed, so as to get consistent output from systems
180 that differ in their output from isprint() even in the "C" locale. */
181
182 #ifdef EBCDIC
183 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
184 #else /* not EBCDIC: values 32..126 count as printable */
185 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
186 #endif /* EBCDIC */
187 /* PRINTOK(c): isprint(c) when locale_set is non-zero, otherwise the fixed PRINTABLE(c) range test. */
188 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189
190 /* Posix support is disabled in 16 or 32 bit only mode. */
191 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192 #define NOPOSIX
193 #endif
194
195 /* It is possible to compile this test program without including support for
196 testing the POSIX interface, though this is not available via the standard
197 Makefile. */
198
199 #if !defined NOPOSIX
200 #include "pcreposix.h"
201 #endif
202
203 /* It is also possible, originally for the benefit of a version that was
204 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206 automatically cut out the UTF support if PCRE is built without it. */
207
208 #ifndef SUPPORT_UTF
209 #ifndef NOUTF
210 #define NOUTF
211 #endif
212 #endif
213
214 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
215 for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
216 only from one place and is handled differently). I couldn't dream up any way of
217 using a single macro to do this in a generic way, because of the many different
218 argument requirements. We know that at least one of SUPPORT_PCRE8,
219 SUPPORT_PCRE16 and SUPPORT_PCRE32 must be set. First define macros for each
220 individual mode; then use these in the definitions of generic macros.
221
222 **** Special note about the PCHARSxxx macros: the address of the string to be
223 printed is always given as two arguments: a base address followed by an offset.
224 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
225 offset is in units of this size. If the string were given as base+offset in one
226 argument, the casting might be incorrectly applied. */
227
228 #ifdef SUPPORT_PCRE8
229 /* 8-bit mode: each macro below expands to a call of pchars(), read_capture_name8(), strlen() or a pcre_xxx() function. */
230 #define PCHARS8(lv, p, offset, len, f) \
231 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
232 /* As PCHARS8, but the value returned by pchars() is discarded. */
233 #define PCHARSV8(p, offset, len, f) \
234 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235 /* Only cn8 is passed on; the cn16 and cn32 arguments are ignored in this mode. */
236 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237 p = read_capture_name8(p, cn8, re)
238 /* strlen() of p viewed as a char string, with the result cast to int. */
239 #define STRLEN8(p) ((int)strlen((char *)p))
240 /* Stores callout in pcre_callout. */
241 #define SET_PCRE_CALLOUT8(callout) \
242 pcre_callout = callout
243
244 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245 pcre_assign_jit_stack(extra, callback, userdata)
246 /* Assigns the result of pcre_compile() to re; pat is cast to char *. */
247 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248 re = pcre_compile((char *)pat, options, error, erroffset, tables)
249 /* Assigns the return value to rc; size is passed through unchanged. */
250 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251 namesptr, cbuffer, size) \
252 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253 (char *)namesptr, cbuffer, size)
254
255 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257 /* Assigns the return value of pcre_dfa_exec() to count. */
258 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259 offsets, size_offsets, workspace, size_workspace) \
260 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261 offsets, size_offsets, workspace, size_workspace)
262 /* Assigns the return value of pcre_exec() to count. */
263 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264 offsets, size_offsets) \
265 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266 offsets, size_offsets)
267
268 #define PCRE_FREE_STUDY8(extra) \
269 pcre_free_study(extra)
270
271 #define PCRE_FREE_SUBSTRING8(substring) \
272 pcre_free_substring(substring)
273
274 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275 pcre_free_substring_list(listptr)
276
277 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278 getnamesptr, subsptr) \
279 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280 (char *)getnamesptr, subsptr)
281 /* NOTE(review): the second parameter is named rc and is unused; the expansion picks up re from the caller's scope - confirm this is intended. */
282 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
283 n = pcre_get_stringnumber(re, (char *)ptr)
284
285 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287
288 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290
291 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293
294 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295 pcre_printint(re, outfile, debug_lengths)
296 /* Assigns the result of pcre_study() to extra. */
297 #define PCRE_STUDY8(extra, re, options, error) \
298 extra = pcre_study(re, options, error)
299 /* Expression macro: evaluates to the value returned by pcre_jit_stack_alloc(). */
300 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301 pcre_jit_stack_alloc(startsize, maxsize)
302
303 #define PCRE_JIT_STACK_FREE8(stack) \
304 pcre_jit_stack_free(stack)
305
306 #endif /* SUPPORT_PCRE8 */
307
308 /* -----------------------------------------------------------*/
309
310 #ifdef SUPPORT_PCRE16
311 /* 16-bit mode: same macro set as the 8-bit one, calling the pcre16_xxx() functions with casts to the pcre16 types. */
312 #define PCHARS16(lv, p, offset, len, f) \
313 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
314 /* As PCHARS16, but the value returned by pchars16() is discarded. */
315 #define PCHARSV16(p, offset, len, f) \
316 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
317 /* Only cn16 is passed on; the cn8 and cn32 arguments are ignored in this mode. */
318 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319 p = read_capture_name16(p, cn16, re)
320
321 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322 /* callout is cast to the 16-bit callout function pointer type before being stored in pcre16_callout. */
323 #define SET_PCRE_CALLOUT16(callout) \
324 pcre16_callout = (int (*)(pcre16_callout_block *))callout
325
326 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327 pcre16_assign_jit_stack((pcre16_extra *)extra, \
328 (pcre16_jit_callback)callback, userdata)
329 /* The value returned by pcre16_compile() is cast to pcre * before being assigned to re. */
330 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332 tables)
333 /* size is halved before being passed on - presumably a byte count converted to 16-bit units; confirm at the call site. */
334 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335 namesptr, cbuffer, size) \
336 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338 /* As above: cbuffer is viewed as PCRE_UCHAR16 and size is halved. */
339 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341 (PCRE_UCHAR16 *)cbuffer, size/2)
342
343 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344 offsets, size_offsets, workspace, size_workspace) \
345 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347 workspace, size_workspace)
348
349 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350 offsets, size_offsets) \
351 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352 len, start_offset, options, offsets, size_offsets)
353
354 #define PCRE_FREE_STUDY16(extra) \
355 pcre16_free_study((pcre16_extra *)extra)
356
357 #define PCRE_FREE_SUBSTRING16(substring) \
358 pcre16_free_substring((PCRE_SPTR16)substring)
359
360 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362 /* subsptr is converted via void * to PCRE_SPTR16 *. */
363 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364 getnamesptr, subsptr) \
365 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367 /* NOTE(review): the second parameter is named rc and is unused; re comes from the caller's scope and is passed without a pcre16 * cast - confirm. */
368 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
369 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
370
371 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373 (PCRE_SPTR16 *)(void*)subsptr)
374
375 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377 (PCRE_SPTR16 **)(void*)listptr)
378
379 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381 tables)
382
383 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384 pcre16_printint(re, outfile, debug_lengths)
385 /* The value returned by pcre16_study() is cast to pcre_extra * before being assigned to extra. */
386 #define PCRE_STUDY16(extra, re, options, error) \
387 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388 /* Expression macro: the pcre16 JIT stack pointer is cast to pcre_jit_stack *. */
389 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391
392 #define PCRE_JIT_STACK_FREE16(stack) \
393 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394
395 #endif /* SUPPORT_PCRE16 */
396
397 /* -----------------------------------------------------------*/
398
399 #ifdef SUPPORT_PCRE32
400 /* 32-bit mode: same macro set as the 8- and 16-bit ones, calling the pcre32_xxx() functions with casts to the pcre32 types. */
401 #define PCHARS32(lv, p, offset, len, f) \
402 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
403 /* As PCHARS32 with the result discarded. Both macros pass use_utf, which is taken from the caller's scope, not from an argument. */
404 #define PCHARSV32(p, offset, len, f) \
405 (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
406 /* Only cn32 is passed on; the cn8 and cn16 arguments are ignored in this mode. */
407 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408 p = read_capture_name32(p, cn32, re)
409
410 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411 /* callout is cast to the 32-bit callout function pointer type before being stored in pcre32_callout. */
412 #define SET_PCRE_CALLOUT32(callout) \
413 pcre32_callout = (int (*)(pcre32_callout_block *))callout
414
415 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416 pcre32_assign_jit_stack((pcre32_extra *)extra, \
417 (pcre32_jit_callback)callback, userdata)
418 /* The value returned by pcre32_compile() is cast to pcre * before being assigned to re. */
419 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421 tables)
422 /* Assigns the return value to rc. size is taken to be the byte size of cbuffer (the 16-bit variant passes size/2), so a PCRE_UCHAR32 buffer holds size/4 units. */
423 #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
424 namesptr, cbuffer, size) \
425 rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
426 count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/4)
427 /* Assigns the return value to rc. size is taken to be the byte size of cbuffer (the 16-bit variant passes size/2), so a PCRE_UCHAR32 buffer holds size/4 units. */
428 #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
429 rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
430 (PCRE_UCHAR32 *)cbuffer, size/4)
431 /* Assigns the return value of pcre32_dfa_exec() to count; re and extra are cast to the pcre32 types. */
432 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433 offsets, size_offsets, workspace, size_workspace) \
434 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436 workspace, size_workspace)
437 /* Assigns the return value of pcre32_exec() to count. */
438 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439 offsets, size_offsets) \
440 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441 len, start_offset, options, offsets, size_offsets)
442
443 #define PCRE_FREE_STUDY32(extra) \
444 pcre32_free_study((pcre32_extra *)extra)
445
446 #define PCRE_FREE_SUBSTRING32(substring) \
447 pcre32_free_substring((PCRE_SPTR32)substring)
448
449 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451 /* subsptr is converted via void * to PCRE_SPTR32 *. */
452 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453 getnamesptr, subsptr) \
454 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456 /* NOTE(review): the second parameter is named rc and is unused; re comes from the caller's scope and is passed without a pcre32 * cast - confirm. */
457 #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
458 n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
459
460 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462 (PCRE_SPTR32 *)(void*)subsptr)
463
464 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466 (PCRE_SPTR32 **)(void*)listptr)
467
468 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470 tables)
471
472 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473 pcre32_printint(re, outfile, debug_lengths)
474 /* The value returned by pcre32_study() is cast to pcre_extra * before being assigned to extra. */
475 #define PCRE_STUDY32(extra, re, options, error) \
476 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477 /* Expression macro: the pcre32 JIT stack pointer is cast to pcre_jit_stack *. */
478 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480
481 #define PCRE_JIT_STACK_FREE32(stack) \
482 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483
484 #endif /* SUPPORT_PCRE32 */
485
486
487 /* ----- More than one mode is supported; a runtime test is needed, except for
488 pcre_config(), and the JIT stack functions, when it doesn't matter which
489 version is called. ----- */
490 /* Values compared against pcre_mode by the generic macros; CHAR_SIZE relies on them being 0, 1 and 2. */
491 enum {
492 PCRE8_MODE, /* 0 */
493 PCRE16_MODE, /* 1 */
494 PCRE32_MODE /* 2 */
495 };
496
497 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
498 defined (SUPPORT_PCRE32)) >= 2
499 /* Evaluates to 1, 2 or 4 when pcre_mode is PCRE8_MODE, PCRE16_MODE or PCRE32_MODE respectively. */
500 #define CHAR_SIZE (1 << pcre_mode)
501
502 /* There doesn't seem to be an easy way of writing these macros that can cope
503 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
504 cases separately. */
505
506 /* ----- All three modes supported ----- */
507
508 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
509 /* Each generic macro tests pcre_mode at run time and expands to the 32-, 16- or 8-bit variant; the if/else forms end without a semicolon. */
510 #define PCHARS(lv, p, offset, len, f) \
511 if (pcre_mode == PCRE32_MODE) \
512 PCHARS32(lv, p, offset, len, f); \
513 else if (pcre_mode == PCRE16_MODE) \
514 PCHARS16(lv, p, offset, len, f); \
515 else \
516 PCHARS8(lv, p, offset, len, f)
517
518 #define PCHARSV(p, offset, len, f) \
519 if (pcre_mode == PCRE32_MODE) \
520 PCHARSV32(p, offset, len, f); \
521 else if (pcre_mode == PCRE16_MODE) \
522 PCHARSV16(p, offset, len, f); \
523 else \
524 PCHARSV8(p, offset, len, f)
525 /* All three name arguments are forwarded; each per-mode macro uses only its own. */
526 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
527 if (pcre_mode == PCRE32_MODE) \
528 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
529 else if (pcre_mode == PCRE16_MODE) \
530 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
531 else \
532 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
533
534 #define SET_PCRE_CALLOUT(callout) \
535 if (pcre_mode == PCRE32_MODE) \
536 SET_PCRE_CALLOUT32(callout); \
537 else if (pcre_mode == PCRE16_MODE) \
538 SET_PCRE_CALLOUT16(callout); \
539 else \
540 SET_PCRE_CALLOUT8(callout)
541 /* Expression macro (conditional operator), so it can be used where a value is needed. */
542 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
543
544 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
545 if (pcre_mode == PCRE32_MODE) \
546 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
547 else if (pcre_mode == PCRE16_MODE) \
548 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
549 else \
550 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
551
552 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
553 if (pcre_mode == PCRE32_MODE) \
554 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
555 else if (pcre_mode == PCRE16_MODE) \
556 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
557 else \
558 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
559 /* No run-time test: pcre_config() is used whatever the mode. */
560 #define PCRE_CONFIG pcre_config
561 /* The macros below dispatch on pcre_mode to the 32-, 16- or 8-bit variant, forwarding their arguments unchanged. */
562 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
563 namesptr, cbuffer, size) \
564 if (pcre_mode == PCRE32_MODE) \
565 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
566 namesptr, cbuffer, size); \
567 else if (pcre_mode == PCRE16_MODE) \
568 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
569 namesptr, cbuffer, size); \
570 else \
571 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
572 namesptr, cbuffer, size)
573
574 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
575 if (pcre_mode == PCRE32_MODE) \
576 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
577 else if (pcre_mode == PCRE16_MODE) \
578 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
579 else \
580 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
581
582 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets, workspace, size_workspace) \
584 if (pcre_mode == PCRE32_MODE) \
585 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets, workspace, size_workspace); \
587 else if (pcre_mode == PCRE16_MODE) \
588 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets, workspace, size_workspace); \
590 else \
591 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets, workspace, size_workspace)
593
594 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
595 offsets, size_offsets) \
596 if (pcre_mode == PCRE32_MODE) \
597 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
598 offsets, size_offsets); \
599 else if (pcre_mode == PCRE16_MODE) \
600 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
601 offsets, size_offsets); \
602 else \
603 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
604 offsets, size_offsets)
605
606 #define PCRE_FREE_STUDY(extra) \
607 if (pcre_mode == PCRE32_MODE) \
608 PCRE_FREE_STUDY32(extra); \
609 else if (pcre_mode == PCRE16_MODE) \
610 PCRE_FREE_STUDY16(extra); \
611 else \
612 PCRE_FREE_STUDY8(extra)
613
614 #define PCRE_FREE_SUBSTRING(substring) \
615 if (pcre_mode == PCRE32_MODE) \
616 PCRE_FREE_SUBSTRING32(substring); \
617 else if (pcre_mode == PCRE16_MODE) \
618 PCRE_FREE_SUBSTRING16(substring); \
619 else \
620 PCRE_FREE_SUBSTRING8(substring)
621
622 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
623 if (pcre_mode == PCRE32_MODE) \
624 PCRE_FREE_SUBSTRING_LIST32(listptr); \
625 else if (pcre_mode == PCRE16_MODE) \
626 PCRE_FREE_SUBSTRING_LIST16(listptr); \
627 else \
628 PCRE_FREE_SUBSTRING_LIST8(listptr)
629
630 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
631 getnamesptr, subsptr) \
632 if (pcre_mode == PCRE32_MODE) \
633 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
634 getnamesptr, subsptr); \
635 else if (pcre_mode == PCRE16_MODE) \
636 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
637 getnamesptr, subsptr); \
638 else \
639 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
640 getnamesptr, subsptr)
641 /* NOTE(review): the per-mode macros ignore their second argument and use re from the caller's scope - confirm at the call site. */
642 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
643 if (pcre_mode == PCRE32_MODE) \
644 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
645 else if (pcre_mode == PCRE16_MODE) \
646 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
647 else \
648 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
649
650 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
651 if (pcre_mode == PCRE32_MODE) \
652 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
653 else if (pcre_mode == PCRE16_MODE) \
654 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
655 else \
656 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
657
658 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
659 if (pcre_mode == PCRE32_MODE) \
660 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
661 else if (pcre_mode == PCRE16_MODE) \
662 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
663 else \
664 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
665 /* Expression macro: yields the JIT stack pointer from whichever mode's allocator pcre_mode selects. */
666 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
667 (pcre_mode == PCRE32_MODE ? \
668 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
669 : pcre_mode == PCRE16_MODE ? \
670 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
671 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
672
673 #define PCRE_JIT_STACK_FREE(stack) \
674 if (pcre_mode == PCRE32_MODE) \
675 PCRE_JIT_STACK_FREE32(stack); \
676 else if (pcre_mode == PCRE16_MODE) \
677 PCRE_JIT_STACK_FREE16(stack); \
678 else \
679 PCRE_JIT_STACK_FREE8(stack)
680 /* Expression macro: calls the maketables function of the mode selected by pcre_mode. */
681 #define PCRE_MAKETABLES \
682 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
683
684 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
685 if (pcre_mode == PCRE32_MODE) \
686 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
687 else if (pcre_mode == PCRE16_MODE) \
688 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
689 else \
690 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
691
692 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
693 if (pcre_mode == PCRE32_MODE) \
694 PCRE_PRINTINT32(re, outfile, debug_lengths); \
695 else if (pcre_mode == PCRE16_MODE) \
696 PCRE_PRINTINT16(re, outfile, debug_lengths); \
697 else \
698 PCRE_PRINTINT8(re, outfile, debug_lengths)
699
700 #define PCRE_STUDY(extra, re, options, error) \
701 if (pcre_mode == PCRE32_MODE) \
702 PCRE_STUDY32(extra, re, options, error); \
703 else if (pcre_mode == PCRE16_MODE) \
704 PCRE_STUDY16(extra, re, options, error); \
705 else \
706 PCRE_STUDY8(extra, re, options, error)
707
708
709 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
710
711 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
712 #define PCHARS(lv, p, offset, len, f) \
713 if (pcre_mode == PCRE32_MODE) \
714 PCHARS32(lv, p, offset, len, f); \
715 else \
716 PCHARS16(lv, p, offset, len, f)
717 /* In this branch every pcre_mode other than PCRE32_MODE is treated as 16-bit. */
718 #define PCHARSV(p, offset, len, f) \
719 if (pcre_mode == PCRE32_MODE) \
720 PCHARSV32(p, offset, len, f); \
721 else \
722 PCHARSV16(p, offset, len, f)
723 /* All three name arguments are forwarded; each per-mode macro uses only its own. */
724 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
725 if (pcre_mode == PCRE32_MODE) \
726 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
727 else \
728 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re)
729
730 #define SET_PCRE_CALLOUT(callout) \
731 if (pcre_mode == PCRE32_MODE) \
732 SET_PCRE_CALLOUT32(callout); \
733 else \
734 SET_PCRE_CALLOUT16(callout)
735 /* Expression macro: string length via STRLEN32 when pcre_mode is PCRE32_MODE, otherwise via STRLEN16. */
736 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : STRLEN16(p))
737 /* Statement macros: dispatch on pcre_mode to the 32-bit variant, otherwise the 16-bit one. */
738 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
739 if (pcre_mode == PCRE32_MODE) \
740 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
741 else \
742 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata)
743
744 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
745 if (pcre_mode == PCRE32_MODE) \
746 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
747 else \
748 PCRE_COMPILE16(re, pat, options, error, erroffset, tables)
749 /* No run-time test is needed, but the 8-bit pcre_config() is not supported in this branch, so use the 32-bit version. */
750 #define PCRE_CONFIG pcre32_config
751 /* The macros below dispatch on pcre_mode to the 32-bit variant, otherwise the 16-bit one, forwarding their arguments unchanged. */
752 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
753 namesptr, cbuffer, size) \
754 if (pcre_mode == PCRE32_MODE) \
755 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
756 namesptr, cbuffer, size); \
757 else \
758 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
759 namesptr, cbuffer, size)
760
761 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
762 if (pcre_mode == PCRE32_MODE) \
763 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
764 else \
765 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size)
766
767 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
768 offsets, size_offsets, workspace, size_workspace) \
769 if (pcre_mode == PCRE32_MODE) \
770 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
771 offsets, size_offsets, workspace, size_workspace); \
772 else \
773 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
774 offsets, size_offsets, workspace, size_workspace)
775
776 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
777 offsets, size_offsets) \
778 if (pcre_mode == PCRE32_MODE) \
779 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
780 offsets, size_offsets); \
781 else \
782 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
783 offsets, size_offsets)
784
785 #define PCRE_FREE_STUDY(extra) \
786 if (pcre_mode == PCRE32_MODE) \
787 PCRE_FREE_STUDY32(extra); \
788 else \
789 PCRE_FREE_STUDY16(extra)
790
791 #define PCRE_FREE_SUBSTRING(substring) \
792 if (pcre_mode == PCRE32_MODE) \
793 PCRE_FREE_SUBSTRING32(substring); \
794 else \
795 PCRE_FREE_SUBSTRING16(substring)
796
797 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
798 if (pcre_mode == PCRE32_MODE) \
799 PCRE_FREE_SUBSTRING_LIST32(listptr); \
800 else \
801 PCRE_FREE_SUBSTRING_LIST16(listptr)
802
803 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
804 getnamesptr, subsptr) \
805 if (pcre_mode == PCRE32_MODE) \
806 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
807 getnamesptr, subsptr); \
808 else \
809 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
810 getnamesptr, subsptr)
811 /* NOTE(review): the per-mode macros ignore their second argument and use re from the caller's scope - confirm at the call site. */
812 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
813 if (pcre_mode == PCRE32_MODE) \
814 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
815 else \
816 PCRE_GET_STRINGNUMBER16(n, rc, ptr)
817
818 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
819 if (pcre_mode == PCRE32_MODE) \
820 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
821 else \
822 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr)
823
824 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
825 if (pcre_mode == PCRE32_MODE) \
826 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
827 else \
828 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr)
829 /* Unlike the if/else statement macros around it, this one is a conditional expression, so it is wrapped in one balanced pair of parentheses. */
830 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
831 (pcre_mode == PCRE32_MODE ? \
832 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
833 : PCRE_JIT_STACK_ALLOC16(startsize, maxsize))
834
835 #define PCRE_JIT_STACK_FREE(stack) \
836 if (pcre_mode == PCRE32_MODE) \
837 PCRE_JIT_STACK_FREE32(stack); \
838 else \
839 PCRE_JIT_STACK_FREE16(stack)
840
841 #define PCRE_MAKETABLES \
842 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre16_maketables())
843
844 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
845 if (pcre_mode == PCRE32_MODE) \
846 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
847 else \
848 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables)
849
850 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
851 if (pcre_mode == PCRE32_MODE) \
852 PCRE_PRINTINT32(re, outfile, debug_lengths); \
853 else \
854 PCRE_PRINTINT16(re, outfile, debug_lengths)
855
856 #define PCRE_STUDY(extra, re, options, error) \
857 if (pcre_mode == PCRE32_MODE) \
858 PCRE_STUDY32(extra, re, options, error); \
859 else \
860 PCRE_STUDY16(extra, re, options, error)
861
862
863 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
864
865 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
866
867 #define PCHARS(lv, p, offset, len, f) \
868 if (pcre_mode == PCRE32_MODE) \
869 PCHARS32(lv, p, offset, len, f); \
870 else \
871 PCHARS8(lv, p, offset, len, f)
872
873 #define PCHARSV(p, offset, len, f) \
874 if (pcre_mode == PCRE32_MODE) \
875 PCHARSV32(p, offset, len, f); \
876 else \
877 PCHARSV8(p, offset, len, f)
878
879 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
880 if (pcre_mode == PCRE32_MODE) \
881 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
882 else \
883 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
884
885 #define SET_PCRE_CALLOUT(callout) \
886 if (pcre_mode == PCRE32_MODE) \
887 SET_PCRE_CALLOUT32(callout); \
888 else \
889 SET_PCRE_CALLOUT8(callout)
890
891 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : STRLEN8(p))
892
893 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
894 if (pcre_mode == PCRE32_MODE) \
895 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
896 else \
897 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
898
899 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
900 if (pcre_mode == PCRE32_MODE) \
901 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
902 else \
903 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
904
905 #define PCRE_CONFIG pcre_config
906
907 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
908 namesptr, cbuffer, size) \
909 if (pcre_mode == PCRE32_MODE) \
910 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
911 namesptr, cbuffer, size); \
912 else \
913 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
914 namesptr, cbuffer, size)
915
916 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
917 if (pcre_mode == PCRE32_MODE) \
918 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
919 else \
920 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
921
922 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
923 offsets, size_offsets, workspace, size_workspace) \
924 if (pcre_mode == PCRE32_MODE) \
925 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
926 offsets, size_offsets, workspace, size_workspace); \
927 else \
928 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
929 offsets, size_offsets, workspace, size_workspace)
930
931 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
932 offsets, size_offsets) \
933 if (pcre_mode == PCRE32_MODE) \
934 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
935 offsets, size_offsets); \
936 else \
937 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
938 offsets, size_offsets)
939
940 #define PCRE_FREE_STUDY(extra) \
941 if (pcre_mode == PCRE32_MODE) \
942 PCRE_FREE_STUDY32(extra); \
943 else \
944 PCRE_FREE_STUDY8(extra)
945
946 #define PCRE_FREE_SUBSTRING(substring) \
947 if (pcre_mode == PCRE32_MODE) \
948 PCRE_FREE_SUBSTRING32(substring); \
949 else \
950 PCRE_FREE_SUBSTRING8(substring)
951
952 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
953 if (pcre_mode == PCRE32_MODE) \
954 PCRE_FREE_SUBSTRING_LIST32(listptr); \
955 else \
956 PCRE_FREE_SUBSTRING_LIST8(listptr)
957
958 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
959 getnamesptr, subsptr) \
960 if (pcre_mode == PCRE32_MODE) \
961 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
962 getnamesptr, subsptr); \
963 else \
964 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
965 getnamesptr, subsptr)
966
967 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
968 if (pcre_mode == PCRE32_MODE) \
969 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
970 else \
971 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
972
973 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
974 if (pcre_mode == PCRE32_MODE) \
975 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
976 else \
977 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
978
979 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
980 if (pcre_mode == PCRE32_MODE) \
981 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
982 else \
983 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
984
985 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
986 (pcre_mode == PCRE32_MODE ? \
987 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
988 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
989
990 #define PCRE_JIT_STACK_FREE(stack) \
991 if (pcre_mode == PCRE32_MODE) \
992 PCRE_JIT_STACK_FREE32(stack); \
993 else \
994 PCRE_JIT_STACK_FREE8(stack)
995
996 #define PCRE_MAKETABLES \
997 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_maketables())
998
999 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
1000 if (pcre_mode == PCRE32_MODE) \
1001 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
1002 else \
1003 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
1004
1005 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
1006 if (pcre_mode == PCRE32_MODE) \
1007 PCRE_PRINTINT32(re, outfile, debug_lengths); \
1008 else \
1009 PCRE_PRINTINT8(re, outfile, debug_lengths)
1010
1011 #define PCRE_STUDY(extra, re, options, error) \
1012 if (pcre_mode == PCRE32_MODE) \
1013 PCRE_STUDY32(extra, re, options, error); \
1014 else \
1015 PCRE_STUDY8(extra, re, options, error)
1016
1017
1018 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
1019
1020 #else
1021 #define PCHARS(lv, p, offset, len, f) \
1022 if (pcre_mode == PCRE16_MODE) \
1023 PCHARS16(lv, p, offset, len, f); \
1024 else \
1025 PCHARS8(lv, p, offset, len, f)
1026
1027 #define PCHARSV(p, offset, len, f) \
1028 if (pcre_mode == PCRE16_MODE) \
1029 PCHARSV16(p, offset, len, f); \
1030 else \
1031 PCHARSV8(p, offset, len, f)
1032
1033 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
1034 if (pcre_mode == PCRE16_MODE) \
1035 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
1036 else \
1037 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
1038
1039 #define SET_PCRE_CALLOUT(callout) \
1040 if (pcre_mode == PCRE16_MODE) \
1041 SET_PCRE_CALLOUT16(callout); \
1042 else \
1043 SET_PCRE_CALLOUT8(callout)
1044
1045 #define STRLEN(p) (pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
1046
1047 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
1048 if (pcre_mode == PCRE16_MODE) \
1049 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
1050 else \
1051 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
1052
1053 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
1054 if (pcre_mode == PCRE16_MODE) \
1055 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
1056 else \
1057 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
1058
1059 #define PCRE_CONFIG pcre_config
1060
1061 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
1062 namesptr, cbuffer, size) \
1063 if (pcre_mode == PCRE16_MODE) \
1064 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
1065 namesptr, cbuffer, size); \
1066 else \
1067 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
1068 namesptr, cbuffer, size)
1069
1070 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
1071 if (pcre_mode == PCRE16_MODE) \
1072 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
1073 else \
1074 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
1075
1076 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
1077 offsets, size_offsets, workspace, size_workspace) \
1078 if (pcre_mode == PCRE16_MODE) \
1079 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
1080 offsets, size_offsets, workspace, size_workspace); \
1081 else \
1082 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
1083 offsets, size_offsets, workspace, size_workspace)
1084
1085 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
1086 offsets, size_offsets) \
1087 if (pcre_mode == PCRE16_MODE) \
1088 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
1089 offsets, size_offsets); \
1090 else \
1091 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
1092 offsets, size_offsets)
1093
1094 #define PCRE_FREE_STUDY(extra) \
1095 if (pcre_mode == PCRE16_MODE) \
1096 PCRE_FREE_STUDY16(extra); \
1097 else \
1098 PCRE_FREE_STUDY8(extra)
1099
1100 #define PCRE_FREE_SUBSTRING(substring) \
1101 if (pcre_mode == PCRE16_MODE) \
1102 PCRE_FREE_SUBSTRING16(substring); \
1103 else \
1104 PCRE_FREE_SUBSTRING8(substring)
1105
1106 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
1107 if (pcre_mode == PCRE16_MODE) \
1108 PCRE_FREE_SUBSTRING_LIST16(listptr); \
1109 else \
1110 PCRE_FREE_SUBSTRING_LIST8(listptr)
1111
1112 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
1113 getnamesptr, subsptr) \
1114 if (pcre_mode == PCRE16_MODE) \
1115 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
1116 getnamesptr, subsptr); \
1117 else \
1118 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
1119 getnamesptr, subsptr)
1120
1121 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
1122 if (pcre_mode == PCRE16_MODE) \
1123 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
1124 else \
1125 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
1126
1127 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
1128 if (pcre_mode == PCRE16_MODE) \
1129 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
1130 else \
1131 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
1132
1133 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
1134 if (pcre_mode == PCRE16_MODE) \
1135 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
1136 else \
1137 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
1138
1139 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
1140 (pcre_mode == PCRE16_MODE ? \
1141 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
1142 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
1143
1144 #define PCRE_JIT_STACK_FREE(stack) \
1145 if (pcre_mode == PCRE16_MODE) \
1146 PCRE_JIT_STACK_FREE16(stack); \
1147 else \
1148 PCRE_JIT_STACK_FREE8(stack)
1149
1150 #define PCRE_MAKETABLES \
1151 (pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
1152
1153 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
1154 if (pcre_mode == PCRE16_MODE) \
1155 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
1156 else \
1157 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
1158
1159 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
1160 if (pcre_mode == PCRE16_MODE) \
1161 PCRE_PRINTINT16(re, outfile, debug_lengths); \
1162 else \
1163 PCRE_PRINTINT8(re, outfile, debug_lengths)
1164
1165 #define PCRE_STUDY(extra, re, options, error) \
1166 if (pcre_mode == PCRE16_MODE) \
1167 PCRE_STUDY16(extra, re, options, error); \
1168 else \
1169 PCRE_STUDY8(extra, re, options, error)
1170
1171 #endif
1172
1173 /* ----- End of cases where more than one mode is supported ----- */
1174
1175
1176 /* ----- Only 8-bit mode is supported ----- */
1177
1178 #elif defined SUPPORT_PCRE8
1179 #define CHAR_SIZE 1
1180 #define PCHARS PCHARS8
1181 #define PCHARSV PCHARSV8
1182 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
1183 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
1184 #define STRLEN STRLEN8
1185 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
1186 #define PCRE_COMPILE PCRE_COMPILE8
1187 #define PCRE_CONFIG pcre_config
1188 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
1189 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
1190 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
1191 #define PCRE_EXEC PCRE_EXEC8
1192 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
1193 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
1194 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
1195 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
1196 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
1197 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
1198 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
1199 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
1200 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
1201 #define PCRE_MAKETABLES pcre_maketables()
1202 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
1203 #define PCRE_PRINTINT PCRE_PRINTINT8
1204 #define PCRE_STUDY PCRE_STUDY8
1205
1206 /* ----- Only 16-bit mode is supported ----- */
1207
1208 #elif defined SUPPORT_PCRE16
1209 #define CHAR_SIZE 2
1210 #define PCHARS PCHARS16
1211 #define PCHARSV PCHARSV16
1212 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
1213 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
1214 #define STRLEN STRLEN16
1215 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
1216 #define PCRE_COMPILE PCRE_COMPILE16
1217 #define PCRE_CONFIG pcre16_config
1218 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
1219 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
1220 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
1221 #define PCRE_EXEC PCRE_EXEC16
1222 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
1223 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
1224 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
1225 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
1226 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
1227 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
1228 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
1229 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
1230 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
1231 #define PCRE_MAKETABLES pcre16_maketables()
1232 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
1233 #define PCRE_PRINTINT PCRE_PRINTINT16
1234 #define PCRE_STUDY PCRE_STUDY16
1235
1236 /* ----- Only 32-bit mode is supported ----- */
1237
1238 #elif defined SUPPORT_PCRE32
1239 #define CHAR_SIZE 4
1240 #define PCHARS PCHARS32
1241 #define PCHARSV PCHARSV32
1242 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
1243 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
1244 #define STRLEN STRLEN32
1245 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
1246 #define PCRE_COMPILE PCRE_COMPILE32
1247 #define PCRE_CONFIG pcre32_config
1248 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
1249 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
1250 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
1251 #define PCRE_EXEC PCRE_EXEC32
1252 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
1253 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
1254 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
1255 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
1256 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
1257 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
1258 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
1259 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
1260 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
1261 #define PCRE_MAKETABLES pcre32_maketables()
1262 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
1263 #define PCRE_PRINTINT PCRE_PRINTINT32
1264 #define PCRE_STUDY PCRE_STUDY32
1265
1266 #endif
1267
1268 /* ----- End of mode-specific function call macros ----- */
1269
1270
1271 /* Other parameters */
1272
1273 #ifndef CLOCKS_PER_SEC
1274 #ifdef CLK_TCK
1275 #define CLOCKS_PER_SEC CLK_TCK
1276 #else
1277 #define CLOCKS_PER_SEC 100
1278 #endif
1279 #endif
1280
1281 #if !defined NODFA
1282 #define DFA_WS_DIMENSION 1000
1283 #endif
1284
1285 /* This is the default loop count for timing. */
1286
1287 #define LOOPREPEAT 500000
1288
1289 /* Static variables */
1290
1291 static FILE *outfile;
1292 static int log_store = 0;
1293 static int callout_count;
1294 static int callout_extra;
1295 static int callout_fail_count;
1296 static int callout_fail_id;
1297 static int debug_lengths;
1298 static int first_callout;
1299 static int jit_was_used;
1300 static int locale_set = 0;
1301 static int show_malloc;
1302 static int use_utf;
1303 static size_t gotten_store;
1304 static size_t first_gotten_store = 0;
1305 static const unsigned char *last_callout_mark = NULL;
1306
1307 /* The buffers grow automatically if very long input lines are encountered. */
1308
1309 static int buffer_size = 50000;
1310 static pcre_uint8 *buffer = NULL;
1311 static pcre_uint8 *pbuffer = NULL;
1312
1313 /* Another buffer is needed for translation to 16/32-bit character strings. It
1314 will be obtained and extended as required. */
1315
1316 #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
1317
1318 /* We need the table of operator lengths that is used for 16/32-bit compiling, in
1319 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
1320 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
1321 appropriately for the 16/32-bit world. Just as a safety check, make sure that
1322 COMPILE_PCRE[16|32] is *not* set. */
1323
1324 #ifdef COMPILE_PCRE16
1325 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1326 #endif
1327
1328 #ifdef COMPILE_PCRE32
1329 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1330 #endif
1331
1332 #if LINK_SIZE == 2
1333 #undef LINK_SIZE
1334 #define LINK_SIZE 1
1335 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1336 #undef LINK_SIZE
1337 #define LINK_SIZE 2
1338 #else
1339 #error LINK_SIZE must be either 2, 3, or 4
1340 #endif
1341
1342 #undef IMM2_SIZE
1343 #define IMM2_SIZE 1
1344
1345 #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
1346
1347 #ifdef SUPPORT_PCRE16
1348 static int buffer16_size = 0;
1349 static pcre_uint16 *buffer16 = NULL;
1350 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1351 #endif /* SUPPORT_PCRE16 */
1352
1353 #ifdef SUPPORT_PCRE32
1354 static int buffer32_size = 0;
1355 static pcre_uint32 *buffer32 = NULL;
1356 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1357 #endif /* SUPPORT_PCRE32 */
1358
1359 /* If we have 8-bit support, default to it; if there is also
1360 16- or 32-bit support, it can be changed by an option. If there is no 8-bit support,
1361 there must be 16- or 32-bit support, so default to 16-bit mode if available, else 32-bit mode. */
1362
1363 #if defined SUPPORT_PCRE8
1364 static int pcre_mode = PCRE8_MODE;
1365 #elif defined SUPPORT_PCRE16
1366 static int pcre_mode = PCRE16_MODE;
1367 #elif defined SUPPORT_PCRE32
1368 static int pcre_mode = PCRE32_MODE;
1369 #endif
1370
1371 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. The entries are listed for n = 1..7 in order; reading n as a 3-bit value, bit 0 selects JIT_COMPILE, bit 1 PARTIAL_SOFT and bit 2 PARTIAL_HARD. NOTE(review): assumes the table is indexed by n - 1 - confirm at the use site. */
1372
1373 static int jit_study_bits[] =
1374 {
1375 PCRE_STUDY_JIT_COMPILE,
1376 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1377 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1378 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1379 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1380 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1381 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1382 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1383 };
1384
1385 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1386 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1387
1388 /* Textual explanations for runtime error codes. NULL entries have no text in this table; the note beside each one says why. The numbers in the comments are array indexes. NOTE(review): the index is presumably the negated PCRE error code - confirm at the lookup site. */
1389
1390 static const char *errtexts[] = {
1391 NULL, /* 0 is no error */
1392 NULL, /* 1: NOMATCH is handled specially */
1393 "NULL argument passed", /* 2 */
1394 "bad option value", /* 3 */
1395 "magic number missing", /* 4 */
1396 "unknown opcode - pattern overwritten?", /* 5 */
1397 "no more memory", /* 6 */
1398 NULL, /* 7: never returned by pcre_exec() or pcre_dfa_exec() */
1399 "match limit exceeded", /* 8 */
1400 "callout error code", /* 9 */
1401 NULL, /* 10: BADUTF8/16 is handled specially */
1402 NULL, /* 11: BADUTF8/16 offset is handled specially */
1403 NULL, /* 12: PARTIAL is handled specially */
1404 "not used - internal error", /* 13 */
1405 "internal error - pattern overwritten?", /* 14 */
1406 "bad count value", /* 15 */
1407 "item unsupported for DFA matching", /* 16 */
1408 "backreference condition or recursion test not supported for DFA matching", /* 17 */
1409 "match limit not supported for DFA matching", /* 18 */
1410 "workspace size exceeded in DFA matching", /* 19 */
1411 "too much recursion for DFA matching", /* 20 */
1412 "recursion limit exceeded", /* 21 */
1413 "not used - internal error", /* 22 */
1414 "invalid combination of newline options", /* 23 */
1415 "bad offset value", /* 24 */
1416 NULL, /* 25: SHORTUTF8/16 is handled specially */
1417 "nested recursion at the same subject position", /* 26 */
1418 "JIT stack limit reached", /* 27 */
1419 "pattern compiled in wrong mode: 8-bit/16-bit error", /* 28 */
1420 "pattern compiled with other endianness", /* 29 */
1421 "invalid data in workspace for DFA restart" /* 30 */
1422 };
1423
1424
1425 /*************************************************
1426 * Alternate character tables *
1427 *************************************************/
1428
1429 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1430 using the default tables of the library. However, the T option can be used to
1431 select alternate sets of tables, for different kinds of testing. Note also that
1432 the L (locale) option also adjusts the tables. */
1433
1434 /* This is the set of tables distributed as default with PCRE. It recognizes
1435 only ASCII characters. */
1436
1437 static const pcre_uint8 tables0[] = {
1438
1439 /* This table is a lower casing table. */
1440
1441 0, 1, 2, 3, 4, 5, 6, 7,
1442 8, 9, 10, 11, 12, 13, 14, 15,
1443 16, 17, 18, 19, 20, 21, 22, 23,
1444 24, 25, 26, 27, 28, 29, 30, 31,
1445 32, 33, 34, 35, 36, 37, 38, 39,
1446 40, 41, 42, 43, 44, 45, 46, 47,
1447 48, 49, 50, 51, 52, 53, 54, 55,
1448 56, 57, 58, 59, 60, 61, 62, 63,
1449 64, 97, 98, 99,100,101,102,103,
1450 104,105,106,107,108,109,110,111,
1451 112,113,114,115,116,117,118,119,
1452 120,121,122, 91, 92, 93, 94, 95,
1453 96, 97, 98, 99,100,101,102,103,
1454 104,105,106,107,108,109,110,111,
1455 112,113,114,115,116,117,118,119,
1456 120,121,122,123,124,125,126,127,
1457 128,129,130,131,132,133,134,135,
1458 136,137,138,139,140,141,142,143,
1459 144,145,146,147,148,149,150,151,
1460 152,153,154,155,156,157,158,159,
1461 160,161,162,163,164,165,166,167,
1462 168,169,170,171,172,173,174,175,
1463 176,177,178,179,180,181,182,183,
1464 184,185,186,187,188,189,190,191,
1465 192,193,194,195,196,197,198,199,
1466 200,201,202,203,204,205,206,207,
1467 208,209,210,211,212,213,214,215,
1468 216,217,218,219,220,221,222,223,
1469 224,225,226,227,228,229,230,231,
1470 232,233,234,235,236,237,238,239,
1471 240,241,242,243,244,245,246,247,
1472 248,249,250,251,252,253,254,255,
1473
1474 /* This table is a case flipping table. */
1475
1476 0, 1, 2, 3, 4, 5, 6, 7,
1477 8, 9, 10, 11, 12, 13, 14, 15,
1478 16, 17, 18, 19, 20, 21, 22, 23,
1479 24, 25, 26, 27, 28, 29, 30, 31,
1480 32, 33, 34, 35, 36, 37, 38, 39,
1481 40, 41, 42, 43, 44, 45, 46, 47,
1482 48, 49, 50, 51, 52, 53, 54, 55,
1483 56, 57, 58, 59, 60, 61, 62, 63,
1484 64, 97, 98, 99,100,101,102,103,
1485 104,105,106,107,108,109,110,111,
1486 112,113,114,115,116,117,118,119,
1487 120,121,122, 91, 92, 93, 94, 95,
1488 96, 65, 66, 67, 68, 69, 70, 71,
1489 72, 73, 74, 75, 76, 77, 78, 79,
1490 80, 81, 82, 83, 84, 85, 86, 87,
1491 88, 89, 90,123,124,125,126,127,
1492 128,129,130,131,132,133,134,135,
1493 136,137,138,139,140,141,142,143,
1494 144,145,146,147,148,149,150,151,
1495 152,153,154,155,156,157,158,159,
1496 160,161,162,163,164,165,166,167,
1497 168,169,170,171,172,173,174,175,
1498 176,177,178,179,180,181,182,183,
1499 184,185,186,187,188,189,190,191,
1500 192,193,194,195,196,197,198,199,
1501 200,201,202,203,204,205,206,207,
1502 208,209,210,211,212,213,214,215,
1503 216,217,218,219,220,221,222,223,
1504 224,225,226,227,228,229,230,231,
1505 232,233,234,235,236,237,238,239,
1506 240,241,242,243,244,245,246,247,
1507 248,249,250,251,252,253,254,255,
1508
1509 /* This table contains bit maps for various character classes. Each map is 32
1510 bytes long and the bits run from the least significant end of each byte. The
1511 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1512 graph, print, punct, and cntrl. Other classes are built from combinations. */
1513
1514 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1515 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1516 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1517 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1518
1519 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1520 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1521 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1522 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1523
1524 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1525 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1526 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1527 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1528
1529 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1530 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1531 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1532 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1533
1534 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1535 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1536 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1537 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1538
1539 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1540 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1541 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1542 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1543
1544 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1545 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1546 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1547 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1548
1549 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1550 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1551 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1552 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1553
1554 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1555 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1556 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1557 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1558
1559 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1560 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1561 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1562 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1563
1564 /* This table identifies various classes of character by individual bits:
1565 0x01 white space character
1566 0x02 letter
1567 0x04 decimal digit
1568 0x08 hexadecimal digit
1569 0x10 alphanumeric or '_'
1570 0x80 regular expression metacharacter or binary zero
1571 */
1572
1573 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1574 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1575 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1576 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1577 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1578 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1579 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1580 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1581 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1582 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1583 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1584 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1585 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1586 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1587 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1588 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1591 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1592 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1593 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1594 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1595 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1596 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1603 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1604 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1605
1606 /* This is a set of tables that came originally from a Windows user. It seems to
1607 be at least an approximation of ISO 8859. In particular, there are characters
1608 greater than 128 that are marked as spaces, letters, etc. */
1609
/* tables1 is one flat byte array made of four consecutive sections:
256 + 256 + 320 + 256 = 1088 bytes. The section descriptions below are read off
the data itself. NOTE(review): the section order and sizes look like PCRE's
standard character-table layout (lower-case map, case-flip map, class bitmaps,
character-type flags) - confirm the offsets against pcre_internal.h. */
1610 static const pcre_uint8 tables1[] = {
/* Section 1 (256 bytes): lower-casing map, indexed by character code. It is
the identity except that 65-90 map to 97-122, 192-214 map to 224-246 and
216-222 map to 248-254. */
1611 0,1,2,3,4,5,6,7,
1612 8,9,10,11,12,13,14,15,
1613 16,17,18,19,20,21,22,23,
1614 24,25,26,27,28,29,30,31,
1615 32,33,34,35,36,37,38,39,
1616 40,41,42,43,44,45,46,47,
1617 48,49,50,51,52,53,54,55,
1618 56,57,58,59,60,61,62,63,
1619 64,97,98,99,100,101,102,103,
1620 104,105,106,107,108,109,110,111,
1621 112,113,114,115,116,117,118,119,
1622 120,121,122,91,92,93,94,95,
1623 96,97,98,99,100,101,102,103,
1624 104,105,106,107,108,109,110,111,
1625 112,113,114,115,116,117,118,119,
1626 120,121,122,123,124,125,126,127,
1627 128,129,130,131,132,133,134,135,
1628 136,137,138,139,140,141,142,143,
1629 144,145,146,147,148,149,150,151,
1630 152,153,154,155,156,157,158,159,
1631 160,161,162,163,164,165,166,167,
1632 168,169,170,171,172,173,174,175,
1633 176,177,178,179,180,181,182,183,
1634 184,185,186,187,188,189,190,191,
1635 224,225,226,227,228,229,230,231,
1636 232,233,234,235,236,237,238,239,
1637 240,241,242,243,244,245,246,215,
1638 248,249,250,251,252,253,254,223,
1639 224,225,226,227,228,229,230,231,
1640 232,233,234,235,236,237,238,239,
1641 240,241,242,243,244,245,246,247,
1642 248,249,250,251,252,253,254,255,
/* Section 2 (256 bytes): case-flipping map, indexed by character code. The
pairs 65-90 <-> 97-122, 192-214 <-> 224-246 and 216-222 <-> 248-254 are
swapped; every other code maps to itself. */
1643 0,1,2,3,4,5,6,7,
1644 8,9,10,11,12,13,14,15,
1645 16,17,18,19,20,21,22,23,
1646 24,25,26,27,28,29,30,31,
1647 32,33,34,35,36,37,38,39,
1648 40,41,42,43,44,45,46,47,
1649 48,49,50,51,52,53,54,55,
1650 56,57,58,59,60,61,62,63,
1651 64,97,98,99,100,101,102,103,
1652 104,105,106,107,108,109,110,111,
1653 112,113,114,115,116,117,118,119,
1654 120,121,122,91,92,93,94,95,
1655 96,65,66,67,68,69,70,71,
1656 72,73,74,75,76,77,78,79,
1657 80,81,82,83,84,85,86,87,
1658 88,89,90,123,124,125,126,127,
1659 128,129,130,131,132,133,134,135,
1660 136,137,138,139,140,141,142,143,
1661 144,145,146,147,148,149,150,151,
1662 152,153,154,155,156,157,158,159,
1663 160,161,162,163,164,165,166,167,
1664 168,169,170,171,172,173,174,175,
1665 176,177,178,179,180,181,182,183,
1666 184,185,186,187,188,189,190,191,
1667 224,225,226,227,228,229,230,231,
1668 232,233,234,235,236,237,238,239,
1669 240,241,242,243,244,245,246,215,
1670 248,249,250,251,252,253,254,223,
1671 192,193,194,195,196,197,198,199,
1672 200,201,202,203,204,205,206,207,
1673 208,209,210,211,212,213,214,247,
1674 216,217,218,219,220,221,222,255,
/* Section 3 (320 bytes): bit data. NOTE(review): presumably ten 32-byte
(256-bit) character-class bitmaps, one bit per character code - confirm which
class each 32-byte group represents before relying on it. */
1675 0,62,0,0,1,0,0,0,
1676 0,0,0,0,0,0,0,0,
1677 32,0,0,0,1,0,0,0,
1678 0,0,0,0,0,0,0,0,
1679 0,0,0,0,0,0,255,3,
1680 126,0,0,0,126,0,0,0,
1681 0,0,0,0,0,0,0,0,
1682 0,0,0,0,0,0,0,0,
1683 0,0,0,0,0,0,255,3,
1684 0,0,0,0,0,0,0,0,
1685 0,0,0,0,0,0,12,2,
1686 0,0,0,0,0,0,0,0,
1687 0,0,0,0,0,0,0,0,
1688 254,255,255,7,0,0,0,0,
1689 0,0,0,0,0,0,0,0,
1690 255,255,127,127,0,0,0,0,
1691 0,0,0,0,0,0,0,0,
1692 0,0,0,0,254,255,255,7,
1693 0,0,0,0,0,4,32,4,
1694 0,0,0,128,255,255,127,255,
1695 0,0,0,0,0,0,255,3,
1696 254,255,255,135,254,255,255,7,
1697 0,0,0,0,0,4,44,6,
1698 255,255,127,255,255,255,127,255,
1699 0,0,0,0,254,255,255,255,
1700 255,255,255,255,255,255,255,127,
1701 0,0,0,0,254,255,255,255,
1702 255,255,255,255,255,255,255,255,
1703 0,2,0,0,255,255,255,255,
1704 255,255,255,255,255,255,255,127,
1705 0,0,0,0,255,255,255,255,
1706 255,255,255,255,255,255,255,255,
1707 0,0,0,0,254,255,0,252,
1708 1,0,0,248,1,0,0,120,
1709 0,0,0,0,254,255,255,255,
1710 0,0,128,0,0,0,128,0,
1711 255,255,255,255,0,0,0,0,
1712 0,0,0,0,0,0,0,128,
1713 255,255,255,255,0,0,0,0,
1714 0,0,0,0,0,0,0,0,
/* Section 4 (256 bytes): one flag byte per character code (for example the
entries for codes 48-57, the digits, are all 28). NOTE(review): presumably
PCRE's ctype flag bits - confirm the meaning of each bit in pcre_internal.h. */
1715 128,0,0,0,0,0,0,0,
1716 0,1,1,0,1,1,0,0,
1717 0,0,0,0,0,0,0,0,
1718 0,0,0,0,0,0,0,0,
1719 1,0,0,0,128,0,0,0,
1720 128,128,128,128,0,0,128,0,
1721 28,28,28,28,28,28,28,28,
1722 28,28,0,0,0,0,0,128,
1723 0,26,26,26,26,26,26,18,
1724 18,18,18,18,18,18,18,18,
1725 18,18,18,18,18,18,18,18,
1726 18,18,18,128,128,0,128,16,
1727 0,26,26,26,26,26,26,18,
1728 18,18,18,18,18,18,18,18,
1729 18,18,18,18,18,18,18,18,
1730 18,18,18,128,128,0,0,0,
1731 0,0,0,0,0,1,0,0,
1732 0,0,0,0,0,0,0,0,
1733 0,0,0,0,0,0,0,0,
1734 0,0,0,0,0,0,0,0,
1735 1,0,0,0,0,0,0,0,
1736 0,0,18,0,0,0,0,0,
1737 0,0,20,20,0,18,0,0,
1738 0,20,18,0,0,0,0,0,
1739 18,18,18,18,18,18,18,18,
1740 18,18,18,18,18,18,18,18,
1741 18,18,18,18,18,18,18,0,
1742 18,18,18,18,18,18,18,18,
1743 18,18,18,18,18,18,18,18,
1744 18,18,18,18,18,18,18,18,
1745 18,18,18,18,18,18,18,0,
1746 18,18,18,18,18,18,18,18
1747 };
1748
1749
1750
1751
1752 #ifndef HAVE_STRERROR
1753 /*************************************************
1754 * Provide strerror() for non-ANSI libraries *
1755 *************************************************/
1756
/* Fallback strerror() for libraries that lack it (SunOS4 is one example). It
looks the message up in the traditional sys_errlist[] table, which such systems
export together with its entry count sys_nerr.

Argument:
  n         an errno value

Returns:    the corresponding message, or a fixed "unknown" text when n lies
            outside the table
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr) ? sys_errlist[n] : "unknown error number";
}
1770 #endif /* HAVE_STRERROR */
1771
1772
1773
1774 /*************************************************
1775 * Print newline configuration *
1776 *************************************************/
1777
1778 /*
1779 Arguments:
1780 rc the return code from PCRE_CONFIG_NEWLINE
1781 isc TRUE if called from "-C newline"
1782 Returns: nothing
1783 */
1784
1785 static void
1786 print_newline_config(int rc, BOOL isc)
1787 {
1788 const char *s = NULL;
1789 if (!isc) printf(" Newline sequence is ");
1790 switch(rc)
1791 {
1792 case CHAR_CR: s = "CR"; break;
1793 case CHAR_LF: s = "LF"; break;
1794 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1795 case -1: s = "ANY"; break;
1796 case -2: s = "ANYCRLF"; break;
1797
1798 default:
1799 printf("a non-standard value: 0x%04x\n", rc);
1800 return;
1801 }
1802
1803 printf("%s\n", s);
1804 }
1805
1806
1807
1808 /*************************************************
1809 * JIT memory callback *
1810 *************************************************/
1811
1812 static pcre_jit_stack* jit_callback(void *arg)
1813 {
1814 jit_was_used = TRUE;
1815 return (pcre_jit_stack *)arg;
1816 }
1817
1818
1819 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1820 /*************************************************
1821 * Convert UTF-8 string to value *
1822 *************************************************/
1823
1824 /* This function takes one or more bytes that represents a UTF-8 character,
1825 and returns the value of the character.
1826
1827 Argument:
1828 utf8bytes a pointer to the byte vector
1829 vptr a pointer to an int to receive the value
1830
1831 Returns: > 0 => the number of bytes consumed
1832 -6 to 0 => malformed UTF-8 character at offset = (-return)
1833 */
1834
1835 static int
1836 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1837 {
1838 pcre_uint32 c = *utf8bytes++;
1839 pcre_uint32 d = c;
1840 int i, j, s;
1841
1842 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1843 {
1844 if ((d & 0x80) == 0) break;
1845 d <<= 1;
1846 }
1847
1848 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1849 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1850
1851 /* i now has a value in the range 1-5 */
1852
1853 s = 6*i;
1854 d = (c & utf8_table3[i]) << s;
1855
1856 for (j = 0; j < i; j++)
1857 {
1858 c = *utf8bytes++;
1859 if ((c & 0xc0) != 0x80) return -(j+1);
1860 s -= 6;
1861 d |= (c & 0x3f) << s;
1862 }
1863
1864 /* Check that encoding was the correct unique one */
1865
1866 for (j = 0; j < utf8_table1_size; j++)
1867 if (d <= (pcre_uint32)utf8_table1[j]) break;
1868 if (j != i) return -(i+1);
1869
1870 /* Valid value */
1871
1872 *vptr = d;
1873 return i+1;
1874 }
1875 #endif /* NOUTF || SUPPORT_PCRE16 */
1876
1877
1878
1879 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1880 /*************************************************
1881 * Convert character value to UTF-8 *
1882 *************************************************/
1883
1884 /* This function takes an integer value in the range 0 - 0x7fffffff
1885 and encodes it as a UTF-8 character in 0 to 6 bytes.
1886
1887 Arguments:
1888 cvalue the character value
1889 utf8bytes pointer to buffer for result - at least 6 bytes long
1890
1891 Returns: number of characters placed in the buffer
1892 */
1893
1894 static int
1895 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1896 {
1897 register int i, j;
1898 if (cvalue > 0x7fffffffu)
1899 return -1;
1900 for (i = 0; i < utf8_table1_size; i++)
1901 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1902 utf8bytes += i;
1903 for (j = i; j > 0; j--)
1904 {
1905 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1906 cvalue >>= 6;
1907 }
1908 *utf8bytes = utf8_table2[i] | cvalue;
1909 return i + 1;
1910 }
1911 #endif
1912
1913
1914 #ifdef SUPPORT_PCRE16
1915 /*************************************************
1916 * Convert a string to 16-bit *
1917 *************************************************/
1918
1919 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1920 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1921 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1922 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1923 result is always left in buffer16.
1924
1925 Note that this function does not object to surrogate values. This is
1926 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1927 for the purpose of testing that they are correctly faulted.
1928
1929 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1930 in UTF-8 so that values greater than 255 can be handled.
1931
1932 Arguments:
1933 data TRUE if converting a data line; FALSE for a regex
1934 p points to a byte string
1935 utf true if UTF-8 (to be converted to UTF-16)
1936 len number of bytes in the string (excluding trailing zero)
1937
1938 Returns: number of 16-bit data items used (excluding trailing zero)
1939 OR -1 if a UTF-8 string is malformed
1940 OR -2 if a value > 0x10ffff is encountered
1941 OR -3 if a value > 0xffff is encountered when not in UTF mode
1942 */
1943
1944 static int
1945 to16(int data, pcre_uint8 *p, int utf, int len)
1946 {
1947 pcre_uint16 *pp;
1948
1949 if (buffer16_size < 2*len + 2)
1950 {
1951 if (buffer16 != NULL) free(buffer16);
1952 buffer16_size = 2*len + 2;
1953 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1954 if (buffer16 == NULL)
1955 {
1956 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1957 exit(1);
1958 }
1959 }
1960
1961 pp = buffer16;
1962
1963 if (!utf && !data)
1964 {
1965 while (len-- > 0) *pp++ = *p++;
1966 }
1967
1968 else
1969 {
1970 pcre_uint32 c = 0;
1971 while (len > 0)
1972 {
1973 int chlen = utf82ord(p, &c);
1974 if (chlen <= 0) return -1;
1975 if (c > 0x10ffff) return -2;
1976 p += chlen;
1977 len -= chlen;
1978 if (c < 0x10000) *pp++ = c; else
1979 {
1980 if (!utf) return -3;
1981 c -= 0x10000;
1982 *pp++ = 0xD800 | (c >> 10);
1983 *pp++ = 0xDC00 | (c & 0x3ff);
1984 }
1985 }
1986 }
1987
1988 *pp = 0;
1989 return pp - buffer16;
1990 }
1991 #endif
1992
1993 #ifdef SUPPORT_PCRE32
1994 /*************************************************
1995 * Convert a string to 32-bit *
1996 *************************************************/
1997
1998 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1999 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
2000 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
2001 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
2002 result is always left in buffer32.
2003
2004 Note that this function does not object to surrogate values. This is
2005 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
2006 for the purpose of testing that they are correctly faulted.
2007
2008 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
2009 in UTF-8 so that values greater than 255 can be handled.
2010
2011 Arguments:
2012 data TRUE if converting a data line; FALSE for a regex
2013 p points to a byte string
2014 utf true if UTF-8 (to be converted to UTF-32)
2015 len number of bytes in the string (excluding trailing zero)
2016
2017 Returns: number of 32-bit data items used (excluding trailing zero)
2018 OR -1 if a UTF-8 string is malformed
2019 OR -2 if a value > 0x10ffff is encountered
2020 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
2021 */
2022
2023 static int
2024 to32(int data, pcre_uint8 *p, int utf, int len)
2025 {
2026 pcre_uint32 *pp;
2027
2028 if (buffer32_size < 4*len + 4)
2029 {
2030 if (buffer32 != NULL) free(buffer32);
2031 buffer32_size = 4*len + 4;
2032 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
2033 if (buffer32 == NULL)
2034 {
2035 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
2036 exit(1);
2037 }
2038 }
2039
2040 pp = buffer32;
2041
2042 if (!utf && !data)
2043 {
2044 while (len-- > 0) *pp++ = *p++;
2045 }
2046
2047 else
2048 {
2049 pcre_uint32 c = 0;
2050 while (len > 0)
2051 {
2052 int chlen = utf82ord(p, &c);
2053 if (chlen <= 0) return -1;
2054 if (utf)
2055 {
2056 if (c > 0x10ffff) return -2;
2057 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
2058 }
2059
2060 p += chlen;
2061 len -= chlen;
2062 *pp++ = c;
2063 }
2064 }
2065
2066 *pp = 0;
2067 return pp - buffer32;
2068 }
2069
/* Check that a 32-bit character string is valid UTF-32. A string is rejected
if any unit is greater than 0x10ffff, is a surrogate (0xd800-0xdfff), or is a
non-character (0xfdd0-0xfdef, or a value whose low 16 bits are 0xfffe or
0xffff).

Arguments:
  string       points to the string
  length       length of string, or -1 if the string is zero-terminated

Returns:       TRUE  if the string is a valid UTF-32 string
               FALSE otherwise
*/

#ifdef SUPPORT_UTF
static BOOL
valid_utf32(pcre_uint32 *string, int length)
{
  const pcre_uint32 *p;

  /* A negative length means the string is zero-terminated, so measure it
  first. Without this the checking loop below would run zero times and every
  such string would be reported as valid. */

  if (length < 0)
  {
    length = 0;
    while (string[length] != 0) length++;
  }

  for (p = string; length-- > 0; p++)
  {
    pcre_uint32 c = *p;

    /* Beyond the Unicode range */
    if (c > 0x10ffffu)
      return FALSE;

    /* A surrogate */
    if ((c & 0xfffff800u) == 0xd800u)
      return FALSE;

    /* Non-character */
    if ((c & 0xfffeu) == 0xfffeu ||
        (c >= 0xfdd0u && c <= 0xfdefu))
      return FALSE;
  }

  return TRUE;
}
#endif /* SUPPORT_UTF */
2107
2108 #endif
2109
2110 /*************************************************
2111 * Read or extend an input line *
2112 *************************************************/
2113
2114 /* Input lines are read into buffer, but both patterns and data lines can be
2115 continued over multiple input lines. In addition, if the buffer fills up, we
2116 want to automatically expand it so as to be able to handle extremely large
2117 lines that are needed for certain stress tests. When the input buffer is
2118 expanded, the other two buffers must also be expanded likewise, and the
2119 contents of pbuffer, which are a copy of the input for callouts, must be
2120 preserved (for when expansion happens for a data line). This is not the most
2121 optimal way of handling this, but hey, this is just a test program!
2122
2123 Arguments:
2124 f the file to read
2125 start where in buffer to start (this *must* be within buffer)
2126 prompt for stdin or readline()
2127
2128 Returns: pointer to the start of new data
2129 could be a copy of start, or could be moved
2130 NULL if no data read and EOF reached
2131 */
2132
2133 static pcre_uint8 *
2134 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
2135 {
2136 pcre_uint8 *here = start;
2137
2138 for (;;)
2139 {
2140 size_t rlen = (size_t)(buffer_size - (here - buffer));
2141
2142 if (rlen > 1000)
2143 {
2144 int dlen;
2145
2146 /* If libreadline or libedit support is required, use readline() to read a
2147 line if the input is a terminal. Note that readline() removes the trailing
2148 newline, so we must put it back again, to be compatible with fgets(). */
2149
2150 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2151 if (isatty(fileno(f)))
2152 {
2153 size_t len;
2154 char *s = readline(prompt);
2155 if (s == NULL) return (here == start)? NULL : start;
2156 len = strlen(s);
2157 if (len > 0) add_history(s);
2158 if (len > rlen - 1) len = rlen - 1;
2159 memcpy(here, s, len);
2160 here[len] = '\n';
2161 here[len+1] = 0;
2162 free(s);
2163 }
2164 else
2165 #endif
2166
2167 /* Read the next line by normal means, prompting if the file is stdin. */
2168
2169 {
2170 if (f == stdin) printf("%s", prompt);
2171 if (fgets((char *)here, rlen, f) == NULL)
2172 return (here == start)? NULL : start;
2173 }
2174
2175 dlen = (int)strlen((char *)here);
2176 if (dlen > 0 && here[dlen - 1] == '\n') return start;
2177 here += dlen;
2178 }
2179
2180 else
2181 {
2182 int new_buffer_size = 2*buffer_size;
2183 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
2184 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
2185
2186 if (new_buffer == NULL || new_pbuffer == NULL)
2187 {
2188 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
2189 exit(1);
2190 }
2191
2192 memcpy(new_buffer, buffer, buffer_size);
2193 memcpy(new_pbuffer, pbuffer, buffer_size);
2194
2195 buffer_size = new_buffer_size;
2196
2197 start = new_buffer + (start - buffer);
2198 here = new_buffer + (here - buffer);
2199
2200 free(buffer);
2201 free(pbuffer);
2202
2203 buffer = new_buffer;
2204 pbuffer = new_pbuffer;
2205 }
2206 }
2207
2208 return NULL; /* Control never gets here */
2209 }
2210
2211
2212
2213 /*************************************************
2214 * Read number from string *
2215 *************************************************/
2216
2217 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
2218 around with conditional compilation, just do the job by hand. It is only used
2219 for unpicking arguments, so just keep it simple.
2220
2221 Arguments:
2222 str string to be converted
2223 endptr where to put the end pointer
2224
2225 Returns: the unsigned long
2226 */
2227
2228 static int
2229 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
2230 {
2231 int result = 0;
2232 while(*str != 0 && isspace(*str)) str++;
2233 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
2234 *endptr = str;
2235 return(result);
2236 }
2237
2238
2239
2240 /*************************************************
2241 * Print one character *
2242 *************************************************/
2243
2244 /* Print a single character either literally, or as a hex escape. */
2245
2246 static int pchar(pcre_uint32 c, FILE *f)
2247 {
2248 int n;
2249 if (PRINTOK(c))
2250 {
2251 if (f != NULL) fprintf(f, "%c", c);
2252 return 1;
2253 }
2254
2255 if (c < 0x100)
2256 {
2257 if (use_utf)
2258 {
2259 if (f != NULL) fprintf(f, "\\x{%02x}", c);
2260 return 6;
2261 }
2262 else
2263 {
2264 if (f != NULL) fprintf(f, "\\x%02x", c);
2265 return 4;
2266 }
2267 }
2268
2269 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2270 return n >= 0 ? n : 0;
2271 }
2272
2273
2274
2275 #ifdef SUPPORT_PCRE8
2276 /*************************************************
2277 * Print 8-bit character string *
2278 *************************************************/
2279
2280 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2281 If handed a NULL file, just counts chars without printing. */
2282
2283 static int pchars(pcre_uint8 *p, int length, FILE *f)
2284 {
2285 pcre_uint32 c = 0;
2286 int yield = 0;
2287
2288 if (length < 0)
2289 length = strlen((char *)p);
2290
2291 while (length-- > 0)
2292 {
2293 #if !defined NOUTF
2294 if (use_utf)
2295 {
2296 int rc = utf82ord(p, &c);
2297 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2298 {
2299 length -= rc - 1;
2300 p += rc;
2301 yield += pchar(c, f);
2302 continue;
2303 }
2304 }
2305 #endif
2306 c = *p++;
2307 yield += pchar(c, f);
2308 }
2309
2310 return yield;
2311 }
2312 #endif
2313
2314
2315
2316 #ifdef SUPPORT_PCRE16
2317 /*************************************************
2318 * Find length of 0-terminated 16-bit string *
2319 *************************************************/
2320
2321 static int strlen16(PCRE_SPTR16 p)
2322 {
2323 int len = 0;
2324 while (*p++ != 0) len++;
2325 return len;
2326 }
2327 #endif /* SUPPORT_PCRE16 */
2328
2329
2330
2331 #ifdef SUPPORT_PCRE32
2332 /*************************************************
2333 * Find length of 0-terminated 32-bit string *
2334 *************************************************/
2335
2336 static int strlen32(PCRE_SPTR32 p)
2337 {
2338 int len = 0;
2339 while (*p++ != 0) len++;
2340 return len;
2341 }
2342 #endif /* SUPPORT_PCRE32 */
2343
2344
2345
2346 #ifdef SUPPORT_PCRE16
2347 /*************************************************
2348 * Print 16-bit character string *
2349 *************************************************/
2350
2351 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2352 If handed a NULL file, just counts chars without printing. */
2353
2354 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2355 {
2356 int yield = 0;
2357
2358 if (length < 0)
2359 length = strlen16(p);
2360
2361 while (length-- > 0)
2362 {
2363 pcre_uint32 c = *p++ & 0xffff;
2364 #if !defined NOUTF
2365 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2366 {
2367 int d = *p & 0xffff;
2368 if (d >= 0xDC00 && d < 0xDFFF)
2369 {
2370 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2371 length--;
2372 p++;
2373 }
2374 }
2375 #endif
2376 yield += pchar(c, f);
2377 }
2378
2379 return yield;
2380 }
2381 #endif /* SUPPORT_PCRE16 */
2382
2383
2384
2385 #ifdef SUPPORT_PCRE32
2386 /*************************************************
2387 * Print 32-bit character string *
2388 *************************************************/
2389
2390 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2391 If handed a NULL file, just counts chars without printing. */
2392
2393 #define UTF32_MASK (0x1fffffu)
2394
2395 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2396 {
2397 int yield = 0;
2398
2399 if (length < 0)
2400 length = strlen32(p);
2401
2402 while (length-- > 0)
2403 {
2404 pcre_uint32 c = *p++;
2405 if (utf) c &= UTF32_MASK;
2406 yield += pchar(c, f);
2407 }
2408
2409 return yield;
2410 }
2411 #endif /* SUPPORT_PCRE32 */
2412
2413
2414
2415 #ifdef SUPPORT_PCRE8
2416 /*************************************************
2417 * Read a capture name (8-bit) and check it *
2418 *************************************************/
2419
2420 static pcre_uint8 *
2421 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2422 {
2423 pcre_uint8 *npp = *pp;
2424 while (isalnum(*p)) *npp++ = *p++;
2425 *npp++ = 0;
2426 *npp = 0;
2427 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2428 {
2429 fprintf(outfile, "no parentheses with name \"");
2430 PCHARSV(*pp, 0, -1, outfile);
2431 fprintf(outfile, "\"\n");
2432 }
2433
2434 *pp = npp;
2435 return p;
2436 }
2437 #endif /* SUPPORT_PCRE8 */
2438
2439
2440
2441 #ifdef SUPPORT_PCRE16
2442 /*************************************************
2443 * Read a capture name (16-bit) and check it *
2444 *************************************************/
2445
2446 /* Note that the text being read is 8-bit. */
2447
2448 static pcre_uint8 *
2449 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2450 {
2451 pcre_uint16 *npp = *pp;
2452 while (isalnum(*p)) *npp++ = *p++;
2453 *npp++ = 0;
2454 *npp = 0;
2455 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2456 {
2457 fprintf(outfile, "no parentheses with name \"");
2458 PCHARSV(*pp, 0, -1, outfile);
2459 fprintf(outfile, "\"\n");
2460 }
2461 *pp = npp;
2462 return p;
2463 }
2464 #endif /* SUPPORT_PCRE16 */
2465
2466
2467
2468 #ifdef SUPPORT_PCRE32
2469 /*************************************************
2470 * Read a capture name (32-bit) and check it *
2471 *************************************************/
2472
2473 /* Note that the text being read is 8-bit. */
2474
2475 static pcre_uint8 *
2476 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2477 {
2478 pcre_uint32 *npp = *pp;
2479 while (isalnum(*p)) *npp++ = *p++;
2480 *npp++ = 0;
2481 *npp = 0;
2482 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2483 {
2484 fprintf(outfile, "no parentheses with name \"");
2485 PCHARSV(*pp, 0, -1, outfile);
2486 fprintf(outfile, "\"\n");
2487 }
2488 *pp = npp;
2489 return p;
2490 }
2491 #endif /* SUPPORT_PCRE32 */
2492
2493
2494
2495 /*************************************************
2496 * Callout function *
2497 *************************************************/
2498
2499 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2500 the match. Yield zero unless more callouts than the fail count, or the callout
2501 data is not zero. */
2502
2503 static int callout(pcre_callout_block *cb)
2504 {
2505 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2506 int i, pre_start, post_start, subject_length;
2507
2508 if (callout_extra)
2509 {
2510 fprintf(f, "Callout %d: last capture = %d\n",
2511 cb->callout_number, cb->capture_last);
2512
2513 for (i = 0; i < cb->capture_top * 2; i += 2)
2514 {
2515 if (cb->offset_vector[i] < 0)
2516 fprintf(f, "%2d: <unset>\n", i/2);
2517 else
2518 {
2519 fprintf(f, "%2d: ", i/2);
2520 PCHARSV(cb->subject, cb->offset_vector[i],
2521 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2522 fprintf(f, "\n");
2523 }
2524 }
2525 }
2526
2527 /* Re-print the subject in canonical form, the first time or if giving full
2528 datails. On subsequent calls in the same match, we use pchars just to find the
2529 printed lengths of the substrings. */
2530
2531 if (f != NULL) fprintf(f, "--->");
2532
2533 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2534 PCHARS(post_start, cb->subject, cb->start_match,
2535 cb->current_position - cb->start_match, f);
2536
2537 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2538
2539 PCHARSV(cb->subject, cb->current_position,
2540 cb->subject_length - cb->current_position, f);
2541
2542 if (f != NULL) fprintf(f, "\n");
2543
2544 /* Always print appropriate indicators, with callout number if not already
2545 shown. For automatic callouts, show the pattern offset. */
2546
2547 if (cb->callout_number == 255)
2548 {
2549 fprintf(outfile, "%+3d ", cb->pattern_position);
2550 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2551 }
2552 else
2553 {
2554 if (callout_extra) fprintf(outfile, " ");
2555 else fprintf(outfile, "%3d ", cb->callout_number);
2556 }
2557
2558 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2559 fprintf(outfile, "^");
2560
2561 if (post_start > 0)
2562 {
2563 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2564 fprintf(outfile, "^");
2565 }
2566
2567 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2568 fprintf(outfile, " ");
2569
2570 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2571 pbuffer + cb->pattern_position);
2572
2573 fprintf(outfile, "\n");
2574 first_callout = 0;
2575
2576 if (cb->mark != last_callout_mark)
2577 {
2578 if (cb->mark == NULL)
2579 fprintf(outfile, "Latest Mark: <unset>\n");
2580 else
2581 {
2582 fprintf(outfile, "Latest Mark: ");
2583 PCHARSV(cb->mark, 0, -1, outfile);
2584 putc('\n', outfile);
2585 }
2586 last_callout_mark = cb->mark;
2587 }
2588
2589 if (cb->callout_data != NULL)
2590 {
2591 int callout_data = *((int *)(cb->callout_data));
2592 if (callout_data != 0)
2593 {
2594 fprintf(outfile, "Callout data = %d\n", callout_data);
2595 return callout_data;
2596 }
2597 }
2598
2599 return (cb->callout_number != callout_fail_id)? 0 :
2600 (++callout_count >= callout_fail_count)? 1 : 0;
2601 }
2602
2603
2604 /*************************************************
2605 * Local malloc functions *
2606 *************************************************/
2607
2608 /* Alternative malloc function, to test functionality and save the size of a
2609 compiled re, which is the first store request that pcre_compile() makes. The
2610 show_malloc variable is set only during matching. */
2611
2612 static void *new_malloc(size_t size)
2613 {
2614 void *block = malloc(size);
2615 gotten_store = size;
2616 if (first_gotten_store == 0) first_gotten_store = size;
2617 if (show_malloc)
2618 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2619 return block;
2620 }
2621
2622 static void new_free(void *block)
2623 {
2624 if (show_malloc)
2625 fprintf(outfile, "free %p\n", block);
2626 free(block);
2627 }
2628
2629 /* For recursion malloc/free, to test stacking calls */
2630
2631 static void *stack_malloc(size_t size)
2632 {
2633 void *block = malloc(size);
2634 if (show_malloc)
2635 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2636 return block;
2637 }
2638
2639 static void stack_free(void *block)
2640 {
2641 if (show_malloc)
2642 fprintf(outfile, "stack_free %p\n", block);
2643 free(block);
2644 }
2645
2646
2647 /*************************************************
2648 * Call pcre_fullinfo() *
2649 *************************************************/
2650
2651 /* Get one piece of information from the pcre_fullinfo() function. When only
2652 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2653 value, but the code is defensive.
2654
2655 Arguments:
2656 re compiled regex
2657 study study data
2658 option PCRE_INFO_xxx option
2659 ptr where to put the data
2660
2661 Returns: 0 when OK, < 0 on error
2662 */
2663
2664 static int
2665 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2666 {
2667 int rc;
2668
2669 if (pcre_mode == PCRE32_MODE)
2670 #ifdef SUPPORT_PCRE32
2671 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2672 #else
2673 rc = PCRE_ERROR_BADMODE;
2674 #endif
2675 else if (pcre_mode == PCRE16_MODE)
2676 #ifdef SUPPORT_PCRE16
2677 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2678 #else
2679 rc = PCRE_ERROR_BADMODE;
2680 #endif
2681 else
2682 #ifdef SUPPORT_PCRE8
2683 rc = pcre_fullinfo(re, study, option, ptr);
2684 #else
2685 rc = PCRE_ERROR_BADMODE;
2686 #endif
2687
2688 if (rc < 0)
2689 {
2690 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2691 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2692 if (rc == PCRE_ERROR_BADMODE)
2693 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2694 "%d-bit mode\n", 8 * CHAR_SIZE,
2695 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2696 }
2697
2698 return rc;
2699 }
2700
2701
2702
2703 /*************************************************
2704 * Swap byte functions *
2705 *************************************************/
2706
2707 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2708 value, respectively.
2709
2710 Arguments:
2711 value any number
2712
2713 Returns: the byte swapped value
2714 */
2715
2716 static pcre_uint32
2717 swap_uint32(pcre_uint32 value)
2718 {
2719 return ((value & 0x000000ff) << 24) |
2720 ((value & 0x0000ff00) << 8) |
2721 ((value & 0x00ff0000) >> 8) |
2722 (value >> 24);
2723 }
2724
2725 static pcre_uint16
2726 swap_uint16(pcre_uint16 value)
2727 {
2728 return (value >> 8) | (value << 8);
2729 }
2730
2731
2732
2733 /*************************************************
2734 * Flip bytes in a compiled pattern *
2735 *************************************************/
2736
2737 /* This function is called if the 'F' option was present on a pattern that is
2738 to be written to a file. We flip the bytes of all the integer fields in the
2739 regex data block and the study block. In 16-bit mode this also flips relevant
2740 bytes in the pattern itself. This is to make it possible to test PCRE's
2741 ability to reload byte-flipped patterns, e.g. those compiled on a different
2742 architecture. */
2743
#if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
/* Byte-flip, in place, a pattern compiled by the 8-bit or 16-bit library.

The multi-byte header fields of the compiled regex are always swapped and the
magic number is replaced by REVERSED_MAGIC_NUMBER. If extra is not NULL, the
size, flags and minlength fields of the study block it points to are swapped
too. When pcretest is running in 16-bit mode, the name table and every 16-bit
unit of the compiled code are swapped as well; character class bit maps are
stepped over without being changed.

Arguments:
  ere     the compiled pattern, modified in place
  extra   the associated extra block (its study_data is used), or NULL

Returns:  nothing
*/

static void
regexflip8_or_16(pcre *ere, pcre_extra *extra)
{
real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
#ifdef SUPPORT_PCRE16
int op;
/* These initializers read header fields that are still in host order; they
must stay ahead of the header swapping below. */
pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;
#ifdef SUPPORT_UTF
BOOL utf = (re->options & PCRE_UTF16) != 0;
BOOL utf16_char = FALSE;
#endif /* SUPPORT_UTF */
#endif /* SUPPORT_PCRE16 */

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);

if (extra != NULL)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
in the name table, if present, and then in the pattern itself. */

#ifdef SUPPORT_PCRE16
if (pcre_mode != PCRE16_MODE) return;

while(TRUE)
  {
  /* Swap previous characters: on the first pass this is the name table,
  afterwards it is the operand units of the opcode just processed. */

  while (length-- > 0)
    {
    *ptr = swap_uint16(*ptr);
    ptr++;
    }

#ifdef SUPPORT_UTF
  if (utf16_char)
    {
    /* The unit at ptr[-1] was byte-flipped by the loop above, so flip it
    back to host order before testing whether it is a high surrogate
    (0xd800-0xdbff). Testing the flipped value would miss the surrogate and
    leave the following low surrogate to be decoded as an opcode. */

    if ((swap_uint16(ptr[-1]) & 0xfc00) == 0xd800)
      {
      /* We know that there is only one extra character in UTF-16. */
      *ptr = swap_uint16(*ptr);
      ptr++;
      }
    }
  utf16_char = FALSE;
#endif /* SUPPORT_UTF */

  /* Get next opcode. It is read in host order before being flipped. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint16(op);

  switch (op)
    {
    case OP_END:
    return;

#ifdef SUPPORT_UTF
    /* Opcodes whose last operand is a character; in UTF-16 mode that
    character may be followed by one extra unit. */

    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    if (utf) utf16_char = TRUE;
#endif
    /* Fall through. */

    default:
    length = OP_lengths16[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint16);
    length = 0;
    break;

    case OP_XCLASS:
    /* LINK_SIZE can be 1 or 2 in 16 bit mode. The size is read here, while
    it is still in host order. */
    if (LINK_SIZE > 1)
      length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
        - (1 + LINK_SIZE + 1));
    else
      length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint16(*ptr);
    ptr++;
    if (LINK_SIZE > 1)
      {
      *ptr = swap_uint16(*ptr);
      ptr++;
      }

    /* The next unit holds the XCLASS flag bits. */
    op = *ptr;
    *ptr = swap_uint16(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint16);
      length -= 32/sizeof(pcre_uint16);
      }
    break;
    }
  }
/* Control should never reach here in 16 bit mode. */
#endif /* SUPPORT_PCRE16 */
}
#endif /* SUPPORT_PCRE[8|16] */
2924
2925
2926
#if defined SUPPORT_PCRE32
/* Byte-flip, in place, a pattern compiled by the 32-bit library.

The multi-byte header fields of the compiled regex are swapped and the magic
number is replaced by REVERSED_MAGIC_NUMBER. If extra is not NULL, the size,
flags and minlength fields of the study block it points to are swapped too.
Then the name table and every 32-bit unit of the compiled code are swapped;
character class bit maps are stepped over without being changed.

Arguments:
  ere     the compiled pattern, modified in place
  extra   the associated extra block (its study_data is used), or NULL

Returns:  nothing
*/

static void
regexflip_32(pcre *ere, pcre_extra *extra)
{
real_pcre32 *re = (real_pcre32 *)ere;
int op;
/* These initializers read header fields that are still in host order; they
must stay ahead of the header swapping below. */
pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);

if (extra != NULL)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 32-bit mode we must swap bytes in the name table, if present, and then
in the pattern itself. */

while(TRUE)
  {
  /* Swap previous characters: on the first pass this is the name table,
  afterwards it is the operand units of the opcode just processed. */

  while (length-- > 0)
    {
    *ptr = swap_uint32(*ptr);
    ptr++;
    }

  /* Get next opcode. It is read in host order before being flipped. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint32(op);

  switch (op)
    {
    case OP_END:
    return;

    default:
    length = OP_lengths32[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint32);
    length = 0;
    break;

    case OP_XCLASS:
    /* LINK_SIZE can only be 1 in 32-bit mode. The size is read here, while
    it is still in host order. */
    length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint32(*ptr);
    ptr++;

    /* The next unit holds the XCLASS flag bits. */
    op = *ptr;
    *ptr = swap_uint32(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint32);
      length -= 32/sizeof(pcre_uint32);
      }
    break;
    }
  }
/* Control should never reach here in 32 bit mode. */
}

#endif /* SUPPORT_PCRE32 */
3019
3020
3021
3022 static void
3023 regexflip(pcre *ere, pcre_extra *extra)
3024 {
3025 #if defined SUPPORT_PCRE32
3026 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
3027 regexflip_32(ere, extra);
3028 #endif
3029 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
3030 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
3031 regexflip8_or_16(ere, extra);
3032 #endif
3033 }
3034
3035
3036
3037 /*************************************************
3038 * Check match or recursion limit *
3039 *************************************************/
3040
3041 static int
3042 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
3043 int start_offset, int options, int *use_offsets, int use_size_offsets,
3044 int flag, unsigned long int *limit, int errnumber, const char *msg)
3045 {
3046 int count;
3047 int min = 0;
3048 int mid = 64;
3049 int max = -1;
3050
3051 extra->flags |= flag;
3052
3053 for (;;)
3054 {
3055 *limit = mid;
3056
3057 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
3058 use_offsets, use_size_offsets);
3059
3060 if (count == errnumber)
3061 {
3062 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
3063 min = mid;
3064 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
3065 }
3066
3067 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
3068 count == PCRE_ERROR_PARTIAL)
3069 {
3070 if (mid == min + 1)
3071 {
3072 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
3073 break;
3074 }
3075 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
3076 max = mid;
3077 mid = (min + mid)/2;
3078 }
3079 else break; /* Some other error */
3080 }
3081
3082 extra->flags &= ~flag;
3083 return count;
3084 }
3085
3086
3087
3088 /*************************************************
3089 * Case-independent strncmp() function *
3090 *************************************************/
3091
3092 /*
3093 Arguments:
3094 s first string
3095 t second string
3096 n number of characters to compare
3097
3098 Returns: < 0, = 0, or > 0, according to the comparison
3099 */
3100
3101 static int
3102 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
3103 {
3104 while (n--)
3105 {
3106 int c = tolower(*s++) - tolower(*t++);
3107 if (c) return c;
3108 }
3109 return 0;
3110 }
3111
3112
3113
3114 /*************************************************
3115 * Check newline indicator *
3116 *************************************************/
3117
3118 /* This is used both at compile and run-time to check for <xxx> escapes. Print
3119 a message and return 0 if there is no match.
3120
3121 Arguments:
3122 p points after the leading '<'
3123 f file for error message
3124
3125 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
3126 */
3127
3128 static int
3129 check_newline(pcre_uint8 *p, FILE *f)
3130 {
3131 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
3132 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
3133 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
3134 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
3135 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
3136 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
3137 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
3138 fprintf(f, "Unknown newline type at: <%s\n", p);
3139 return 0;
3140 }
3141
3142
3143
3144 /*************************************************
3145 * Usage function *
3146 *************************************************/
3147
/* Write the command line help text to stdout. Options that depend on how
pcretest was built (-8, -16, -32, -dfa, -p) are listed only when the
corresponding support is compiled in. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE8
printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf(" -32 use the 32-bit library\n");
#endif
printf(" -b show compiled code\n"
       " -C show PCRE compile-time options and exit\n"
       " -C arg show a specific compile-time option\n"
       " and exit with its value. The arg can be:\n"
       " linksize internal link size [2, 3, 4]\n"
       " pcre8 8 bit library support enabled [0, 1]\n"
       " pcre16 16 bit library support enabled [0, 1]\n"
       " pcre32 32 bit library support enabled [0, 1]\n"
       " utf Unicode Transformation Format supported [0, 1]\n"
       " ucp Unicode Properties supported [0, 1]\n"
       " jit Just-in-time compiler supported [0, 1]\n"
       " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
       " ebcdic compiled for EBCDIC character code [0, 1]\n"
       " ebcdic-nl LF code (hex) if compiled for EBCDIC, else 0\n"
       " -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n"
       " -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n"
       " -S <n> set stack size to <n> megabytes\n"
       " -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -s++ ditto, verifying when JIT was actually used\n"
       " -s+n force each pattern to be studied, using JIT if available,\n"
       " where 1 <= n <= 7 selects JIT options\n"
       " -s++n ditto, verifying when JIT was actually used\n"
       " -t time compilation and execution\n"
       " -t <n> time compilation and execution, repeating <n> times\n"
       " -tm time execution (matching) only\n"
       " -tm <n> time execution (matching) only, repeating <n> times\n");
}
3202
3203
3204
3205 /*************************************************
3206 * Main Program *
3207 *************************************************/
3208
3209 /* Read lines from named file or stdin and write to named file or stdout; lines
3210 consist of a regular expression, in delimiters and optionally followed by
3211 options, followed by a set of test data, terminated by an empty line. */
3212
3213 int main(int argc, char **argv)
3214 {
3215 FILE *infile = stdin;
3216 const char *version;
3217 int options = 0;
3218 int study_options = 0;
3219 int default_find_match_limit = FALSE;
3220 int op = 1;
3221 int timeit = 0;
3222 int timeitm = 0;
3223 int showinfo = 0;
3224 int showstore = 0;
3225 int force_study = -1;
3226 int force_study_options = 0;
3227 int quiet = 0;
3228 int size_offsets = 45;
3229 int size_offsets_max;
3230 int *offsets = NULL;
3231 int debug = 0;
3232 int done = 0;
3233 int all_use_dfa = 0;
3234 int verify_jit = 0;
3235 int yield = 0;
3236 #ifdef SUPPORT_PCRE32
3237 int mask_utf32 = 0;
3238 #endif
3239 int stack_size;
3240 pcre_uint8 *dbuffer = NULL;
3241 size_t dbuffer_size = 1u << 14;
3242
3243 #if !defined NOPOSIX
3244 int posix = 0;
3245 #endif
3246 #if !defined NODFA
3247 int *dfa_workspace = NULL;
3248 #endif
3249
3250 pcre_jit_stack *jit_stack = NULL;
3251
3252 /* These vectors store, end-to-end, a list of zero-terminated captured
3253 substring names, each list itself being terminated by an empty name. Assume
3254 that 1024 is plenty long enough for the few names we'll be testing. It is
3255 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
3256 for the actual memory, to ensure alignment. */
3257
3258 pcre_uint32 copynames[1024];
3259 pcre_uint32 getnames[1024];
3260
3261 #ifdef SUPPORT_PCRE32
3262 pcre_uint32 *cn32ptr;
3263 pcre_uint32 *gn32ptr;
3264 #endif
3265
3266 #ifdef SUPPORT_PCRE16
3267 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
3268 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
3269 pcre_uint16 *cn16ptr;
3270 pcre_uint16 *gn16ptr;
3271 #endif
3272
3273 #ifdef SUPPORT_PCRE8
3274 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
3275 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
3276 pcre_uint8 *cn8ptr;
3277 pcre_uint8 *gn8ptr;
3278 #endif
3279
3280 /* Get buffers from malloc() so that valgrind will check their misuse when
3281 debugging. They grow automatically when very long lines are read. The 16-
3282 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3283
3284 buffer = (pcre_uint8 *)malloc(buffer_size);
3285 pbuffer = (pcre_uint8 *)malloc(buffer_size);
3286
3287 /* The outfile variable is static so that new_malloc can use it. */
3288
3289 outfile = stdout;
3290
3291 /* The following _setmode() stuff is some Windows magic that tells its runtime
3292 library to translate CRLF into a single LF character. At least, that's what
3293 I've been told: never having used Windows I take this all on trust. Originally
3294 it set 0x8000, but then I was advised that _O_BINARY was better. */
3295
3296 #if defined(_WIN32) || defined(WIN32)
3297 _setmode( _fileno( stdout ), _O_BINARY );
3298 #endif
3299
3300 /* Get the version number: both pcre_version() and pcre16_version() give the
3301 same answer. We just need to ensure that we call one that is available. */
3302
3303 #if defined SUPPORT_PCRE8
3304 version = pcre_version();
3305 #elif defined SUPPORT_PCRE16
3306 version = pcre16_version();
3307 #elif defined SUPPORT_PCRE32
3308 version = pcre32_version();
3309 #endif
3310
3311 /* Scan options */
3312
3313 while (argc > 1 && argv[op][0] == '-')
3314 {
3315 pcre_uint8 *endptr;
3316 char *arg = argv[op];
3317
3318 if (strcmp(arg, "-m") == 0) showstore = 1;
3319 else if (strcmp(arg, "-s") == 0) force_study = 0;
3320
3321 else if (strncmp(arg, "-s+", 3) == 0)
3322 {
3323 arg += 3;
3324 if (*arg == '+') { arg++; verify_jit = TRUE; }
3325 force_study = 1;
3326 if (*arg == 0)
3327 force_study_options = jit_study_bits[6];
3328 else if (*arg >= '1' && *arg <= '7')
3329 force_study_options = jit_study_bits[*arg - '1'];
3330 else goto BAD_ARG;
3331 }
3332 else if (strcmp(arg, "-8") == 0)
3333 {
3334 #ifdef SUPPORT_PCRE8
3335 pcre_mode = PCRE8_MODE;
3336 #else
3337 printf("** This version of PCRE was built without 8-bit support\n");
3338 exit(1);
3339 #endif
3340 }
3341 else if (strcmp(arg, "-16") == 0)
3342 {
3343 #ifdef SUPPORT_PCRE16
3344 pcre_mode = PCRE16_MODE;
3345 #else
3346 printf("** This version of PCRE was built without 16-bit support\n");
3347 exit(1);
3348 #endif
3349 }
3350 else if (strcmp(arg, "-32") == 0 || strcmp(arg, "-32+") == 0)
3351 {
3352 #ifdef SUPPORT_PCRE32
3353 pcre_mode = PCRE32_MODE;
3354 mask_utf32 = (strcmp(arg, "-32+") == 0);
3355 #else
3356 printf("** This version of PCRE was built without 32-bit support\n");
3357 exit(1);
3358 #endif
3359 }
3360 else if (strcmp(arg, "-q") == 0) quiet = 1;
3361 else if (strcmp(arg, "-b") == 0) debug = 1;
3362 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3363 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3364 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3365 #if !defined NODFA
3366 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3367 #endif
3368 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3369 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3370 *endptr == 0))
3371 {
3372 op++;
3373 argc--;
3374 }
3375 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
3376 {
3377 int both = arg[2] == 0;
3378 int temp;
3379 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3380 *endptr == 0))
3381 {
3382 timeitm = temp;
3383 op++;
3384 argc--;
3385 }
3386 else timeitm = LOOPREPEAT;
3387 if (both) timeit = timeitm;
3388 }
3389 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3390 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3391 *endptr == 0))
3392 {
3393 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
3394 printf("PCRE: -S not supported on this OS\n");
3395 exit(1);
3396 #else
3397 int rc;
3398 struct rlimit rlim;
3399 getrlimit(RLIMIT_STACK, &rlim);
3400 rlim.rlim_cur = stack_size * 1024 * 1024;
3401 rc = setrlimit(RLIMIT_STACK, &rlim);
3402 if (rc != 0)
3403 {
3404 printf("PCRE: setrlimit() failed with error %d\n", rc);
3405 exit(1);
3406 }
3407 op++;
3408 argc--;
3409 #endif
3410 }
3411 #if !defined NOPOSIX
3412 else if (strcmp(arg, "-p") == 0) posix = 1;
3413 #endif
3414 else if (strcmp(arg, "-C") == 0)
3415 {
3416 int rc;
3417 unsigned long int lrc;
3418
3419 if (argc > 2)
3420 {
3421 if (strcmp(argv[op + 1], "linksize") == 0)
3422 {
3423 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3424 printf("%d\n", rc);
3425 yield = rc;
3426 }
3427 else if (strcmp(argv[op + 1], "pcre8") == 0)
3428 {
3429 #ifdef SUPPORT_PCRE8
3430 printf("1\n");
3431 yield = 1;
3432 #else
3433 printf("0\n");
3434 yield = 0;
3435 #endif
3436 }
3437 else if (strcmp(argv[op + 1], "pcre16") == 0)
3438 {
3439 #ifdef SUPPORT_PCRE16
3440 printf("1\n");
3441 yield = 1;
3442 #else
3443 printf("0\n");
3444 yield = 0;
3445 #endif
3446 }
3447 else if (strcmp(argv[op + 1], "pcre32") == 0)
3448 {
3449 #ifdef SUPPORT_PCRE32
3450 printf("1\n");
3451 yield = 1;
3452 #else
3453 printf("0\n");
3454 yield = 0;
3455 #endif
3456 goto EXIT;
3457 }
3458 if (strcmp(argv[op + 1], "utf") == 0)
3459 {
3460 #ifdef SUPPORT_PCRE8
3461 if (pcre_mode == PCRE8_MODE)
3462 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3463 #endif
3464 #ifdef SUPPORT_PCRE16
3465 if (pcre_mode == PCRE16_MODE)
3466 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3467 #endif
3468 #ifdef SUPPORT_PCRE32
3469 if (pcre_mode == PCRE32_MODE)
3470 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3471 #endif
3472 printf("%d\n", rc);
3473 yield = rc;
3474 goto EXIT;
3475 }
3476 else if (strcmp(argv[op + 1], "ucp") == 0)
3477 {
3478 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3479 printf("%d\n", rc);
3480 yield = rc;
3481 }
3482 else if (strcmp(argv[op + 1], "jit") == 0)
3483 {
3484 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3485 printf("%d\n", rc);
3486 yield = rc;
3487 }
3488 else if (strcmp(argv[op + 1], "newline") == 0)
3489 {
3490 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3491 print_newline_config(rc, TRUE);
3492 }
3493 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3494 {
3495 #ifdef EBCDIC
3496 printf("1\n");
3497 yield = 1;
3498 #else
3499 printf("0\n");
3500 #endif
3501 }
3502 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3503 {
3504 #ifdef EBCDIC
3505 printf("0x%02x\n", CHAR_LF);
3506 #else
3507 printf("0\n");
3508 #endif
3509 }
3510 else
3511 {
3512 printf("Unknown -C option: %s\n", argv[op + 1]);
3513 }
3514 goto EXIT;
3515 }
3516
3517 /* No argument for -C: output all configuration information. */
3518
3519 printf("PCRE version %s\n", version);
3520 printf("Compiled with\n");
3521
3522 #ifdef EBCDIC
3523 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3524 #endif
3525
3526 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3527 are set, either both UTFs are supported or both are not supported. */
3528
3529 #ifdef SUPPORT_PCRE8
3530 printf(" 8-bit support\n");
3531 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3532 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3533 #endif
3534 #ifdef SUPPORT_PCRE16
3535 printf(" 16-bit support\n");
3536 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3537 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3538 #endif
3539 #ifdef SUPPORT_PCRE32
3540 printf(" 32-bit support\n");
3541 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3542 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3543 #endif
3544
3545 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3546 printf(" %sUnicode properties support\n", rc? "" : "No ");
3547 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3548 if (rc)
3549 {
3550 const char *arch;
3551 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3552 printf(" Just-in-time compiler support: %s\n", arch);
3553 }
3554 else
3555 printf(" No just-in-time compiler support\n");
3556 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3557 print_newline_config(rc, FALSE);
3558 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3559 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3560 "all Unicode newlines");
3561 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3562 printf(" Internal link size = %d\n", rc);
3563 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3564 printf(" POSIX malloc threshold = %d\n", rc);
3565 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3566 printf(" Default match limit = %ld\n", lrc);
3567 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3568 printf(" Default recursion depth limit = %ld\n", lrc);
3569 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3570 printf(" Match recursion uses %s", rc? "stack" : "heap");
3571 if (showstore)
3572 {
3573 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3574 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3575 }
3576 printf("\n");
3577 goto EXIT;
3578 }
3579 else if (strcmp(arg, "-help") == 0 ||
3580 strcmp(arg, "--help") == 0)
3581 {
3582 usage();
3583 goto EXIT;
3584 }
3585 else
3586 {
3587 BAD_ARG:
3588 printf("** Unknown or malformed option %s\n", arg);
3589 usage();
3590 yield = 1;
3591 goto EXIT;
3592 }
3593 op++;
3594 argc--;
3595 }
3596
3597 /* Get the store for the offsets vector, and remember what it was */
3598
3599 size_offsets_max = size_offsets;
3600 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3601 if (offsets == NULL)
3602 {
3603 printf("** Failed to get %d bytes of memory for offsets vector\n",
3604 (int)(size_offsets_max * sizeof(int)));
3605 yield = 1;
3606 goto EXIT;
3607 }
3608
3609 /* Sort out the input and output files */
3610
3611 if (argc > 1)
3612 {
3613 infile = fopen(argv[op], INPUT_MODE);
3614 if (infile == NULL)
3615 {
3616 printf("** Failed to open %s\n", argv[op]);
3617 yield = 1;
3618 goto EXIT;
3619 }
3620 }
3621
3622 if (argc > 2)
3623 {
3624 outfile = fopen(argv[op+1], OUTPUT_MODE);
3625 if (outfile == NULL)
3626 {
3627 printf("** Failed to open %s\n", argv[op+1]);
3628 yield = 1;
3629 goto EXIT;
3630 }
3631 }
3632
3633 /* Set alternative malloc function */
3634
3635 #ifdef SUPPORT_PCRE8
3636 pcre_malloc = new_malloc;
3637 pcre_free = new_free;
3638 pcre_stack_malloc = stack_malloc;
3639 pcre_stack_free = stack_free;
3640 #endif
3641
3642 #ifdef SUPPORT_PCRE16
3643 pcre16_malloc = new_malloc;
3644 pcre16_free = new_free;
3645 pcre16_stack_malloc = stack_malloc;
3646 pcre16_stack_free = stack_free;
3647 #endif
3648
3649 #ifdef SUPPORT_PCRE32
3650 pcre32_malloc = new_malloc;
3651 pcre32_free = new_free;
3652 pcre32_stack_malloc = stack_malloc;
3653 pcre32_stack_free = stack_free;
3654 #endif
3655
3656 /* Heading line unless quiet, then prompt for first regex if stdin */
3657
3658 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3659
3660 /* Main loop */
3661
3662 while (!done)
3663 {
3664 pcre *re = NULL;
3665 pcre_extra *extra = NULL;
3666
3667 #if !defined NOPOSIX /* There are still compilers that require no indent */
3668 regex_t preg;
3669 int do_posix = 0;
3670 #endif
3671
3672 const char *error;
3673 pcre_uint8 *markptr;
3674 pcre_uint8 *p, *pp, *ppp;
3675 pcre_uint8 *to_file = NULL;
3676 const pcre_uint8 *tables = NULL;
3677 unsigned long int get_options;
3678 unsigned long int true_size, true_study_size = 0;
3679 size_t size, regex_gotten_store;
3680 int do_allcaps = 0;
3681 int do_mark = 0;
3682 int do_study = 0;
3683 int no_force_study = 0;
3684 int do_debug = debug;
3685 int do_G = 0;
3686 int do_g = 0;
3687 int do_showinfo = showinfo;
3688 int do_showrest = 0;
3689 int do_showcaprest = 0;
3690 int do_flip = 0;
3691 int erroroffset, len, delimiter, poffset;
3692
3693 #if !defined NODFA
3694 int dfa_matched = 0;
3695 #endif
3696
3697 use_utf = 0;
3698 debug_lengths = 1;
3699
3700 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3701 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3702 fflush(outfile);
3703
3704 p = buffer;
3705 while (isspace(*p)) p++;
3706 if (*p == 0) continue;
3707
3708 /* See if the pattern is to be loaded pre-compiled from a file. */
3709
3710 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3711 {
3712 pcre_uint32 magic;
3713 pcre_uint8 sbuf[8];
3714 FILE *f;
3715
3716 p++;
3717 if (*p == '!')
3718 {
3719 do_debug = TRUE;
3720 do_showinfo = TRUE;
3721 p++;
3722 }
3723
3724 pp = p + (int)strlen((char *)p);
3725 while (isspace(pp[-1])) pp--;
3726 *pp = 0;
3727
3728 f = fopen((char *)p, "rb");
3729 if (f == NULL)
3730 {
3731 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3732 continue;
3733 }
3734
3735 first_gotten_store = 0;
3736 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3737
3738 true_size =
3739 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3740 true_study_size =
3741 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3742
3743 re = (pcre *)new_malloc(true_size);
3744 if (re == NULL)
3745 {
3746 printf("** Failed to get %d bytes of memory for pcre object\n",
3747 (int)true_size);
3748 yield = 1;
3749 goto EXIT;
3750 }
3751 regex_gotten_store = first_gotten_store;
3752
3753 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3754
3755 magic = REAL_PCRE_MAGIC(re);
3756 if (magic != MAGIC_NUMBER)
3757 {
3758 if (swap_uint32(magic) == MAGIC_NUMBER)
3759 {
3760 do_flip = 1;
3761 }
3762 else
3763 {
3764 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3765 new_free(re);
3766 fclose(f);
3767 continue;
3768 }
3769 }
3770
3771 /* We hide the byte-invert info for little and big endian tests. */
3772 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3773 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3774
3775 /* Now see if there is any following study data. */
3776
3777 if (true_study_size != 0)
3778 {
3779 pcre_study_data *psd;
3780
3781 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3782 extra->flags = PCRE_EXTRA_STUDY_DATA;
3783
3784 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3785 extra->study_data = psd;
3786
3787 if (fread(psd, 1, true_study_size, f) != true_study_size)
3788 {
3789 FAIL_READ:
3790 fprintf(outfile, "Failed to read data from %s\n", p);
3791 if (extra != NULL)
3792 {
3793 PCRE_FREE_STUDY(extra);
3794 }
3795 new_free(re);
3796 fclose(f);
3797 continue;
3798 }
3799 fprintf(outfile, "Study data loaded from %s\n", p);
3800 do_study = 1; /* To get the data output if requested */
3801 }
3802 else fprintf(outfile, "No study data\n");
3803
3804 /* Flip the necessary bytes. */
3805 if (do_flip)
3806 {
3807 int rc;
3808 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3809 if (rc == PCRE_ERROR_BADMODE)
3810 {
3811 /* Simulate the result of the function call below. */
3812 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3813 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3814 PCRE_INFO_OPTIONS);
3815 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3816 "%d-bit mode\n", 8 * CHAR_SIZE,
3817 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3818 new_free(re);
3819 fclose(f);
3820 continue;
3821 }
3822 }
3823
3824 /* Need to know if UTF-8 for printing data strings. */
3825
3826 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3827 {
3828 new_free(re);
3829 fclose(f);
3830 continue;
3831 }
3832 use_utf = (get_options & PCRE_UTF8) != 0;
3833
3834 fclose(f);
3835 goto SHOW_INFO;
3836 }
3837
3838 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3839 the pattern; if it isn't complete, read more. */
3840
3841 delimiter = *p++;
3842
3843 if (isalnum(delimiter) || delimiter == '\\')
3844 {
3845 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3846 goto SKIP_DATA;
3847 }
3848
3849 pp = p;
3850 poffset = (int)(p - buffer);
3851
3852 for(;;)
3853 {
3854 while (*pp != 0)
3855 {
3856 if (*pp == '\\' && pp[1] != 0) pp++;
3857 else if (*pp == delimiter) break;
3858 pp++;
3859 }
3860 if (*pp != 0) break;
3861 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3862 {
3863 fprintf(outfile, "** Unexpected EOF\n");
3864 done = 1;
3865 goto CONTINUE;
3866 }
3867 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3868 }
3869
3870 /* The buffer may have moved while being extended; reset the start of data
3871 pointer to the correct relative point in the buffer. */
3872
3873 p = buffer + poffset;
3874
3875 /* If the first character after the delimiter is backslash, make
3876 the pattern end with backslash. This is purely to provide a way
3877 of testing for the error message when a pattern ends with backslash. */
3878
3879 if (pp[1] == '\\') *pp++ = '\\';
3880
3881 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3882 for callouts. */
3883
3884 *pp++ = 0;
3885 strcpy((char *)pbuffer, (char *)p);
3886
3887 /* Look for options after final delimiter */
3888
3889 options = 0;
3890 study_options = force_study_options;
3891 log_store = showstore; /* default from command line */
3892
3893 while (*pp != 0)
3894 {
3895 switch (*pp++)
3896 {
3897 case 'f': options |= PCRE_FIRSTLINE; break;
3898 case 'g': do_g = 1; break;
3899 case 'i': options |= PCRE_CASELESS; break;
3900 case 'm': options |= PCRE_MULTILINE; break;
3901 case 's': options |= PCRE_DOTALL; break;
3902 case 'x': options |= PCRE_EXTENDED; break;
3903
3904 case '+':
3905 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3906 break;
3907
3908 case '=': do_allcaps = 1; break;
3909 case 'A': options |= PCRE_ANCHORED; break;
3910 case 'B': do_debug = 1; break;
3911 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3912 case 'D': do_debug = do_showinfo = 1; break;
3913 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3914 case 'F': do_flip = 1; break;
3915 case 'G': do_G = 1; break;
3916 case 'I': do_showinfo = 1; break;
3917 case 'J': options |= PCRE_DUPNAMES; break;
3918 case 'K': do_mark = 1; break;
3919 case 'M': log_store = 1; break;
3920 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3921
3922 #if !defined NOPOSIX
3923 case 'P': do_posix = 1; break;
3924 #endif
3925
3926 case 'S':
3927 do_study = 1;
3928 for (;;)
3929 {
3930 switch (*pp++)
3931 {
3932 case 'S':
3933 do_study = 0;
3934 no_force_study = 1;
3935 break;
3936
3937 case '!':
3938 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3939 break;
3940
3941 case '+':
3942 if (*pp == '+')
3943 {
3944 verify_jit = TRUE;
3945 pp++;
3946 }
3947 if (*pp >= '1' && *pp <= '7')
3948 study_options |= jit_study_bits[*pp++ - '1'];
3949 else
3950 study_options |= jit_study_bits[6];
3951 break;
3952
3953 case '-':
3954 study_options &= ~PCRE_STUDY_ALLJIT;
3955 break;
3956
3957 default:
3958 pp--;
3959 goto ENDLOOP;
3960 }
3961 }
3962 ENDLOOP:
3963 break;
3964
3965 case 'U': options |= PCRE_UNGREEDY; break;
3966 case 'W': options |= PCRE_UCP; break;
3967 case 'X': options |= PCRE_EXTRA; break;
3968 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3969 case 'Z': debug_lengths = 0; break;
3970 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3971 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3972
3973 case 'T':
3974 switch (*pp++)
3975 {
3976 case '0': tables = tables0; break;
3977 case '1': tables = tables1; break;
3978
3979 case '\r':
3980 case '\n':
3981 case ' ':
3982 case 0:
3983 fprintf(outfile, "** Missing table number after /T\n");
3984 goto SKIP_DATA;
3985
3986 default:
3987 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3988 goto SKIP_DATA;
3989 }
3990 break;
3991
3992 case 'L':
3993 ppp = pp;
3994 /* The '\r' test here is so that it works on Windows. */
3995 /* The test for a zero (NUL) byte is just in case this is an unterminated line. */
3996 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3997 *ppp = 0;
3998 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3999 {
4000 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
4001 goto SKIP_DATA;
4002 }
4003 locale_set = 1;
4004 tables = PCRE_MAKETABLES;
4005 pp = ppp;
4006 break;
4007
4008 case '>':
4009 to_file = pp;
4010 while (*pp != 0) pp++;
4011 while (isspace(pp[-1])) pp--;
4012 *pp = 0;
4013 break;
4014
4015 case '<':
4016 {
4017 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
4018 {
4019 options |= PCRE_JAVASCRIPT_COMPAT;
4020 pp += 3;
4021 }
4022 else
4023 {
4024 int x = check_newline(pp, outfile);
4025 if (x == 0) goto SKIP_DATA;
4026 options |= x;
4027 while (*pp++ != '>');
4028 }
4029 }
4030 break;
4031
4032 case '\r': /* So that it works in Windows */
4033 case '\n':
4034 case ' ':
4035 break;
4036
4037 default:
4038 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
4039 goto SKIP_DATA;
4040 }
4041 }
4042
4043 /* Handle compiling via the POSIX interface, which doesn't support the
4044 timing, showing, or debugging options, nor the ability to pass over
4045 local character tables. Neither does it have 16-bit support. */
4046
4047 #if !defined NOPOSIX
4048 if (posix || do_posix)
4049 {
4050 int rc;
4051 int cflags = 0;
4052
4053 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
4054 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
4055 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
4056 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
4057 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
4058 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
4059 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
4060
4061 first_gotten_store = 0;
4062 rc = regcomp(&preg, (char *)p, cflags);
4063
4064 /* Compilation failed; go back for another re, skipping to blank line
4065 if non-interactive. */
4066
4067 if (rc != 0)
4068 {
4069 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4070 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
4071 goto SKIP_DATA;
4072 }
4073 }
4074
4075 /* Handle compiling via the native interface */
4076
4077 else
4078 #endif /* !defined NOPOSIX */
4079
4080 {
4081 /* In 16- or 32-bit mode, convert the input. */
4082
4083 #ifdef SUPPORT_PCRE16
4084 if (pcre_mode == PCRE16_MODE)
4085 {
4086 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
4087 {
4088 case -1:
4089 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4090 "converted to UTF-16\n");
4091 goto SKIP_DATA;
4092
4093 case -2:
4094 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4095 "cannot be converted to UTF-16\n");
4096 goto SKIP_DATA;
4097
4098 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
4099 fprintf(outfile, "**Failed: character value greater than 0xffff "
4100 "cannot be converted to 16-bit in non-UTF mode\n");
4101 goto SKIP_DATA;
4102
4103 default:
4104 break;
4105 }
4106 p = (pcre_uint8 *)buffer16;
4107 }
4108 #endif
4109
4110 #ifdef SUPPORT_PCRE32
4111 if (pcre_mode == PCRE32_MODE)
4112 {
4113 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
4114 {
4115 case -1:
4116 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4117 "converted to UTF-32\n");
4118 goto SKIP_DATA;
4119
4120 case -2:
4121 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4122 "cannot be converted to UTF-32\n");
4123 goto SKIP_DATA;
4124
4125 case -3:
4126 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4127 goto SKIP_DATA;
4128
4129 default:
4130 break;
4131 }
4132 p = (pcre_uint8 *)buffer32;
4133 }
4134 #endif
4135
4136 /* Compile many times when timing */
4137
4138 if (timeit > 0)
4139 {
4140 register int i;
4141 clock_t time_taken;
4142 clock_t start_time = clock();
4143 for (i = 0; i < timeit; i++)
4144 {
4145 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4146 if (re != NULL) free(re);
4147 }
4148 time_taken = clock() - start_time;
4149 fprintf(outfile, "Compile time %.4f milliseconds\n",
4150 (((double)time_taken * 1000.0) / (double)timeit) /
4151 (double)CLOCKS_PER_SEC);
4152 }
4153
4154 first_gotten_store = 0;
4155 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4156
4157 /* Compilation failed; go back for another re, skipping to blank line
4158 if non-interactive. */
4159
4160 if (re == NULL)
4161 {
4162 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
4163 SKIP_DATA:
4164 if (infile != stdin)
4165 {
4166 for (;;)
4167 {
4168 if (extend_inputline(infile, buffer, NULL) == NULL)
4169 {
4170 done = 1;
4171 goto CONTINUE;
4172 }
4173 len = (int)strlen((char *)buffer);
4174 while (len > 0 && isspace(buffer[len-1])) len--;
4175 if (len == 0) break;
4176 }
4177 fprintf(outfile, "\n");
4178 }
4179 goto CONTINUE;
4180 }
4181
4182 /* Compilation succeeded. It is now possible to set the UTF-8 option from
4183 within the regex; check for this so that we know how to process the data
4184 lines. */
4185
4186 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
4187 goto SKIP_DATA;
4188 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
4189
4190 /* Extract the size for possible writing before possibly flipping it,
4191 and remember the store that was got. */
4192
4193 true_size = REAL_PCRE_SIZE(re);
4194 regex_gotten_store = first_gotten_store;
4195
4196 /* Output code size information if requested */
4197
4198 if (log_store)
4199 {
4200 int name_count, name_entry_size, real_pcre_size;
4201
4202 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
4203 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
4204 #ifdef SUPPORT_PCRE8
4205 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
4206 real_pcre_size = sizeof(real_pcre);
4207 #endif
4208 #ifdef SUPPORT_PCRE16
4209 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
4210 real_pcre_size = sizeof(real_pcre16);
4211 #endif
4212 #ifdef SUPPORT_PCRE32
4213 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
4214 real_pcre_size = sizeof(real_pcre32);
4215 #endif
4216 fprintf(outfile, "Memory allocation (code space): %d\n",
4217 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
4218 }
4219
4220 /* If -s or /S was present, study the regex to generate additional info to
4221 help with the matching, unless the pattern has the SS option, which
4222 suppresses the effect of /S (used for a few test patterns where studying is
4223 never sensible). */
4224
4225 if (do_study || (force_study >= 0 && !no_force_study))
4226 {
4227 if (timeit > 0)
4228 {
4229 register int i;
4230 clock_t time_taken;
4231 clock_t start_time = clock();
4232 for (i = 0; i < timeit; i++)
4233 {
4234 PCRE_STUDY(extra, re, study_options, &error);
4235 }
4236 time_taken = clock() - start_time;
4237 if (extra != NULL)
4238 {
4239 PCRE_FREE_STUDY(extra);
4240 }
4241 fprintf(outfile, " Study time %.4f milliseconds\n",
4242 (((double)time_taken * 1000.0) / (double)timeit) /
4243 (double)CLOCKS_PER_SEC);
4244 }
4245 PCRE_STUDY(extra, re, study_options, &error);
4246 if (error != NULL)
4247 fprintf(outfile, "Failed to study: %s\n", error);
4248 else if (extra != NULL)
4249 {
4250 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
4251 if (log_store)
4252 {
4253 size_t jitsize;
4254 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
4255 jitsize != 0)
4256 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
4257 }
4258 }
4259 }
4260
4261 /* If /K was present, we set up for handling MARK data. */
4262
4263 if (do_mark)
4264 {
4265 if (extra == NULL)
4266 {
4267 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4268 extra->flags = 0;
4269 }
4270 extra->mark = &markptr;
4271 extra->flags |= PCRE_EXTRA_MARK;
4272 }
4273
4274 /* Extract and display information from the compiled data if required. */
4275
4276 SHOW_INFO:
4277
4278 if (do_debug)
4279 {
4280 fprintf(outfile, "------------------------------------------------------------------\n");
4281 PCRE_PRINTINT(re, outfile, debug_lengths);
4282 }
4283
4284 /* We already have the options in get_options (see above) */
4285
4286 if (do_showinfo)
4287 {
4288 unsigned long int all_options;
4289 pcre_uint32 first_char, need_char;
4290 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4291 hascrorlf, maxlookbehind;
4292 int nameentrysize, namecount;
4293 const pcre_uint8 *nametable;
4294
4295 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4296 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4297 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4298 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4299 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4300 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4301 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4302 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4303 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4304 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4305 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4306 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4307 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4308 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4309 != 0)
4310 goto SKIP_DATA;
4311
4312 if (size != regex_gotten_store) fprintf(outfile,
4313 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
4314 (int)size, (int)regex_gotten_store);
4315
4316 fprintf(outfile, "Capturing subpattern count = %d\n", count);
4317 if (backrefmax > 0)
4318 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4319
4320 if (namecount > 0)
4321 {
4322 fprintf(outfile, "Named capturing subpatterns:\n");
4323 while (namecount-- > 0)
4324 {
4325 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4326 int length = (int)STRLEN(nametable + imm2_size);
4327 fprintf(outfile, " ");
4328 PCHARSV(nametable, imm2_size, length, outfile);
4329 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4330 #ifdef SUPPORT_PCRE32
4331 if (pcre_mode == PCRE32_MODE)
4332 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4333 #endif
4334 #ifdef SUPPORT_PCRE16
4335 if (pcre_mode == PCRE16_MODE)
4336 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4337 #endif
4338 #ifdef SUPPORT_PCRE8
4339 if (pcre_mode == PCRE8_MODE)
4340 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4341 #endif
4342 nametable += nameentrysize * CHAR_SIZE;
4343 }
4344 }
4345
4346 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4347 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4348
4349 all_options = REAL_PCRE_OPTIONS(re);
4350 if (do_flip) all_options = swap_uint32(all_options);
4351
4352 if (get_options == 0) fprintf(outfile, "No options\n");
4353 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4354 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4355 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4356 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4357 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4358 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4359 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4360 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4361 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4362 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4363 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4364 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4365 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4366 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4367 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4368 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4369 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4370 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
4371
4372 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4373
4374 switch (get_options & PCRE_NEWLINE_BITS)
4375 {
4376 case PCRE_NEWLINE_CR:
4377 fprintf(outfile, "Forced newline sequence: CR\n");
4378 break;
4379
4380 case PCRE_NEWLINE_LF:
4381 fprintf(outfile, "Forced newline sequence: LF\n");
4382 break;
4383
4384 case PCRE_NEWLINE_CRLF:
4385 fprintf(outfile, "Forced newline sequence: CRLF\n");
4386 break;
4387
4388 case PCRE_NEWLINE_ANYCRLF:
4389 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4390 break;
4391
4392 case PCRE_NEWLINE_ANY:
4393 fprintf(outfile, "Forced newline sequence: ANY\n");
4394 break;
4395
4396 default:
4397 break;
4398 }
4399
4400 if (first_char_set == 2)
4401 {
4402 fprintf(outfile, "First char at start or follows newline\n");
4403 }
4404 else if (first_char_set == 1)
4405 {
4406 const char *caseless =
4407 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4408 "" : " (caseless)";
4409
4410 if (PRINTOK(first_char))
4411 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4412 else
4413 {
4414 fprintf(outfile, "First char = ");
4415 pchar(first_char, outfile);
4416 fprintf(outfile, "%s\n", caseless);
4417 }
4418 }
4419 else
4420 {
4421 fprintf(outfile, "No first char\n");
4422 }
4423
4424 if (need_char_set == 0)
4425 {
4426 fprintf(outfile, "No need char\n");
4427 }
4428 else
4429 {
4430 const char *caseless =
4431 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4432 "" : " (caseless)";
4433
4434 if (PRINTOK(need_char))
4435 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4436 else
4437 {
4438 fprintf(outfile, "Need char = ");
4439 pchar(need_char, outfile);
4440 fprintf(outfile, "%s\n", caseless);
4441 }
4442 }
4443
4444 if (maxlookbehind > 0)
4445 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4446
4447 /* Don't output study size; at present it is in any case a fixed
4448 value, but it varies, depending on the computer architecture, and
4449 so messes up the test suite. (And with the /F option, it might be
4450 flipped.) If study was forced by an external -s, don't show this
4451 information unless -i or -d was also present. This means that, except
4452 when auto-callouts are involved, the output from runs with and without
4453 -s should be identical. */
4454
4455 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4456 {
4457 if (extra == NULL)
4458 fprintf(outfile, "Study returned NULL\n");
4459 else
4460 {
4461 pcre_uint8 *start_bits = NULL;
4462 int minlength;
4463
4464 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4465 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4466
4467 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4468 {
4469 if (start_bits == NULL)
4470 fprintf(outfile, "No set of starting bytes\n");
4471 else
4472 {
4473 int i;
4474 int c = 24;
4475 fprintf(outfile, "Starting byte set: ");
4476 for (i = 0; i < 256; i++)
4477 {
4478 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4479 {
4480 if (c > 75)
4481 {
4482 fprintf(outfile, "\n ");
4483 c = 2;
4484 }
4485 if (PRINTOK(i) && i != ' ')
4486 {
4487 fprintf(outfile, "%c ", i);
4488 c += 2;
4489 }
4490 else
4491 {
4492 fprintf(outfile, "\\x%02x ", i);
4493 c += 5;
4494 }
4495 }
4496 }
4497 fprintf(outfile, "\n");
4498 }
4499 }
4500 }
4501
4502 /* Show this only if the JIT was set by /S, not by -s. */
4503
4504 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4505 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4506 {
4507 int jit;
4508 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4509 {
4510 if (jit)
4511 fprintf(outfile, "JIT study was successful\n");
4512 else
4513 #ifdef SUPPORT_JIT
4514 fprintf(outfile, "JIT study was not successful\n");
4515 #else
4516 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4517 #endif
4518 }
4519 }
4520 }
4521 }
4522
4523 /* If the '>' option was present, we write out the regex to a file, and
4524 that is all. The first 8 bytes of the file are the regex length and then
4525 the study length, in big-endian order. */
4526
4527 if (to_file != NULL)
4528 {
4529 FILE *f = fopen((char *)to_file, "wb");
4530 if (f == NULL)
4531 {
4532 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4533 }
4534 else
4535 {
4536 pcre_uint8 sbuf[8];
4537
4538 if (do_flip) regexflip(re, extra);
4539 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4540 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4541 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4542 sbuf[3] = (pcre_uint8)((true_size) & 255);
4543 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4544 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4545 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4546 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4547
4548 if (fwrite(sbuf, 1, 8, f) < 8 ||
4549 fwrite(re, 1, true_size, f) < true_size)
4550 {
4551 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4552 }
4553 else
4554 {
4555 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4556
4557 /* If there is study data, write it. */
4558
4559 if (extra != NULL)
4560 {
4561 if (fwrite(extra->study_data, 1, true_study_size, f) <
4562 true_study_size)
4563 {
4564 fprintf(outfile, "Write error on %s: %s\n", to_file,
4565 strerror(errno));
4566 }
4567 else fprintf(outfile, "Study data written to %s\n", to_file);
4568 }
4569 }
4570 fclose(f);
4571 }
4572
4573 new_free(re);
4574 if (extra != NULL)
4575 {
4576 PCRE_FREE_STUDY(extra);
4577 }
4578 if (locale_set)
4579 {
4580 new_free((void *)tables);
4581 setlocale(LC_CTYPE, "C");
4582 locale_set = 0;
4583 }
4584 continue; /* With next regex */
4585 }
4586 } /* End of non-POSIX compile */
4587
4588 /* Read data lines and test them */
4589
4590 for (;;)
4591 {
4592 #ifdef SUPPORT_PCRE8
4593 pcre_uint8 *q8;
4594 #endif
4595 #ifdef SUPPORT_PCRE16
4596 pcre_uint16 *q16;
4597 #endif
4598 #ifdef SUPPORT_PCRE32
4599 pcre_uint32 *q32;
4600 #endif
4601 pcre_uint8 *bptr;
4602 int *use_offsets = offsets;
4603 int use_size_offsets = size_offsets;
4604 int callout_data = 0;
4605 int callout_data_set = 0;
4606 int count;
4607 pcre_uint32 c;
4608 int copystrings = 0;
4609 int find_match_limit = default_find_match_limit;
4610 int getstrings = 0;
4611 int getlist = 0;
4612 int gmatched = 0;
4613 int start_offset = 0;
4614 int start_offset_sign = 1;
4615 int g_notempty = 0;
4616 int use_dfa = 0;
4617
4618 *copynames = 0;
4619 *getnames = 0;
4620
4621 #ifdef SUPPORT_PCRE32
4622 cn32ptr = copynames;
4623 gn32ptr = getnames;
4624 #endif
4625 #ifdef SUPPORT_PCRE16
4626 cn16ptr = copynames16;
4627 gn16ptr = getnames16;
4628 #endif
4629 #ifdef SUPPORT_PCRE8
4630 cn8ptr = copynames8;
4631 gn8ptr = getnames8;
4632 #endif
4633
4634 SET_PCRE_CALLOUT(callout);
4635 first_callout = 1;
4636 last_callout_mark = NULL;
4637 callout_extra = 0;
4638 callout_count = 0;
4639 callout_fail_count = 999999;
4640 callout_fail_id = -1;
4641 show_malloc = 0;
4642 options = 0;
4643
4644 if (extra != NULL) extra->flags &=
4645 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4646
4647 len = 0;
4648 for (;;)
4649 {
4650 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4651 {
4652 if (len > 0) /* Reached EOF without hitting a newline */
4653 {
4654 fprintf(outfile, "\n");
4655 break;
4656 }
4657 done = 1;
4658 goto CONTINUE;
4659 }
4660 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4661 len = (int)strlen((char *)buffer);
4662 if (buffer[len-1] == '\n') break;
4663 }
4664
4665 while (len > 0 && isspace(buffer[len-1])) len--;
4666 buffer[len] = 0;
4667 if (len == 0) break;
4668
4669 p = buffer;
4670 while (isspace(*p)) p++;
4671
4672 #ifndef NOUTF
4673 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4674 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4675 if (use_utf)
4676 {
4677 pcre_uint8 *q;
4678 pcre_uint32 cc;
4679 int n = 1;
4680
4681 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4682 if (n <= 0)
4683 {
4684 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4685 goto NEXT_DATA;
4686 }
4687 }
4688 #endif
4689
4690 /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4691 the number of pcre_uchar units that will be needed. */
4692 if (dbuffer == NULL || (size_t)len >= dbuffer_size)
4693 {
4694 dbuffer_size *= 2;
4695 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4696 if (dbuffer == NULL)
4697 {
4698 fprintf(stderr, "pcretest: malloc(%d) failed\n", dbuffer_size);
4699 exit(1);
4700 }
4701 }
4702
4703 #ifdef SUPPORT_PCRE8
4704 q8 = (pcre_uint8 *) dbuffer;
4705 #endif
4706 #ifdef SUPPORT_PCRE16
4707 q16 = (pcre_uint16 *) dbuffer;
4708 #endif
4709 #ifdef SUPPORT_PCRE32
4710 q32 = (pcre_uint32 *) dbuffer;
4711 #endif
4712
4713 while ((c = *p++) != 0)
4714 {
4715 int i = 0;
4716 int n = 0;
4717
4718 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4719 In non-UTF mode, allow the value of the byte to fall through to later,
4720 where values greater than 127 are turned into UTF-8 when running in
4721 16-bit or 32-bit mode. */
4722
4723 if (c != '\\')
4724 {
4725 #ifndef NOUTF
4726 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4727 #endif
4728 }
4729
4730 /* Handle backslash escapes */
4731
4732 else switch ((c = *p++))
4733 {
4734 case 'a': c = 7; break;
4735 case 'b': c = '\b'; break;
4736 case 'e': c = 27; break;
4737 case 'f': c = '\f'; break;
4738 case 'n': c = '\n'; break;
4739 case 'r': c = '\r'; break;
4740 case 't': c = '\t'; break;
4741 case 'v': c = '\v'; break;
4742
4743 case '0': case '1': case '2': case '3':
4744 case '4': case '5': case '6': case '7':
4745 c -= '0';
4746 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4747 c = c * 8 + *p++ - '0';
4748 break;
4749
4750 case 'x':
4751 if (*p == '{')
4752 {
4753 pcre_uint8 *pt = p;
4754 c = 0;
4755
4756 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4757 when isxdigit() is a macro that refers to its argument more than
4758 once. This is banned by the C Standard, but apparently happens in at
4759 least one MacOS environment. */
4760
4761 for (pt++; isxdigit(*pt); pt++)
4762 {
4763 if (++i == 9)
4764 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4765 "using only the first eight.\n");
4766 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4767 }
4768 if (*pt == '}')
4769 {
4770 p = pt + 1;
4771 break;
4772 }
4773 /* Not correct form for \x{...}; fall through */
4774 }
4775
4776 /* \x without {} always defines just one byte in 8-bit mode. This
4777 allows UTF-8 characters to be constructed byte by byte, and also allows
4778 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4779 Otherwise, pass it down to later code so that it can be turned into
4780 UTF-8 when running in 16/32-bit mode. */
4781
4782 c = 0;
4783 while (i++ < 2 && isxdigit(*p))
4784 {
4785 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4786 p++;
4787 }
4788 #if !defined NOUTF && defined SUPPORT_PCRE8
4789 if (use_utf && (pcre_mode == PCRE8_MODE))
4790 {
4791 *q8++ = c;
4792 continue;
4793 }
4794 #endif
4795 break;
4796
4797 case 0: /* \ followed by EOF allows for an empty line */
4798 p--;
4799 continue;
4800
4801 case '>':
4802 if (*p == '-')
4803 {
4804 start_offset_sign = -1;
4805 p++;
4806 }
4807 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4808 start_offset *= start_offset_sign;
4809 continue;
4810
4811 case 'A': /* Option setting */
4812 options |= PCRE_ANCHORED;
4813 continue;
4814
4815 case 'B':
4816 options |= PCRE_NOTBOL;
4817 continue;
4818
4819 case 'C':
4820 if (isdigit(*p)) /* Set copy string */
4821 {
4822 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4823 copystrings |= 1 << n;
4824 }
4825 else if (isalnum(*p))
4826 {
4827 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4828 }
4829 else if (*p == '+')
4830 {
4831 callout_extra = 1;
4832 p++;
4833 }
4834 else if (*p == '-')
4835 {
4836 SET_PCRE_CALLOUT(NULL);
4837 p++;
4838 }
4839 else if (*p == '!')
4840 {
4841 callout_fail_id = 0;
4842 p++;
4843 while(isdigit(*p))
4844 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4845 callout_fail_count = 0;
4846 if (*p == '!')
4847 {
4848 p++;
4849 while(isdigit(*p))
4850 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4851 }
4852 }
4853 else if (*p == '*')
4854 {
4855 int sign = 1;
4856 callout_data = 0;
4857 if (*(++p) == '-') { sign = -1; p++; }
4858 while(isdigit(*p))
4859 callout_data = callout_data * 10 + *p++ - '0';
4860 callout_data *= sign;
4861 callout_data_set = 1;
4862 }
4863 continue;
4864
4865 #if !defined NODFA
4866 case 'D':
4867 #if !defined NOPOSIX
4868 if (posix || do_posix)
4869 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4870 else
4871 #endif
4872 use_dfa = 1;
4873 continue;
4874 #endif
4875
4876 #if !defined NODFA
4877 case 'F':
4878 options |= PCRE_DFA_SHORTEST;
4879 continue;
4880 #endif
4881
4882 case 'G':
4883 if (isdigit(*p))
4884 {
4885 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4886 getstrings |= 1 << n;
4887 }
4888 else if (isalnum(*p))
4889 {
4890 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4891 }
4892 continue;
4893
4894 case 'J':
4895 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4896 if (extra != NULL
4897 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4898 && extra->executable_jit != NULL)
4899 {
4900 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4901 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4902 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4903 }
4904 continue;
4905
4906 case 'L':
4907 getlist = 1;
4908 continue;
4909
4910 case 'M':
4911 find_match_limit = 1;
4912 continue;
4913
4914 case 'N':
4915 if ((options & PCRE_NOTEMPTY) != 0)
4916 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4917 else
4918 options |= PCRE_NOTEMPTY;
4919 continue;
4920
4921 case 'O':
4922 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4923 if (n > size_offsets_max)
4924 {
4925 size_offsets_max = n;
4926 free(offsets);
4927 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4928 if (offsets == NULL)
4929 {
4930 printf("** Failed to get %d bytes of memory for offsets vector\n",
4931 (int)(size_offsets_max * sizeof(int)));
4932 yield = 1;
4933 goto EXIT;
4934 }
4935 }
4936 use_size_offsets = n;
4937 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4938 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4939 continue;
4940
4941 case 'P':
4942 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4943 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4944 continue;
4945
4946 case 'Q':
4947 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4948 if (extra == NULL)
4949 {
4950 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4951 extra->flags = 0;
4952 }
4953 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4954 extra->match_limit_recursion = n;
4955 continue;
4956
4957 case 'q':
4958 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4959 if (extra == NULL)
4960 {
4961 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4962 extra->flags = 0;
4963 }
4964 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4965 extra->match_limit = n;
4966 continue;
4967
4968 #if !defined NODFA
4969 case 'R':
4970 options |= PCRE_DFA_RESTART;
4971 continue;
4972 #endif
4973
4974 case 'S':
4975 show_malloc = 1;
4976 continue;
4977
4978 case 'Y':
4979 options |= PCRE_NO_START_OPTIMIZE;
4980 continue;
4981
4982 case 'Z':
4983 options |= PCRE_NOTEOL;
4984 continue;
4985
4986 case '?':
4987 options |= PCRE_NO_UTF8_CHECK;
4988 continue;
4989
4990 case '<':
4991 {
4992 int x = check_newline(p, outfile);
4993 if (x == 0) goto NEXT_DATA;
4994 options |= x;
4995 while (*p++ != '>');
4996 }
4997 continue;
4998 }
4999
5000 /* We now have a character value in c that may be greater than 255. In
5001 16-bit or 32-bit mode, we always convert characters to UTF-8 so that
5002 values greater than 255 can be passed to non-UTF 16- or 32-bit strings.
5003 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
5004 than 127 in UTF mode must have come from \x{...} or octal constructs
5005 because values from \x.. get this far only in non-UTF mode. */
5006
5007 #ifdef SUPPORT_PCRE8
5008 if (pcre_mode == PCRE8_MODE)
5009 {
5010 #ifndef NOUTF
5011 if (use_utf)
5012 {
5013 q8 += ord2utf8(c, q8);
5014 }
5015 else
5016 #endif
5017 {
5018 if (c > 0xffu)
5019 {
5020 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
5021 "and UTF-8 mode is not enabled.\n", c);
5022 fprintf(outfile, "** Truncation will probably give the wrong "
5023 "result.\n");
5024 }
5025
5026 *q8++ = c;
5027 }
5028 }
5029 #endif
5030 #ifdef SUPPORT_PCRE16
5031 if (pcre_mode == PCRE16_MODE)
5032 {
5033 #ifndef NOUTF
5034 if (use_utf)
5035 {
5036 if (c > 0x10ffffu)
5037 {
5038 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
5039 "cannot be converted to UTF-16\n");
5040 goto NEXT_DATA;
5041 }
5042 else if (c >= 0x10000u)
5043 {
5044 c-= 0x10000u;
5045 *q16++ = 0xD800 | (c >> 10);
5046 *q16++ = 0xDC00 | (c & 0x3ff);
5047 }
5048 else
5049 *q16++ = c;
5050 }
5051 else
5052 #endif
5053 {
5054 if (c > 0xffffu)
5055 {
5056 fprintf(outfile, "** Character value is greater than 0xffff "
5057 "and UTF-16 mode is not enabled.\n");
5058 fprintf(outfile, "** Truncation will probably give the wrong "
5059 "result.\n");
5060 }
5061
5062 *q16++ = c;
5063 }
5064 }
5065 #endif
5066 #ifdef SUPPORT_PCRE32
5067 if (pcre_mode == PCRE32_MODE)
5068 {
5069 *q32++ = c;
5070 }
5071 #endif
5072
5073 }
5074
5075 /* Reached end of subject string */
5076
5077 #ifdef SUPPORT_PCRE8
5078 if (pcre_mode == PCRE8_MODE)
5079 {
5080 *q8 = 0;
5081 len = (int)(q8 - (pcre_uint8 *)dbuffer);
5082 }
5083 #endif
5084 #ifdef SUPPORT_PCRE16
5085 if (pcre_mode == PCRE16_MODE)
5086 {
5087 *q16 = 0;
5088 len = (int)(q16 - (pcre_uint16 *)dbuffer);
5089 }
5090 #endif
5091 #ifdef SUPPORT_PCRE32
5092 if (pcre_mode == PCRE32_MODE)
5093 {
5094 *q32 = 0;
5095 len = (int)(q32 - (pcre_uint32 *)dbuffer);
5096 }
5097 #endif
5098
5099 #if defined SUPPORT_UTF && defined SUPPORT_PCRE32
5100 /* If we're requested to test UTF-32 masking of high bits, change the data
5101 string to have high bits set, unless the string is invalid UTF-32.
5102 Since the JIT doesn't support this yet, only do it when not JITing. */
5103 if (use_utf && mask_utf32 && (study_options & PCRE_STUDY_ALLJIT) == 0 &&
5104 valid_utf32((pcre_uint32 *)dbuffer, len))
5105 {
5106 for (q32 = (pcre_uint32 *)dbuffer; *q32; q32++)
5107 *q32 |= ~(pcre_uint32)UTF32_MASK;
5108
5109 /* Need to pass NO_UTF32_CHECK so the high bits are allowed */
5110 options |= PCRE_NO_UTF32_CHECK;
5111 }
5112 #endif
5113
5114 /* Move the data to the end of the buffer so that a read over the end of
5115 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
5116 we are using the POSIX interface, we must include the terminating zero. */
5117
5118 bptr = dbuffer;
5119
5120 #if !defined NOPOSIX
5121 if (posix || do_posix)
5122 {
5123 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
5124 bptr += dbuffer_size - len - 1;
5125 }
5126 else
5127 #endif
5128 {
5129 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
5130 }
5131
5132 if ((all_use_dfa || use_dfa) && find_match_limit)
5133 {
5134 printf("**Match limit not relevant for DFA matching: ignored\n");
5135 find_match_limit = 0;
5136 }
5137
5138 /* Handle matching via the POSIX interface, which does not
5139 support timing or playing with the match limit or callout data. */
5140
5141 #if !defined NOPOSIX
5142 if (posix || do_posix)
5143 {
5144 int rc;
5145 int eflags = 0;
5146 regmatch_t *pmatch = NULL;
5147 if (use_size_offsets > 0)
5148 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
5149 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
5150 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
5151 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
5152
5153 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
5154
5155 if (rc != 0)
5156 {
5157 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
5158 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
5159 }
5160 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
5161 {
5162 fprintf(outfile, "Matched with REG_NOSUB\n");
5163 }
5164 else
5165 {
5166 size_t i;
5167 for (i = 0; i < (size_t)use_size_offsets; i++)
5168 {
5169 if (pmatch[i].rm_so >= 0)
5170 {
5171 fprintf(outfile, "%2d: ", (int)i);
5172 PCHARSV(dbuffer, pmatch[i].rm_so,
5173 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
5174 fprintf(outfile, "\n");
5175 if (do_showcaprest || (i == 0 && do_showrest))
5176 {
5177 fprintf(outfile, "%2d+ ", (int)i);
5178 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
5179 outfile);
5180 fprintf(outfile, "\n");
5181 }
5182 }
5183 }
5184 }
5185 free(pmatch);
5186 goto NEXT_DATA;
5187 }
5188
5189 #endif /* !defined NOPOSIX */
5190
5191 /* Handle matching via the native interface - repeats for /g and /G */
5192
5193 /* Ensure that there is a JIT callback if we want to verify that JIT was
5194 actually used. If jit_stack == NULL, no stack has yet been assigned. */
5195
5196 if (verify_jit && jit_stack == NULL && extra != NULL)
5197 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
5198
5199 for (;; gmatched++) /* Loop for /g or /G */
5200 {
5201 markptr = NULL;
5202 jit_was_used = FALSE;
5203
5204 if (timeitm > 0)
5205 {
5206 register int i;
5207 clock_t time_taken;
5208 clock_t start_time = clock();
5209
5210 #if !defined NODFA
5211 if (all_use_dfa || use_dfa)
5212 {
5213 if ((options & PCRE_DFA_RESTART) != 0)
5214 {
5215 fprintf(outfile, "Timing DFA restarts is not supported\n");
5216 break;
5217 }
5218 if (dfa_workspace == NULL)
5219 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5220 for (i = 0; i < timeitm; i++)
5221 {
5222 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5223 (options | g_notempty), use_offsets, use_size_offsets,
5224 dfa_workspace, DFA_WS_DIMENSION);
5225 }
5226 }
5227 else
5228 #endif
5229
5230 for (i = 0; i < timeitm; i++)
5231 {
5232 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5233 (options | g_notempty), use_offsets, use_size_offsets);
5234 }
5235 time_taken = clock() - start_time;
5236 fprintf(outfile, "Execute time %.4f milliseconds\n",
5237 (((double)time_taken * 1000.0) / (double)timeitm) /
5238 (double)CLOCKS_PER_SEC);
5239 }
5240
5241 /* If find_match_limit is set, we want to do repeated matches with
5242 varying limits in order to find the minimum value for the match limit and
5243 for the recursion limit. The match limits are relevant only to the normal
5244 running of pcre_exec(), so disable the JIT optimization. This makes it
5245 possible to run the same set of tests with and without JIT externally
5246 requested. */
5247
5248 if (find_match_limit)
5249 {
5250 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
5251 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5252 extra->flags = 0;
5253
5254 (void)check_match_limit(re, extra, bptr, len, start_offset,
5255 options|g_notempty, use_offsets, use_size_offsets,
5256 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
5257 PCRE_ERROR_MATCHLIMIT, "match()");
5258
5259 count = check_match_limit(re, extra, bptr, len, start_offset,
5260 options|g_notempty, use_offsets, use_size_offsets,
5261 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
5262 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
5263 }
5264
5265 /* If callout_data is set, use the interface with additional data */
5266
5267 else if (callout_data_set)
5268 {
5269 if (extra == NULL)
5270 {
5271 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5272 extra->flags = 0;
5273 }
5274 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
5275 extra->callout_data = &callout_data;
5276 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5277 options | g_notempty, use_offsets, use_size_offsets);
5278 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5279 }
5280
5281 /* The normal case is just to do the match once, with the default
5282 value of match_limit. */
5283
5284 #if !defined NODFA
5285 else if (all_use_dfa || use_dfa)
5286 {
5287 if (dfa_workspace == NULL)
5288 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5289 if (dfa_matched++ == 0)
5290 dfa_workspace[0] = -1; /* To catch bad restart */
5291 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5292 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5293 DFA_WS_DIMENSION);
5294 if (count == 0)
5295 {
5296 fprintf(outfile, "Matched, but too many subsidiary matches\n");
5297 count = use_size_offsets/2;
5298 }
5299 }
5300 #endif
5301
5302 else
5303 {
5304 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5305 options | g_notempty, use_offsets, use_size_offsets);
5306 if (count == 0)
5307 {
5308 fprintf(outfile, "Matched, but too many substrings\n");
5309 count = use_size_offsets/3;
5310 }
5311 }
5312
5313 /* Matched */
5314
5315 if (count >= 0)
5316 {
5317 int i, maxcount;
5318 void *cnptr, *gnptr;
5319
5320 #if !defined NODFA
5321 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5322 #endif
5323 maxcount = use_size_offsets/3;
5324
5325 /* This is a check against a lunatic return value. */
5326
5327 if (count > maxcount)
5328 {
5329 fprintf(outfile,
5330 "** PCRE error: returned count %d is too big for offset size %d\n",
5331 count, use_size_offsets);
5332 count = use_size_offsets/3;
5333 if (do_g || do_G)
5334 {
5335 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5336 do_g = do_G = FALSE; /* Break g/G loop */
5337 }
5338 }
5339
5340 /* do_allcaps requests showing of all captures in the pattern, to check
5341 unset ones at the end. */
5342
5343 if (do_allcaps)
5344 {
5345 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5346 goto SKIP_DATA;
5347 count++; /* Allow for full match */
5348 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5349 }
5350
5351 /* Output the captured substrings */
5352
5353 for (i = 0; i < count * 2; i += 2)
5354 {
5355 if (use_offsets[i] < 0)
5356 {
5357 if (use_offsets[i] != -1)
5358 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5359 use_offsets[i], i);
5360 if (use_offsets[i+1] != -1)
5361 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5362 use_offsets[i+1], i+1);
5363 fprintf(outfile, "%2d: <unset>\n", i/2);
5364 }
5365 else
5366 {
5367 fprintf(outfile, "%2d: ", i/2);
5368 PCHARSV(bptr, use_offsets[i],
5369 use_offsets[i+1] - use_offsets[i], outfile);
5370 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5371 fprintf(outfile, "\n");
5372 if (do_showcaprest || (i == 0 && do_showrest))
5373 {
5374 fprintf(outfile, "%2d+ ", i/2);
5375 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5376 outfile);
5377 fprintf(outfile, "\n");
5378 }
5379 }
5380 }
5381
5382 if (markptr != NULL)
5383 {
5384 fprintf(outfile, "MK: ");
5385 PCHARSV(markptr, 0, -1, outfile);
5386 fprintf(outfile, "\n");
5387 }
5388
5389 for (i = 0; i < 32; i++)
5390 {
5391 if ((copystrings & (1 << i)) != 0)
5392 {
5393 int rc;
5394 char copybuffer[256];
5395 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5396 copybuffer, sizeof(copybuffer));
5397 if (rc < 0)
5398 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5399 else
5400 {
5401 fprintf(outfile, "%2dC ", i);
5402 PCHARSV(copybuffer, 0, rc, outfile);
5403 fprintf(outfile, " (%d)\n", rc);
5404 }
5405 }
5406 }
5407
5408 cnptr = copynames;
5409 for (;;)
5410 {
5411 int rc;
5412 char copybuffer[256];
5413
5414 #ifdef SUPPORT_PCRE32
5415 if (pcre_mode == PCRE32_MODE)
5416 {
5417 if (*(pcre_uint32 *)cnptr == 0) break;
5418 }
5419 #endif
5420 #ifdef SUPPORT_PCRE16
5421 if (pcre_mode == PCRE16_MODE)
5422 {
5423 if (*(pcre_uint16 *)cnptr == 0) break;
5424 }
5425 #endif
5426 #ifdef SUPPORT_PCRE8
5427 if (pcre_mode == PCRE8_MODE)
5428 {
5429 if (*(pcre_uint8 *)cnptr == 0) break;
5430 }
5431 #endif
5432
5433 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5434 cnptr, copybuffer, sizeof(copybuffer));
5435
5436 if (rc < 0)
5437 {
5438 fprintf(outfile, "copy substring ");
5439 PCHARSV(cnptr, 0, -1, outfile);
5440 fprintf(outfile, " failed %d\n", rc);
5441 }
5442 else
5443 {
5444 fprintf(outfile, " C ");
5445 PCHARSV(copybuffer, 0, rc, outfile);
5446 fprintf(outfile, " (%d) ", rc);
5447 PCHARSV(cnptr, 0, -1, outfile);
5448 putc('\n', outfile);
5449 }
5450
5451 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5452 }
5453
5454 for (i = 0; i < 32; i++)
5455 {
5456 if ((getstrings & (1 << i)) != 0)
5457 {
5458 int rc;
5459 const char *substring;
5460 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5461 if (rc < 0)
5462 fprintf(outfile, "get substring %d failed %d\n", i, rc);
5463 else
5464 {
5465 fprintf(outfile, "%2dG ", i);
5466 PCHARSV(substring, 0, rc, outfile);
5467 fprintf(outfile, " (%d)\n", rc);
5468 PCRE_FREE_SUBSTRING(substring);
5469 }
5470 }
5471 }
5472
5473 gnptr = getnames;
5474 for (;;)
5475 {
5476 int rc;
5477 const char *substring;
5478
5479 #ifdef SUPPORT_PCRE32
5480 if (pcre_mode == PCRE32_MODE)
5481 {
5482 if (*(pcre_uint32 *)gnptr == 0) break;
5483 }
5484 #endif
5485 #ifdef SUPPORT_PCRE16
5486 if (pcre_mode == PCRE16_MODE)
5487 {
5488 if (*(pcre_uint16 *)gnptr == 0) break;
5489 }
5490 #endif
5491 #ifdef SUPPORT_PCRE8
5492 if (pcre_mode == PCRE8_MODE)
5493 {
5494 if (*(pcre_uint8 *)gnptr == 0) break;
5495 }
5496 #endif
5497
5498 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5499 gnptr, &substring);
5500 if (rc < 0)
5501 {
5502 fprintf(outfile, "get substring ");
5503 PCHARSV(gnptr, 0, -1, outfile);
5504 fprintf(outfile, " failed %d\n", rc);
5505 }
5506 else
5507 {
5508 fprintf(outfile, " G ");
5509 PCHARSV(substring, 0, rc, outfile);
5510 fprintf(outfile, " (%d) ", rc);
5511 PCHARSV(gnptr, 0, -1, outfile);
5512 PCRE_FREE_SUBSTRING(substring);
5513 putc('\n', outfile);
5514 }
5515
5516 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5517 }
5518
5519 if (getlist)
5520 {
5521 int rc;
5522 const char **stringlist;
5523 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5524 if (rc < 0)
5525 fprintf(outfile, "get substring list failed %d\n", rc);
5526 else
5527 {
5528 for (i = 0; i < count; i++)
5529 {
5530 fprintf(outfile, "%2dL ", i);
5531 PCHARSV(stringlist[i], 0, -1, outfile);
5532 putc('\n', outfile);
5533 }
5534 if (stringlist[i] != NULL)
5535 fprintf(outfile, "string list not terminated by NULL\n");
5536 PCRE_FREE_SUBSTRING_LIST(stringlist);
5537 }
5538 }
5539 }
5540
5541 /* There was a partial match */
5542
5543 else if (count == PCRE_ERROR_PARTIAL)
5544 {
5545 if (markptr == NULL) fprintf(outfile, "Partial match");
5546 else
5547 {
5548 fprintf(outfile, "Partial match, mark=");
5549 PCHARSV(markptr, 0, -1, outfile);
5550 }
5551 if (use_size_offsets > 1)
5552 {
5553 fprintf(outfile, ": ");
5554 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5555 outfile);
5556 }
5557 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5558 fprintf(outfile, "\n");
5559 break; /* Out of the /g loop */
5560 }
5561
5562 /* Failed to match. If this is a /g or /G loop and we previously set
5563 g_notempty after a null match, this is not necessarily the end. We want
5564 to advance the start offset, and continue. We won't be at the end of the
5565 string - that was checked before setting g_notempty.
5566
5567 Complication arises in the case when the newline convention is "any",
5568 "crlf", or "anycrlf". If the previous match was at the end of a line
5569 terminated by CRLF, an advance of one character just passes the \r,
5570 whereas we should prefer the longer newline sequence, as does the code in
5571 pcre_exec(). Fudge the offset value to achieve this. We check for a
5572 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5573 find the default.
5574
5575 Otherwise, in the case of UTF-8 matching, the advance must be one
5576 character, not one byte. */
5577
5578 else
5579 {
5580 if (g_notempty != 0)
5581 {
5582 int onechar = 1;
5583 unsigned int obits = REAL_PCRE_OPTIONS(re);
5584 use_offsets[0] = start_offset;
5585 if ((obits & PCRE_NEWLINE_BITS) == 0)
5586 {
5587 int d;
5588 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5589 /* Note that these values are always the ASCII ones, even in
5590 EBCDIC environments. CR = 13, NL = 10. */
5591 obits = (d == 13)? PCRE_NEWLINE_CR :
5592 (d == 10)? PCRE_NEWLINE_LF :
5593 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5594 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5595 (d == -1)? PCRE_NEWLINE_ANY : 0;
5596 }
5597 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5598 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5599 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5600 &&
5601 start_offset < len - 1 && (
5602 #ifdef SUPPORT_PCRE8
5603 (pcre_mode == PCRE8_MODE &&
5604 bptr[start_offset] == '\r' &&
5605 bptr[start_offset + 1] == '\n') ||
5606 #endif
5607 #ifdef SUPPORT_PCRE16
5608 (pcre_mode == PCRE16_MODE &&
5609 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5610 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5611 #endif
5612 #ifdef SUPPORT_PCRE32
5613 (pcre_mode == PCRE32_MODE &&
5614 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5615 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5616 #endif
5617 0))
5618 onechar++;
5619 else if (use_utf)
5620 {
5621 while (start_offset + onechar < len)
5622 {
5623 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5624 onechar++;
5625 }
5626 }
5627 use_offsets[1] = start_offset + onechar;
5628 }
5629 else
5630 {
5631 switch(count)
5632 {
5633 case PCRE_ERROR_NOMATCH:
5634 if (gmatched == 0)
5635 {
5636 if (markptr == NULL)
5637 {
5638 fprintf(outfile, "No match");
5639 }
5640 else
5641 {
5642 fprintf(outfile, "No match, mark = ");
5643 PCHARSV(markptr, 0, -1, outfile);
5644 }
5645 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5646 putc('\n', outfile);
5647 }
5648 break;
5649
5650 case PCRE_ERROR_BADUTF8:
5651 case PCRE_ERROR_SHORTUTF8:
5652 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5653 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5654 8 * CHAR_SIZE);
5655 if (use_size_offsets >= 2)
5656 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5657 use_offsets[1]);
5658 fprintf(outfile, "\n");
5659 break;
5660
5661 case PCRE_ERROR_BADUTF8_OFFSET:
5662 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5663 8 * CHAR_SIZE);
5664 break;
5665
5666 default:
5667 if (count < 0 &&
5668 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5669 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5670 else
5671 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5672 break;
5673 }
5674
5675 break; /* Out of the /g loop */
5676 }
5677 }
5678
5679 /* If not /g or /G we are done */
5680
5681 if (!do_g && !do_G) break;
5682
5683 /* If we have matched an empty string, first check to see if we are at
5684 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5685 Perl's /g option does. This turns out to be rather cunning. First we set
5686 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5687 same point. If this fails (picked up above) we advance to the next
5688 character. */
5689
5690 g_notempty = 0;
5691
5692 if (use_offsets[0] == use_offsets[1])
5693 {
5694 if (use_offsets[0] == len) break;
5695 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5696 }
5697
5698 /* For /g, update the start offset, leaving the rest alone */
5699
5700 if (do_g) start_offset = use_offsets[1];
5701
5702 /* For /G, update the pointer and length */
5703
5704 else
5705 {
5706 bptr += use_offsets[1] * CHAR_SIZE;
5707 len -= use_offsets[1];
5708 }
5709 } /* End of loop for /g and /G */
5710
5711 NEXT_DATA: continue;
5712 } /* End of loop for data lines */
5713
5714 CONTINUE:
5715
5716 #if !defined NOPOSIX
5717 if (posix || do_posix) regfree(&preg);
5718 #endif
5719
5720 if (re != NULL) new_free(re);
5721 if (extra != NULL)
5722 {
5723 PCRE_FREE_STUDY(extra);
5724 }
5725 if (locale_set)
5726 {
5727 new_free((void *)tables);
5728 setlocale(LC_CTYPE, "C");
5729 locale_set = 0;
5730 }
5731 if (jit_stack != NULL)
5732 {
5733 PCRE_JIT_STACK_FREE(jit_stack);
5734 jit_stack = NULL;
5735 }
5736 }
5737
5738 if (infile == stdin) fprintf(outfile, "\n");
5739
5740 EXIT:
5741
5742 if (infile != NULL && infile != stdin) fclose(infile);
5743 if (outfile != NULL && outfile != stdout) fclose(outfile);
5744
5745 free(buffer);
5746 free(dbuffer);
5747 free(pbuffer);
5748 free(offsets);
5749
5750 #ifdef SUPPORT_PCRE16
5751 if (buffer16 != NULL) free(buffer16);
5752 #endif
5753 #ifdef SUPPORT_PCRE32
5754 if (buffer32 != NULL) free(buffer32);
5755 #endif
5756
5757 #if !defined NODFA
5758 if (dfa_workspace != NULL)
5759 free(dfa_workspace);
5760 #endif
5761
5762 return yield;
5763 }
5764
5765 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5