/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1127 - (show annotations)
Thu Oct 18 18:35:05 2012 UTC (7 years, 1 month ago) by chpe
File MIME type: text/plain
File size: 173074 byte(s)
Error occurred while calculating annotation data.
Remove unused variable
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136
137 #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138 /* Configure internal macros to 32 bit mode. */
139 #define COMPILE_PCRE32
140 #endif
141 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142 /* Configure internal macros to 16 bit mode. */
143 #define COMPILE_PCRE16
144 #endif
145 #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146 /* Configure internal macros to 8 bit mode. */
147 #define COMPILE_PCRE8
148 #endif
149
150 #include "pcre_internal.h"
151
152 /* The pcre_printint() function, which prints the internal form of a compiled
153 regex, is held in a separate file so that (a) it can be compiled in either
154 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155 when that is compiled in debug mode. */
156
157 #ifdef SUPPORT_PCRE8
158 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159 #endif
160 #ifdef SUPPORT_PCRE16
161 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162 #endif
163 #ifdef SUPPORT_PCRE32
164 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165 #endif
166
167 /* We need access to some of the data tables that PCRE uses. So as not to have
168 to keep two copies, we include the source files here, changing the names of the
169 external symbols to prevent clashes. */
170
171 #define PCRE_INCLUDED
172
173 #include "pcre_tables.c"
174 #include "pcre_ucd.c"
175
176 /* The definition of the macro PRINTABLE, which determines whether to print an
177 output character as-is or as a hex value when showing compiled patterns, is
178 the same as in the printint.src file. We use it here in cases when the locale
179 has not been explicitly changed, so as to get consistent output from systems
180 that differ in their output from isprint() even in the "C" locale. */
181
182 #ifdef EBCDIC
183 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
184 #else
185 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
186 #endif
187
188 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189
190 /* Posix support is disabled in 16 or 32 bit only mode. */
191 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192 #define NOPOSIX
193 #endif
194
195 /* It is possible to compile this test program without including support for
196 testing the POSIX interface, though this is not available via the standard
197 Makefile. */
198
199 #if !defined NOPOSIX
200 #include "pcreposix.h"
201 #endif
202
203 /* It is also possible, originally for the benefit of a version that was
204 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206 automatically cut out the UTF support if PCRE is built without it. */
207
208 #ifndef SUPPORT_UTF
209 #ifndef NOUTF
210 #define NOUTF
211 #endif
212 #endif
213
214 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
215 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
216 only from one place and is handled differently). I couldn't dream up any way of
217 using a single macro to do this in a generic way, because of the many different
218 argument requirements. We know that at least one of SUPPORT_PCRE8,
219 SUPPORT_PCRE16 and SUPPORT_PCRE32 must be set. First define macros for each
220 individual mode; then use these in the definitions of generic macros.
221
222 **** Special note about the PCHARSxxx macros: the address of the string to be
223 printed is always given as two arguments: a base address followed by an offset.
224 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
225 offset is in units of this size. If the string were given as base+offset in one
226 argument, the casting might be incorrectly applied. */
227
228 #ifdef SUPPORT_PCRE8
229
/* 8-bit mode: hand the string at base p plus offset to pchars(), together with
len and f, and store pchars()'s result in lv. The base is cast to pcre_uint8 *
before the offset is added, so the offset counts 8-bit units. The offset is
parenthesized so that an argument containing a low-precedence operator (for
example a conditional expression) is added as a whole. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)
232
/* 8-bit mode: as PCHARS8, but the value returned by pchars() is explicitly
discarded. The base is cast to pcre_uint8 * before the offset is added; the
offset is parenthesized so that a compound expression is added as a whole. */

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)
235
236 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237 p = read_capture_name8(p, cn8, re)
238
239 #define STRLEN8(p) ((int)strlen((char *)p))
240
241 #define SET_PCRE_CALLOUT8(callout) \
242 pcre_callout = callout
243
244 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245 pcre_assign_jit_stack(extra, callback, userdata)
246
247 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248 re = pcre_compile((char *)pat, options, error, erroffset, tables)
249
250 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251 namesptr, cbuffer, size) \
252 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253 (char *)namesptr, cbuffer, size)
254
255 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257
258 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259 offsets, size_offsets, workspace, size_workspace) \
260 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261 offsets, size_offsets, workspace, size_workspace)
262
263 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264 offsets, size_offsets) \
265 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266 offsets, size_offsets)
267
268 #define PCRE_FREE_STUDY8(extra) \
269 pcre_free_study(extra)
270
271 #define PCRE_FREE_SUBSTRING8(substring) \
272 pcre_free_substring(substring)
273
274 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275 pcre_free_substring_list(listptr)
276
277 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278 getnamesptr, subsptr) \
279 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280 (char *)getnamesptr, subsptr)
281
/* 8-bit mode: set n to the result of pcre_get_stringnumber() for the name at
ptr (cast to char *). The compiled pattern is the second macro argument; it is
named re so that the macro uses what the caller passes instead of silently
depending on a variable called "re" being in scope at the point of use. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)
284
285 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287
288 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290
291 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293
294 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295 pcre_printint(re, outfile, debug_lengths)
296
297 #define PCRE_STUDY8(extra, re, options, error) \
298 extra = pcre_study(re, options, error)
299
300 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301 pcre_jit_stack_alloc(startsize, maxsize)
302
303 #define PCRE_JIT_STACK_FREE8(stack) \
304 pcre_jit_stack_free(stack)
305
306 #endif /* SUPPORT_PCRE8 */
307
308 /* -----------------------------------------------------------*/
309
310 #ifdef SUPPORT_PCRE16
311
/* 16-bit mode: hand the string at base p plus offset to pchars16(), together
with len and f, and store the result in lv. The base is cast to PCRE_SPTR16
before the offset is added, so the offset counts 16-bit units. The offset is
parenthesized so that a compound expression is added as a whole. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)
314
/* 16-bit mode: as PCHARS16, but the value returned by pchars16() is explicitly
discarded. The base is cast to PCRE_SPTR16 before the offset is added; the
offset is parenthesized so that a compound expression is added as a whole. */

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)
317
318 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319 p = read_capture_name16(p, cn16, re)
320
321 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322
323 #define SET_PCRE_CALLOUT16(callout) \
324 pcre16_callout = (int (*)(pcre16_callout_block *))callout
325
326 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327 pcre16_assign_jit_stack((pcre16_extra *)extra, \
328 (pcre16_jit_callback)callback, userdata)
329
330 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332 tables)
333
334 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335 namesptr, cbuffer, size) \
336 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338
339 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341 (PCRE_UCHAR16 *)cbuffer, size/2)
342
343 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344 offsets, size_offsets, workspace, size_workspace) \
345 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347 workspace, size_workspace)
348
349 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350 offsets, size_offsets) \
351 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352 len, start_offset, options, offsets, size_offsets)
353
354 #define PCRE_FREE_STUDY16(extra) \
355 pcre16_free_study((pcre16_extra *)extra)
356
357 #define PCRE_FREE_SUBSTRING16(substring) \
358 pcre16_free_substring((PCRE_SPTR16)substring)
359
360 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362
363 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364 getnamesptr, subsptr) \
365 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367
/* 16-bit mode: set n to the result of pcre16_get_stringnumber() for the name
at ptr (cast to PCRE_SPTR16). The compiled pattern is the second macro
argument, named re so that the macro uses what the caller passes instead of
depending on a variable called "re" at the point of use. It is cast to
pcre16 * in the same way as in the other 16-bit wrapper macros. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
370
371 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373 (PCRE_SPTR16 *)(void*)subsptr)
374
375 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377 (PCRE_SPTR16 **)(void*)listptr)
378
379 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381 tables)
382
383 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384 pcre16_printint(re, outfile, debug_lengths)
385
386 #define PCRE_STUDY16(extra, re, options, error) \
387 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388
389 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391
392 #define PCRE_JIT_STACK_FREE16(stack) \
393 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394
395 #endif /* SUPPORT_PCRE16 */
396
397 /* -----------------------------------------------------------*/
398
399 #ifdef SUPPORT_PCRE32
400
/* 32-bit mode: hand the string at base p plus offset to pchars32(), together
with len, use_utf and f, and store the result in lv. The base is cast to
PCRE_SPTR32 before the offset is added, so the offset counts 32-bit units; the
offset is parenthesized so that a compound expression is added as a whole.
Note that use_utf is not a macro parameter: it is taken from the scope in
which the macro is expanded. */

#define PCHARS32(lv, p, offset, len, f) \
  lv = pchars32((PCRE_SPTR32)(p) + (offset), len, use_utf, f)
403
/* 32-bit mode: as PCHARS32, but the value returned by pchars32() is explicitly
discarded. The base is cast to PCRE_SPTR32 before the offset is added; the
offset is parenthesized so that a compound expression is added as a whole.
use_utf is taken from the scope in which the macro is expanded. */

#define PCHARSV32(p, offset, len, f) \
  (void)pchars32((PCRE_SPTR32)(p) + (offset), len, use_utf, f)
406
407 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408 p = read_capture_name32(p, cn32, re)
409
410 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411
412 #define SET_PCRE_CALLOUT32(callout) \
413 pcre32_callout = (int (*)(pcre32_callout_block *))callout
414
415 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416 pcre32_assign_jit_stack((pcre32_extra *)extra, \
417 (pcre32_jit_callback)callback, userdata)
418
419 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421 tables)
422
/* 32-bit mode: call pcre32_copy_named_substring() and store its result in rc.
The pattern, subject, name and output buffer are cast to their 32-bit types.
As in the 8-bit and 16-bit versions of this macro, size is the length of
cbuffer in bytes; because the buffer is filled with 32-bit units here, the
capacity passed to the library is size/4 (the 16-bit version passes size/2). */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
427
/* 32-bit mode: call pcre32_copy_substring() for substring number i and store
its result in rc. The subject and output buffer are cast to their 32-bit
types. As in the 8-bit and 16-bit versions of this macro, size is the length
of cbuffer in bytes; because the buffer is filled with 32-bit units here, the
capacity passed to the library is size/4 (the 16-bit version passes size/2). */

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
431
432 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433 offsets, size_offsets, workspace, size_workspace) \
434 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436 workspace, size_workspace)
437
438 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439 offsets, size_offsets) \
440 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441 len, start_offset, options, offsets, size_offsets)
442
443 #define PCRE_FREE_STUDY32(extra) \
444 pcre32_free_study((pcre32_extra *)extra)
445
446 #define PCRE_FREE_SUBSTRING32(substring) \
447 pcre32_free_substring((PCRE_SPTR32)substring)
448
449 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451
452 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453 getnamesptr, subsptr) \
454 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456
/* 32-bit mode: set n to the result of pcre32_get_stringnumber() for the name
at ptr (cast to PCRE_SPTR32). The compiled pattern is the second macro
argument, named re so that the macro uses what the caller passes instead of
depending on a variable called "re" at the point of use. It is cast to
pcre32 * in the same way as in the other 32-bit wrapper macros. */

#define PCRE_GET_STRINGNUMBER32(n, re, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)ptr)
459
460 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462 (PCRE_SPTR32 *)(void*)subsptr)
463
464 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466 (PCRE_SPTR32 **)(void*)listptr)
467
468 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470 tables)
471
472 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473 pcre32_printint(re, outfile, debug_lengths)
474
475 #define PCRE_STUDY32(extra, re, options, error) \
476 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477
478 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480
481 #define PCRE_JIT_STACK_FREE32(stack) \
482 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483
484 #endif /* SUPPORT_PCRE32 */
485
486
487 /* ----- More than one mode is supported; a runtime test is needed, except for
488 pcre_config(), and the JIT stack functions, when it doesn't matter which
489 version is called. ----- */
490
/* Values held in pcre_mode to select the 8-, 16- or 32-bit macros at run
time. The numeric values are significant: CHAR_SIZE is defined as
(1 << pcre_mode), so they must remain 0, 1 and 2 to give sizes 1, 2 and 4. */

enum {
  PCRE8_MODE  = 0,
  PCRE16_MODE = 1,
  PCRE32_MODE = 2
};
496
497 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
498 defined (SUPPORT_PCRE32)) >= 2
499
500 #define CHAR_SIZE (1 << pcre_mode)
501
502 /* There doesn't seem to be an easy way of writing these macros that can cope
503 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
504 cases separately. */
505
506 /* ----- All three modes supported ----- */
507
508 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
509
510 #define PCHARS(lv, p, offset, len, f) \
511 if (pcre_mode == PCRE32_MODE) \
512 PCHARS32(lv, p, offset, len, f); \
513 else if (pcre_mode == PCRE16_MODE) \
514 PCHARS16(lv, p, offset, len, f); \
515 else \
516 PCHARS8(lv, p, offset, len, f)
517
518 #define PCHARSV(p, offset, len, f) \
519 if (pcre_mode == PCRE32_MODE) \
520 PCHARSV32(p, offset, len, f); \
521 else if (pcre_mode == PCRE16_MODE) \
522 PCHARSV16(p, offset, len, f); \
523 else \
524 PCHARSV8(p, offset, len, f)
525
526 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
527 if (pcre_mode == PCRE32_MODE) \
528 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
529 else if (pcre_mode == PCRE16_MODE) \
530 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
531 else \
532 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
533
534 #define SET_PCRE_CALLOUT(callout) \
535 if (pcre_mode == PCRE32_MODE) \
536 SET_PCRE_CALLOUT32(callout); \
537 else if (pcre_mode == PCRE16_MODE) \
538 SET_PCRE_CALLOUT16(callout); \
539 else \
540 SET_PCRE_CALLOUT8(callout)
541
/* All three modes supported: yield the length of the string at p by selecting
STRLEN32, STRLEN16 or STRLEN8 according to pcre_mode. Any mode value other
than PCRE32_MODE or PCRE16_MODE selects the 8-bit version. */

#define STRLEN(p) \
  (pcre_mode == PCRE32_MODE ? STRLEN32(p) : \
   pcre_mode == PCRE16_MODE ? STRLEN16(p) : \
   STRLEN8(p))
543
544 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
545 if (pcre_mode == PCRE32_MODE) \
546 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
547 else if (pcre_mode == PCRE16_MODE) \
548 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
549 else \
550 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
551
552 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
553 if (pcre_mode == PCRE32_MODE) \
554 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
555 else if (pcre_mode == PCRE16_MODE) \
556 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
557 else \
558 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
559
560 #define PCRE_CONFIG pcre_config
561
562 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
563 namesptr, cbuffer, size) \
564 if (pcre_mode == PCRE32_MODE) \
565 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
566 namesptr, cbuffer, size); \
567 else if (pcre_mode == PCRE16_MODE) \
568 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
569 namesptr, cbuffer, size); \
570 else \
571 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
572 namesptr, cbuffer, size)
573
574 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
575 if (pcre_mode == PCRE32_MODE) \
576 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
577 else if (pcre_mode == PCRE16_MODE) \
578 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
579 else \
580 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
581
582 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets, workspace, size_workspace) \
584 if (pcre_mode == PCRE32_MODE) \
585 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets, workspace, size_workspace); \
587 else if (pcre_mode == PCRE16_MODE) \
588 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets, workspace, size_workspace); \
590 else \
591 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets, workspace, size_workspace)
593
594 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
595 offsets, size_offsets) \
596 if (pcre_mode == PCRE32_MODE) \
597 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
598 offsets, size_offsets); \
599 else if (pcre_mode == PCRE16_MODE) \
600 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
601 offsets, size_offsets); \
602 else \
603 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
604 offsets, size_offsets)
605
606 #define PCRE_FREE_STUDY(extra) \
607 if (pcre_mode == PCRE32_MODE) \
608 PCRE_FREE_STUDY32(extra); \
609 else if (pcre_mode == PCRE16_MODE) \
610 PCRE_FREE_STUDY16(extra); \
611 else \
612 PCRE_FREE_STUDY8(extra)
613
614 #define PCRE_FREE_SUBSTRING(substring) \
615 if (pcre_mode == PCRE32_MODE) \
616 PCRE_FREE_SUBSTRING32(substring); \
617 else if (pcre_mode == PCRE16_MODE) \
618 PCRE_FREE_SUBSTRING16(substring); \
619 else \
620 PCRE_FREE_SUBSTRING8(substring)
621
622 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
623 if (pcre_mode == PCRE32_MODE) \
624 PCRE_FREE_SUBSTRING_LIST32(listptr); \
625 else if (pcre_mode == PCRE16_MODE) \
626 PCRE_FREE_SUBSTRING_LIST16(listptr); \
627 else \
628 PCRE_FREE_SUBSTRING_LIST8(listptr)
629
630 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
631 getnamesptr, subsptr) \
632 if (pcre_mode == PCRE32_MODE) \
633 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
634 getnamesptr, subsptr); \
635 else if (pcre_mode == PCRE16_MODE) \
636 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
637 getnamesptr, subsptr); \
638 else \
639 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
640 getnamesptr, subsptr)
641
642 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
643 if (pcre_mode == PCRE32_MODE) \
644 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
645 else if (pcre_mode == PCRE16_MODE) \
646 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
647 else \
648 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
649
650 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
651 if (pcre_mode == PCRE32_MODE) \
652 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
653 else if (pcre_mode == PCRE16_MODE) \
654 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
655 else \
656 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
657
658 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
659 if (pcre_mode == PCRE32_MODE) \
660 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
661 else if (pcre_mode == PCRE16_MODE) \
662 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
663 else \
664 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
665
666 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
667 (pcre_mode == PCRE32_MODE ? \
668 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
669 : pcre_mode == PCRE16_MODE ? \
670 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
671 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
672
673 #define PCRE_JIT_STACK_FREE(stack) \
674 if (pcre_mode == PCRE32_MODE) \
675 PCRE_JIT_STACK_FREE32(stack); \
676 else if (pcre_mode == PCRE16_MODE) \
677 PCRE_JIT_STACK_FREE16(stack); \
678 else \
679 PCRE_JIT_STACK_FREE8(stack)
680
681 #define PCRE_MAKETABLES \
682 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
683
684 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
685 if (pcre_mode == PCRE32_MODE) \
686 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
687 else if (pcre_mode == PCRE16_MODE) \
688 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
689 else \
690 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
691
692 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
693 if (pcre_mode == PCRE32_MODE) \
694 PCRE_PRINTINT32(re, outfile, debug_lengths); \
695 else if (pcre_mode == PCRE16_MODE) \
696 PCRE_PRINTINT16(re, outfile, debug_lengths); \
697 else \
698 PCRE_PRINTINT8(re, outfile, debug_lengths)
699
700 #define PCRE_STUDY(extra, re, options, error) \
701 if (pcre_mode == PCRE32_MODE) \
702 PCRE_STUDY32(extra, re, options, error); \
703 else if (pcre_mode == PCRE16_MODE) \
704 PCRE_STUDY16(extra, re, options, error); \
705 else \
706 PCRE_STUDY8(extra, re, options, error)
707
708
709 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
710
711 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
712 #define PCHARS(lv, p, offset, len, f) \
713 if (pcre_mode == PCRE32_MODE) \
714 PCHARS32(lv, p, offset, len, f); \
715 else \
716 PCHARS16(lv, p, offset, len, f)
717
718 #define PCHARSV(p, offset, len, f) \
719 if (pcre_mode == PCRE32_MODE) \
720 PCHARSV32(p, offset, len, f); \
721 else \
722 PCHARSV16(p, offset, len, f)
723
724 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
725 if (pcre_mode == PCRE32_MODE) \
726 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
727 else \
728 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re)
729
730 #define SET_PCRE_CALLOUT(callout) \
731 if (pcre_mode == PCRE32_MODE) \
732 SET_PCRE_CALLOUT32(callout); \
733 else \
734 SET_PCRE_CALLOUT16(callout)
735
/* 32-bit and 16-bit modes supported: yield the length of the string at p by
selecting STRLEN32 when pcre_mode is PCRE32_MODE and STRLEN16 otherwise. The
whole conditional is enclosed in parentheses so that the macro can be used
inside a larger expression. */

#define STRLEN(p) \
  (pcre_mode == PCRE32_MODE ? STRLEN32(p) : STRLEN16(p))
737
738 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
739 if (pcre_mode == PCRE32_MODE) \
740 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
741 else \
742 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata)
743
744 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
745 if (pcre_mode == PCRE32_MODE) \
746 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
747 else \
748 PCRE_COMPILE16(re, pat, options, error, erroffset, tables)
749
750 #define PCRE_CONFIG pcre_config
751
/* Each macro from here to PCRE_GET_SUBSTRING_LIST expands to an unbraced
if/else statement: the 32-bit variant is invoked when pcre_mode is
PCRE32_MODE, and the 16-bit variant otherwise. The expansion deliberately has
no trailing semicolon, so every invocation must be written as a complete
statement followed by ';'. */
752 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
753 namesptr, cbuffer, size) \
754 if (pcre_mode == PCRE32_MODE) \
755 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
756 namesptr, cbuffer, size); \
757 else \
758 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
759 namesptr, cbuffer, size)
760
761 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
762 if (pcre_mode == PCRE32_MODE) \
763 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
764 else \
765 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size)
766
767 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
768 offsets, size_offsets, workspace, size_workspace) \
769 if (pcre_mode == PCRE32_MODE) \
770 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
771 offsets, size_offsets, workspace, size_workspace); \
772 else \
773 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
774 offsets, size_offsets, workspace, size_workspace)
775
776 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
777 offsets, size_offsets) \
778 if (pcre_mode == PCRE32_MODE) \
779 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
780 offsets, size_offsets); \
781 else \
782 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
783 offsets, size_offsets)
784
785 #define PCRE_FREE_STUDY(extra) \
786 if (pcre_mode == PCRE32_MODE) \
787 PCRE_FREE_STUDY32(extra); \
788 else \
789 PCRE_FREE_STUDY16(extra)
790
791 #define PCRE_FREE_SUBSTRING(substring) \
792 if (pcre_mode == PCRE32_MODE) \
793 PCRE_FREE_SUBSTRING32(substring); \
794 else \
795 PCRE_FREE_SUBSTRING16(substring)
796
797 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
798 if (pcre_mode == PCRE32_MODE) \
799 PCRE_FREE_SUBSTRING_LIST32(listptr); \
800 else \
801 PCRE_FREE_SUBSTRING_LIST16(listptr)
802
803 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
804 getnamesptr, subsptr) \
805 if (pcre_mode == PCRE32_MODE) \
806 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
807 getnamesptr, subsptr); \
808 else \
809 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
810 getnamesptr, subsptr)
811
812 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
813 if (pcre_mode == PCRE32_MODE) \
814 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
815 else \
816 PCRE_GET_STRINGNUMBER16(n, rc, ptr)
817
818 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
819 if (pcre_mode == PCRE32_MODE) \
820 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
821 else \
822 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr)
823
824 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
825 if (pcre_mode == PCRE32_MODE) \
826 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
827 else \
828 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr)
829
/* Expression macro (unlike the if/else statement macros around it): evaluates
to whatever PCRE_JIT_STACK_ALLOC32 or PCRE_JIT_STACK_ALLOC16 yields, selected
by pcre_mode. The whole conditional is enclosed in parentheses so that it can
be embedded in a larger expression; the closing parenthesis was previously
missing, which made every use a syntax error. */
830 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
831 (pcre_mode == PCRE32_MODE ? \
832 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
833 : PCRE_JIT_STACK_ALLOC16(startsize, maxsize))
834
/* Run-time dispatch between the 32-bit and 16-bit variants, keyed on
pcre_mode. All of these except PCRE_MAKETABLES expand to an unbraced if/else
statement with no trailing semicolon, so they must be used as complete
statements. */
835 #define PCRE_JIT_STACK_FREE(stack) \
836 if (pcre_mode == PCRE32_MODE) \
837 PCRE_JIT_STACK_FREE32(stack); \
838 else \
839 PCRE_JIT_STACK_FREE16(stack)
840
/* Expression macro: calls pcre32_maketables() or pcre16_maketables() and
yields the result. */
841 #define PCRE_MAKETABLES \
842 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre16_maketables())
843
844 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
845 if (pcre_mode == PCRE32_MODE) \
846 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
847 else \
848 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables)
849
850 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
851 if (pcre_mode == PCRE32_MODE) \
852 PCRE_PRINTINT32(re, outfile, debug_lengths); \
853 else \
854 PCRE_PRINTINT16(re, outfile, debug_lengths)
855
856 #define PCRE_STUDY(extra, re, options, error) \
857 if (pcre_mode == PCRE32_MODE) \
858 PCRE_STUDY32(extra, re, options, error); \
859 else \
860 PCRE_STUDY16(extra, re, options, error)
861
862
863 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
864
865 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
866
/* Run-time dispatch for a build that has the 32-bit and 8-bit libraries. Each
generic macro selects the 32-bit variant when pcre_mode is PCRE32_MODE and the
8-bit variant otherwise. Unless noted, a macro expands to an unbraced if/else
statement with no trailing semicolon, so it must be used as a complete
statement. STRLEN, PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES are conditional
expressions that yield a value, and PCRE_CONFIG is a fixed alias. */
867 #define PCHARS(lv, p, offset, len, f) \
868 if (pcre_mode == PCRE32_MODE) \
869 PCHARS32(lv, p, offset, len, f); \
870 else \
871 PCHARS8(lv, p, offset, len, f)
872
873 #define PCHARSV(p, offset, len, f) \
874 if (pcre_mode == PCRE32_MODE) \
875 PCHARSV32(p, offset, len, f); \
876 else \
877 PCHARSV8(p, offset, len, f)
878
879 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
880 if (pcre_mode == PCRE32_MODE) \
881 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
882 else \
883 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
884
885 #define SET_PCRE_CALLOUT(callout) \
886 if (pcre_mode == PCRE32_MODE) \
887 SET_PCRE_CALLOUT32(callout); \
888 else \
889 SET_PCRE_CALLOUT8(callout)
890
/* Expression macro: yields STRLEN32(p) or STRLEN8(p). */
891 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : STRLEN8(p))
892
893 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
894 if (pcre_mode == PCRE32_MODE) \
895 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
896 else \
897 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
898
899 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
900 if (pcre_mode == PCRE32_MODE) \
901 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
902 else \
903 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
904
/* Not mode-dependent: always names pcre_config, whatever pcre_mode is. */
905 #define PCRE_CONFIG pcre_config
906
907 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
908 namesptr, cbuffer, size) \
909 if (pcre_mode == PCRE32_MODE) \
910 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
911 namesptr, cbuffer, size); \
912 else \
913 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
914 namesptr, cbuffer, size)
915
916 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
917 if (pcre_mode == PCRE32_MODE) \
918 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
919 else \
920 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
921
922 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
923 offsets, size_offsets, workspace, size_workspace) \
924 if (pcre_mode == PCRE32_MODE) \
925 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
926 offsets, size_offsets, workspace, size_workspace); \
927 else \
928 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
929 offsets, size_offsets, workspace, size_workspace)
930
931 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
932 offsets, size_offsets) \
933 if (pcre_mode == PCRE32_MODE) \
934 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
935 offsets, size_offsets); \
936 else \
937 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
938 offsets, size_offsets)
939
940 #define PCRE_FREE_STUDY(extra) \
941 if (pcre_mode == PCRE32_MODE) \
942 PCRE_FREE_STUDY32(extra); \
943 else \
944 PCRE_FREE_STUDY8(extra)
945
946 #define PCRE_FREE_SUBSTRING(substring) \
947 if (pcre_mode == PCRE32_MODE) \
948 PCRE_FREE_SUBSTRING32(substring); \
949 else \
950 PCRE_FREE_SUBSTRING8(substring)
951
952 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
953 if (pcre_mode == PCRE32_MODE) \
954 PCRE_FREE_SUBSTRING_LIST32(listptr); \
955 else \
956 PCRE_FREE_SUBSTRING_LIST8(listptr)
957
958 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
959 getnamesptr, subsptr) \
960 if (pcre_mode == PCRE32_MODE) \
961 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
962 getnamesptr, subsptr); \
963 else \
964 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
965 getnamesptr, subsptr)
966
967 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
968 if (pcre_mode == PCRE32_MODE) \
969 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
970 else \
971 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
972
973 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
974 if (pcre_mode == PCRE32_MODE) \
975 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
976 else \
977 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
978
979 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
980 if (pcre_mode == PCRE32_MODE) \
981 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
982 else \
983 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
984
/* Expression macro: yields the result of the selected allocator macro. */
985 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
986 (pcre_mode == PCRE32_MODE ? \
987 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
988 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
989
990 #define PCRE_JIT_STACK_FREE(stack) \
991 if (pcre_mode == PCRE32_MODE) \
992 PCRE_JIT_STACK_FREE32(stack); \
993 else \
994 PCRE_JIT_STACK_FREE8(stack)
995
/* Expression macro: calls pcre32_maketables() or pcre_maketables(). */
996 #define PCRE_MAKETABLES \
997 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_maketables())
998
999 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
1000 if (pcre_mode == PCRE32_MODE) \
1001 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
1002 else \
1003 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
1004
1005 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
1006 if (pcre_mode == PCRE32_MODE) \
1007 PCRE_PRINTINT32(re, outfile, debug_lengths); \
1008 else \
1009 PCRE_PRINTINT8(re, outfile, debug_lengths)
1010
1011 #define PCRE_STUDY(extra, re, options, error) \
1012 if (pcre_mode == PCRE32_MODE) \
1013 PCRE_STUDY32(extra, re, options, error); \
1014 else \
1015 PCRE_STUDY8(extra, re, options, error)
1016
1017
1018 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
1019
1020 #else
/* Run-time dispatch for a build that has the 16-bit and 8-bit libraries. Each
generic macro selects the 16-bit variant when pcre_mode is PCRE16_MODE and the
8-bit variant otherwise. Unless noted, a macro expands to an unbraced if/else
statement with no trailing semicolon, so it must be used as a complete
statement. STRLEN, PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES are conditional
expressions that yield a value, and PCRE_CONFIG is a fixed alias. */
1021 #define PCHARS(lv, p, offset, len, f) \
1022 if (pcre_mode == PCRE16_MODE) \
1023 PCHARS16(lv, p, offset, len, f); \
1024 else \
1025 PCHARS8(lv, p, offset, len, f)
1026
1027 #define PCHARSV(p, offset, len, f) \
1028 if (pcre_mode == PCRE16_MODE) \
1029 PCHARSV16(p, offset, len, f); \
1030 else \
1031 PCHARSV8(p, offset, len, f)
1032
1033 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
1034 if (pcre_mode == PCRE16_MODE) \
1035 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
1036 else \
1037 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
1038
1039 #define SET_PCRE_CALLOUT(callout) \
1040 if (pcre_mode == PCRE16_MODE) \
1041 SET_PCRE_CALLOUT16(callout); \
1042 else \
1043 SET_PCRE_CALLOUT8(callout)
1044
/* Expression macro: yields STRLEN16(p) or STRLEN8(p). */
1045 #define STRLEN(p) (pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
1046
1047 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
1048 if (pcre_mode == PCRE16_MODE) \
1049 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
1050 else \
1051 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
1052
1053 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
1054 if (pcre_mode == PCRE16_MODE) \
1055 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
1056 else \
1057 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
1058
/* Not mode-dependent: always names pcre_config, whatever pcre_mode is. */
1059 #define PCRE_CONFIG pcre_config
1060
1061 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
1062 namesptr, cbuffer, size) \
1063 if (pcre_mode == PCRE16_MODE) \
1064 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
1065 namesptr, cbuffer, size); \
1066 else \
1067 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
1068 namesptr, cbuffer, size)
1069
1070 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
1071 if (pcre_mode == PCRE16_MODE) \
1072 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
1073 else \
1074 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
1075
1076 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
1077 offsets, size_offsets, workspace, size_workspace) \
1078 if (pcre_mode == PCRE16_MODE) \
1079 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
1080 offsets, size_offsets, workspace, size_workspace); \
1081 else \
1082 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
1083 offsets, size_offsets, workspace, size_workspace)
1084
1085 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
1086 offsets, size_offsets) \
1087 if (pcre_mode == PCRE16_MODE) \
1088 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
1089 offsets, size_offsets); \
1090 else \
1091 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
1092 offsets, size_offsets)
1093
1094 #define PCRE_FREE_STUDY(extra) \
1095 if (pcre_mode == PCRE16_MODE) \
1096 PCRE_FREE_STUDY16(extra); \
1097 else \
1098 PCRE_FREE_STUDY8(extra)
1099
1100 #define PCRE_FREE_SUBSTRING(substring) \
1101 if (pcre_mode == PCRE16_MODE) \
1102 PCRE_FREE_SUBSTRING16(substring); \
1103 else \
1104 PCRE_FREE_SUBSTRING8(substring)
1105
1106 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
1107 if (pcre_mode == PCRE16_MODE) \
1108 PCRE_FREE_SUBSTRING_LIST16(listptr); \
1109 else \
1110 PCRE_FREE_SUBSTRING_LIST8(listptr)
1111
1112 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
1113 getnamesptr, subsptr) \
1114 if (pcre_mode == PCRE16_MODE) \
1115 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
1116 getnamesptr, subsptr); \
1117 else \
1118 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
1119 getnamesptr, subsptr)
1120
1121 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
1122 if (pcre_mode == PCRE16_MODE) \
1123 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
1124 else \
1125 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
1126
1127 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
1128 if (pcre_mode == PCRE16_MODE) \
1129 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
1130 else \
1131 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
1132
1133 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
1134 if (pcre_mode == PCRE16_MODE) \
1135 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
1136 else \
1137 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
1138
/* Expression macro: yields the result of the selected allocator macro. */
1139 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
1140 (pcre_mode == PCRE16_MODE ? \
1141 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
1142 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
1143
1144 #define PCRE_JIT_STACK_FREE(stack) \
1145 if (pcre_mode == PCRE16_MODE) \
1146 PCRE_JIT_STACK_FREE16(stack); \
1147 else \
1148 PCRE_JIT_STACK_FREE8(stack)
1149
/* Expression macro: calls pcre16_maketables() or pcre_maketables(). */
1150 #define PCRE_MAKETABLES \
1151 (pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
1152
1153 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
1154 if (pcre_mode == PCRE16_MODE) \
1155 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
1156 else \
1157 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
1158
1159 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
1160 if (pcre_mode == PCRE16_MODE) \
1161 PCRE_PRINTINT16(re, outfile, debug_lengths); \
1162 else \
1163 PCRE_PRINTINT8(re, outfile, debug_lengths)
1164
1165 #define PCRE_STUDY(extra, re, options, error) \
1166 if (pcre_mode == PCRE16_MODE) \
1167 PCRE_STUDY16(extra, re, options, error); \
1168 else \
1169 PCRE_STUDY8(extra, re, options, error)
1170
1171 #endif
1172
1173 /* ----- End of cases where more than one mode is supported ----- */
1174
1175
1176 /* ----- Only 8-bit mode is supported ----- */
1177
1178 #elif defined SUPPORT_PCRE8
/* Only one library width is available in this branch, so every generic name
is a plain alias for its 8-bit counterpart and pcre_mode is never tested.
CHAR_SIZE is 1 here (2 and 4 in the 16-bit-only and 32-bit-only branches);
presumably the number of bytes in one character unit. */
1179 #define CHAR_SIZE 1
1180 #define PCHARS PCHARS8
1181 #define PCHARSV PCHARSV8
1182 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
1183 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
1184 #define STRLEN STRLEN8
1185 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
1186 #define PCRE_COMPILE PCRE_COMPILE8
1187 #define PCRE_CONFIG pcre_config
1188 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
1189 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
1190 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
1191 #define PCRE_EXEC PCRE_EXEC8
1192 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
1193 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
1194 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
1195 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
1196 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
1197 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
1198 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
1199 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
1200 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
/* Note that PCRE_MAKETABLES includes the call parentheses, so it is used
without an argument list. */
1201 #define PCRE_MAKETABLES pcre_maketables()
1202 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
1203 #define PCRE_PRINTINT PCRE_PRINTINT8
1204 #define PCRE_STUDY PCRE_STUDY8
1205
1206 /* ----- Only 16-bit mode is supported ----- */
1207
1208 #elif defined SUPPORT_PCRE16
/* Only one library width is available in this branch, so every generic name
is a plain alias for its 16-bit counterpart and pcre_mode is never tested.
CHAR_SIZE is 2 here; presumably the number of bytes in one character unit. */
1209 #define CHAR_SIZE 2
1210 #define PCHARS PCHARS16
1211 #define PCHARSV PCHARSV16
1212 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
1213 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
1214 #define STRLEN STRLEN16
1215 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
1216 #define PCRE_COMPILE PCRE_COMPILE16
1217 #define PCRE_CONFIG pcre16_config
1218 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
1219 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
1220 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
1221 #define PCRE_EXEC PCRE_EXEC16
1222 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
1223 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
1224 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
1225 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
1226 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
1227 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
1228 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
1229 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
1230 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
/* Note that PCRE_MAKETABLES includes the call parentheses, so it is used
without an argument list. */
1231 #define PCRE_MAKETABLES pcre16_maketables()
1232 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
1233 #define PCRE_PRINTINT PCRE_PRINTINT16
1234 #define PCRE_STUDY PCRE_STUDY16
1235
1236 /* ----- Only 32-bit mode is supported ----- */
1237
1238 #elif defined SUPPORT_PCRE32
/* Only one library width is available in this branch, so every generic name
is a plain alias for its 32-bit counterpart and pcre_mode is never tested.
CHAR_SIZE is 4 here; presumably the number of bytes in one character unit. */
1239 #define CHAR_SIZE 4
1240 #define PCHARS PCHARS32
1241 #define PCHARSV PCHARSV32
1242 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
1243 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
1244 #define STRLEN STRLEN32
1245 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
1246 #define PCRE_COMPILE PCRE_COMPILE32
1247 #define PCRE_CONFIG pcre32_config
1248 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
1249 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
1250 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
1251 #define PCRE_EXEC PCRE_EXEC32
1252 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
1253 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
1254 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
1255 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
1256 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
1257 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
1258 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
1259 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
1260 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
/* Note that PCRE_MAKETABLES includes the call parentheses, so it is used
without an argument list. */
1261 #define PCRE_MAKETABLES pcre32_maketables()
1262 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
1263 #define PCRE_PRINTINT PCRE_PRINTINT32
1264 #define PCRE_STUDY PCRE_STUDY32
1265
1266 #endif
1267
1268 /* ----- End of mode-specific function call macros ----- */
1269
1270
1271 /* Other parameters */
1272
/* Supply CLOCKS_PER_SEC when the system headers have not defined it: use
CLK_TCK if that exists, otherwise fall back to a fixed value of 100. */
1273 #ifndef CLOCKS_PER_SEC
1274 #ifdef CLK_TCK
1275 #define CLOCKS_PER_SEC CLK_TCK
1276 #else
1277 #define CLOCKS_PER_SEC 100
1278 #endif
1279 #endif
1280
/* DFA_WS_DIMENSION is only defined when DFA support has not been disabled with
NODFA; presumably it is the dimension of the DFA matching workspace (the use
is not in view here). */
1281 #if !defined NODFA
1282 #define DFA_WS_DIMENSION 1000
1283 #endif
1284
1285 /* This is the default loop count for timing. */
1286
1287 #define LOOPREPEAT 500000
1288
1289 /* Static variables */
1290
/* File-scope state. Everything here has static storage duration, so the
variables declared without an initializer start out as zero (or NULL for
pointers), exactly like the ones that are initialized explicitly to 0/NULL. */
1291 static FILE *outfile;
1292 static int log_store = 0;
1293 static int callout_count;
1294 static int callout_extra;
1295 static int callout_fail_count;
1296 static int callout_fail_id;
1297 static int debug_lengths;
1298 static int first_callout;
1299 static int jit_was_used;
1300 static int locale_set = 0;
1301 static int show_malloc;
1302 static int use_utf;
1303 static size_t gotten_store;
1304 static size_t first_gotten_store = 0;
1305 static const unsigned char *last_callout_mark = NULL;
1306
1307 /* The buffers grow automatically if very long input lines are encountered. */
1308
/* buffer_size holds the starting size (50000); both buffer pointers start as
NULL, so the memory itself must be obtained elsewhere before use. */
1309 static int buffer_size = 50000;
1310 static pcre_uint8 *buffer = NULL;
1311 static pcre_uint8 *pbuffer = NULL;
1312
1313 /* Another buffer is needed for translation to 16/32-bit character strings. It
1314 will be obtained and extended as required. */
1315
1316 #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
1317
1318 /* We need the table of operator lengths that is used for 16/32-bit compiling, in
1319 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
1320 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
1321 appropriately for the 16/32-bit world. Just as a safety check, make sure that
1322 COMPILE_PCRE[16|32] is *not* set. */
1323
/* Refuse to build if either library-compilation macro has leaked into this
compilation. */
1324 #ifdef COMPILE_PCRE16
1325 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1326 #endif
1327
1328 #ifdef COMPILE_PCRE32
1329 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1330 #endif
1331
/* Replace LINK_SIZE with the value to be used when OP_LENGTHS is expanded
later: 2 becomes 1, and 3 or 4 becomes 2. Any other incoming value is a
configuration error. (Presumably this converts a size in bytes to a size in
wider code units; the same value is seen by every later expansion.) */
1332 #if LINK_SIZE == 2
1333 #undef LINK_SIZE
1334 #define LINK_SIZE 1
1335 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1336 #undef LINK_SIZE
1337 #define LINK_SIZE 2
1338 #else
1339 #error LINK_SIZE must be either 2, 3, or 4
1340 #endif
1341
/* IMM2_SIZE is likewise forced to 1 for the later OP_LENGTHS expansions. */
1342 #undef IMM2_SIZE
1343 #define IMM2_SIZE 1
1344
1345 #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
1346
/* Per-width state, present only when the corresponding library is supported:
a buffer that starts empty (size 0, NULL pointer), and a table built by
expanding the OP_LENGTHS macro with whatever LINK_SIZE and IMM2_SIZE are in
force at this point in the file. */
1347 #ifdef SUPPORT_PCRE16
1348 static int buffer16_size = 0;
1349 static pcre_uint16 *buffer16 = NULL;
1350 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1351 #endif /* SUPPORT_PCRE16 */
1352
1353 #ifdef SUPPORT_PCRE32
1354 static int buffer32_size = 0;
1355 static pcre_uint32 *buffer32 = NULL;
1356 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1357 #endif /* SUPPORT_PCRE32 */
1358
1359 /* If we have 8-bit support, default to it; if there is also
1360 16- or 32-bit support, it can be changed by an option. If there is no 8-bit support,
1361 there must be 16- or 32-bit support, so default to 16-bit mode if it is available, and to 32-bit mode otherwise. */
1362
/* Initial value of pcre_mode, in order of preference: 8-bit, then 16-bit, then
32-bit, taking the first width that is supported. If none of the SUPPORT_PCREn
macros is defined, pcre_mode is not declared at all. */
1363 #if defined SUPPORT_PCRE8
1364 static int pcre_mode = PCRE8_MODE;
1365 #elif defined SUPPORT_PCRE16
1366 static int pcre_mode = PCRE16_MODE;
1367 #elif defined SUPPORT_PCRE32
1368 static int pcre_mode = PCRE32_MODE;
1369 #endif
1370
1371 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1372
/* Seven entries, one for each n from 1 to 7 in order (so presumably indexed
by n - 1). Reading n as a 3-bit number, bit 0 selects PCRE_STUDY_JIT_COMPILE,
bit 1 selects PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE and bit 2 selects
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE. */
1373 static int jit_study_bits[] =
1374 {
1375 PCRE_STUDY_JIT_COMPILE,
1376 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1377 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1378 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1379 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1380 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1381 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1382 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1383 };
1384
/* Mask combining all three JIT study option bits. */
1385 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1386 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1387
1388 /* Textual explanations for runtime error codes */
1389
/* One entry per run-time error code. Judging by the first two entries, entry i
describes error code -i (TODO confirm against the lookup code, which is not in
view here). A NULL entry marks a code that has no generic text, either because
it is reported by dedicated code or because it is never returned. */
1390 static const char *errtexts[] = {
1391 NULL, /* 0 is no error */
1392 NULL, /* NOMATCH is handled specially */
1393 "NULL argument passed",
1394 "bad option value",
1395 "magic number missing",
1396 "unknown opcode - pattern overwritten?",
1397 "no more memory",
1398 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1399 "match limit exceeded",
1400 "callout error code",
1401 NULL, /* BADUTF8/16 is handled specially */
1402 NULL, /* BADUTF8/16 offset is handled specially */
1403 NULL, /* PARTIAL is handled specially */
1404 "not used - internal error",
1405 "internal error - pattern overwritten?",
1406 "bad count value",
1407 "item unsupported for DFA matching",
1408 "backreference condition or recursion test not supported for DFA matching",
1409 "match limit not supported for DFA matching",
1410 "workspace size exceeded in DFA matching",
1411 "too much recursion for DFA matching",
1412 "recursion limit exceeded",
1413 "not used - internal error",
1414 "invalid combination of newline options",
1415 "bad offset value",
1416 NULL, /* SHORTUTF8/16 is handled specially */
1417 "nested recursion at the same subject position",
1418 "JIT stack limit reached",
/* NOTE(review): the next message mentions only 8-bit and 16-bit, although this
program also has a 32-bit mode. The text is part of the program's output, so
check the expected test output before rewording it. */
1419 "pattern compiled in wrong mode: 8-bit/16-bit error",
1420 "pattern compiled with other endianness",
1421 "invalid data in workspace for DFA restart"
1422 };
1423
1424
1425 /*************************************************
1426 * Alternate character tables *
1427 *************************************************/
1428
1429 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1430 using the default tables of the library. However, the T option can be used to
1431 select alternate sets of tables, for different kinds of testing. Note also that
1432 the L (locale) option also adjusts the tables. */
1433
1434 /* This is the set of tables distributed as default with PCRE. It recognizes
1435 only ASCII characters. */
1436
1437 static const pcre_uint8 tables0[] = {
1438
1439 /* This table is a lower casing table. */
1440
1441 0, 1, 2, 3, 4, 5, 6, 7,
1442 8, 9, 10, 11, 12, 13, 14, 15,
1443 16, 17, 18, 19, 20, 21, 22, 23,
1444 24, 25, 26, 27, 28, 29, 30, 31,
1445 32, 33, 34, 35, 36, 37, 38, 39,
1446 40, 41, 42, 43, 44, 45, 46, 47,
1447 48, 49, 50, 51, 52, 53, 54, 55,
1448 56, 57, 58, 59, 60, 61, 62, 63,
1449 64, 97, 98, 99,100,101,102,103,
1450 104,105,106,107,108,109,110,111,
1451 112,113,114,115,116,117,118,119,
1452 120,121,122, 91, 92, 93, 94, 95,
1453 96, 97, 98, 99,100,101,102,103,
1454 104,105,106,107,108,109,110,111,
1455 112,113,114,115,116,117,118,119,
1456 120,121,122,123,124,125,126,127,
1457 128,129,130,131,132,133,134,135,
1458 136,137,138,139,140,141,142,143,
1459 144,145,146,147,148,149,150,151,
1460 152,153,154,155,156,157,158,159,
1461 160,161,162,163,164,165,166,167,
1462 168,169,170,171,172,173,174,175,
1463 176,177,178,179,180,181,182,183,
1464 184,185,186,187,188,189,190,191,
1465 192,193,194,195,196,197,198,199,
1466 200,201,202,203,204,205,206,207,
1467 208,209,210,211,212,213,214,215,
1468 216,217,218,219,220,221,222,223,
1469 224,225,226,227,228,229,230,231,
1470 232,233,234,235,236,237,238,239,
1471 240,241,242,243,244,245,246,247,
1472 248,249,250,251,252,253,254,255,
1473
1474 /* This table is a case flipping table. */
1475
1476 0, 1, 2, 3, 4, 5, 6, 7,
1477 8, 9, 10, 11, 12, 13, 14, 15,
1478 16, 17, 18, 19, 20, 21, 22, 23,
1479 24, 25, 26, 27, 28, 29, 30, 31,
1480 32, 33, 34, 35, 36, 37, 38, 39,
1481 40, 41, 42, 43, 44, 45, 46, 47,
1482 48, 49, 50, 51, 52, 53, 54, 55,
1483 56, 57, 58, 59, 60, 61, 62, 63,
1484 64, 97, 98, 99,100,101,102,103,
1485 104,105,106,107,108,109,110,111,
1486 112,113,114,115,116,117,118,119,
1487 120,121,122, 91, 92, 93, 94, 95,
1488 96, 65, 66, 67, 68, 69, 70, 71,
1489 72, 73, 74, 75, 76, 77, 78, 79,
1490 80, 81, 82, 83, 84, 85, 86, 87,
1491 88, 89, 90,123,124,125,126,127,
1492 128,129,130,131,132,133,134,135,
1493 136,137,138,139,140,141,142,143,
1494 144,145,146,147,148,149,150,151,
1495 152,153,154,155,156,157,158,159,
1496 160,161,162,163,164,165,166,167,
1497 168,169,170,171,172,173,174,175,
1498 176,177,178,179,180,181,182,183,
1499 184,185,186,187,188,189,190,191,
1500 192,193,194,195,196,197,198,199,
1501 200,201,202,203,204,205,206,207,
1502 208,209,210,211,212,213,214,215,
1503 216,217,218,219,220,221,222,223,
1504 224,225,226,227,228,229,230,231,
1505 232,233,234,235,236,237,238,239,
1506 240,241,242,243,244,245,246,247,
1507 248,249,250,251,252,253,254,255,
1508
1509 /* This table contains bit maps for various character classes. Each map is 32
1510 bytes long and the bits run from the least significant end of each byte. The
1511 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1512 graph, print, punct, and cntrl. Other classes are built from combinations. */
1513
1514 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1515 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1516 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1517 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1518
1519 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1520 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1521 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1522 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1523
1524 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1525 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1526 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1527 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1528
1529 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1530 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1531 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1532 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1533
1534 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1535 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1536 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1537 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1538
1539 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1540 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1541 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1542 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1543
1544 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1545 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1546 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1547 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1548
1549 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1550 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1551 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1552 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1553
1554 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1555 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1556 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1557 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1558
1559 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1560 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1561 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1562 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1563
1564 /* This table identifies various classes of character by individual bits:
1565 0x01 white space character
1566 0x02 letter
1567 0x04 decimal digit
1568 0x08 hexadecimal digit
1569 0x10 alphanumeric or '_'
1570 0x80 regular expression metacharacter or binary zero
1571 */
1572
1573 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1574 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1575 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1576 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1577 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1578 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1579 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1580 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1581 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1582 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1583 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1584 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1585 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1586 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1587 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1588 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1591 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1592 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1593 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1594 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1595 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1596 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1603 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1604 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1605
1606 /* This is a set of tables that came originally from a Windows user. It seems to
1607 be at least an approximation of ISO 8859. In particular, there are characters
1608 greater than 128 that are marked as spaces, letters, etc. */
1609
1610 static const pcre_uint8 tables1[] = {
1611 0,1,2,3,4,5,6,7, /* Entries 0-255: lower-casing map; 'A'-'Z' and 192-222 (except 215) map to value+32 */
1612 8,9,10,11,12,13,14,15,
1613 16,17,18,19,20,21,22,23,
1614 24,25,26,27,28,29,30,31,
1615 32,33,34,35,36,37,38,39,
1616 40,41,42,43,44,45,46,47,
1617 48,49,50,51,52,53,54,55,
1618 56,57,58,59,60,61,62,63,
1619 64,97,98,99,100,101,102,103,
1620 104,105,106,107,108,109,110,111,
1621 112,113,114,115,116,117,118,119,
1622 120,121,122,91,92,93,94,95,
1623 96,97,98,99,100,101,102,103,
1624 104,105,106,107,108,109,110,111,
1625 112,113,114,115,116,117,118,119,
1626 120,121,122,123,124,125,126,127,
1627 128,129,130,131,132,133,134,135,
1628 136,137,138,139,140,141,142,143,
1629 144,145,146,147,148,149,150,151,
1630 152,153,154,155,156,157,158,159,
1631 160,161,162,163,164,165,166,167,
1632 168,169,170,171,172,173,174,175,
1633 176,177,178,179,180,181,182,183,
1634 184,185,186,187,188,189,190,191,
1635 224,225,226,227,228,229,230,231,
1636 232,233,234,235,236,237,238,239,
1637 240,241,242,243,244,245,246,215,
1638 248,249,250,251,252,253,254,223,
1639 224,225,226,227,228,229,230,231,
1640 232,233,234,235,236,237,238,239,
1641 240,241,242,243,244,245,246,247,
1642 248,249,250,251,252,253,254,255,
1643 0,1,2,3,4,5,6,7, /* Next 256 entries: case-flipping map; upper-case codes map to lower-case and vice versa */
1644 8,9,10,11,12,13,14,15,
1645 16,17,18,19,20,21,22,23,
1646 24,25,26,27,28,29,30,31,
1647 32,33,34,35,36,37,38,39,
1648 40,41,42,43,44,45,46,47,
1649 48,49,50,51,52,53,54,55,
1650 56,57,58,59,60,61,62,63,
1651 64,97,98,99,100,101,102,103,
1652 104,105,106,107,108,109,110,111,
1653 112,113,114,115,116,117,118,119,
1654 120,121,122,91,92,93,94,95,
1655 96,65,66,67,68,69,70,71,
1656 72,73,74,75,76,77,78,79,
1657 80,81,82,83,84,85,86,87,
1658 88,89,90,123,124,125,126,127,
1659 128,129,130,131,132,133,134,135,
1660 136,137,138,139,140,141,142,143,
1661 144,145,146,147,148,149,150,151,
1662 152,153,154,155,156,157,158,159,
1663 160,161,162,163,164,165,166,167,
1664 168,169,170,171,172,173,174,175,
1665 176,177,178,179,180,181,182,183,
1666 184,185,186,187,188,189,190,191,
1667 224,225,226,227,228,229,230,231,
1668 232,233,234,235,236,237,238,239,
1669 240,241,242,243,244,245,246,215,
1670 248,249,250,251,252,253,254,223,
1671 192,193,194,195,196,197,198,199,
1672 200,201,202,203,204,205,206,207,
1673 208,209,210,211,212,213,214,247,
1674 216,217,218,219,220,221,222,255,
1675 0,62,0,0,1,0,0,0, /* Next 320 entries: presumably ten 32-byte character-class bitmaps - TODO confirm against the PCRE table layout */
1676 0,0,0,0,0,0,0,0,
1677 32,0,0,0,1,0,0,0,
1678 0,0,0,0,0,0,0,0,
1679 0,0,0,0,0,0,255,3,
1680 126,0,0,0,126,0,0,0,
1681 0,0,0,0,0,0,0,0,
1682 0,0,0,0,0,0,0,0,
1683 0,0,0,0,0,0,255,3,
1684 0,0,0,0,0,0,0,0,
1685 0,0,0,0,0,0,12,2,
1686 0,0,0,0,0,0,0,0,
1687 0,0,0,0,0,0,0,0,
1688 254,255,255,7,0,0,0,0,
1689 0,0,0,0,0,0,0,0,
1690 255,255,127,127,0,0,0,0,
1691 0,0,0,0,0,0,0,0,
1692 0,0,0,0,254,255,255,7,
1693 0,0,0,0,0,4,32,4,
1694 0,0,0,128,255,255,127,255,
1695 0,0,0,0,0,0,255,3,
1696 254,255,255,135,254,255,255,7,
1697 0,0,0,0,0,4,44,6,
1698 255,255,127,255,255,255,127,255,
1699 0,0,0,0,254,255,255,255,
1700 255,255,255,255,255,255,255,127,
1701 0,0,0,0,254,255,255,255,
1702 255,255,255,255,255,255,255,255,
1703 0,2,0,0,255,255,255,255,
1704 255,255,255,255,255,255,255,127,
1705 0,0,0,0,255,255,255,255,
1706 255,255,255,255,255,255,255,255,
1707 0,0,0,0,254,255,0,252,
1708 1,0,0,248,1,0,0,120,
1709 0,0,0,0,254,255,255,255,
1710 0,0,128,0,0,0,128,0,
1711 255,255,255,255,0,0,0,0,
1712 0,0,0,0,0,0,0,128,
1713 255,255,255,255,0,0,0,0,
1714 0,0,0,0,0,0,0,0,
1715 128,0,0,0,0,0,0,0, /* Final 256 entries: presumably per-character type flag bytes - TODO confirm against the PCRE table layout */
1716 0,1,1,0,1,1,0,0,
1717 0,0,0,0,0,0,0,0,
1718 0,0,0,0,0,0,0,0,
1719 1,0,0,0,128,0,0,0,
1720 128,128,128,128,0,0,128,0,
1721 28,28,28,28,28,28,28,28,
1722 28,28,0,0,0,0,0,128,
1723 0,26,26,26,26,26,26,18,
1724 18,18,18,18,18,18,18,18,
1725 18,18,18,18,18,18,18,18,
1726 18,18,18,128,128,0,128,16,
1727 0,26,26,26,26,26,26,18,
1728 18,18,18,18,18,18,18,18,
1729 18,18,18,18,18,18,18,18,
1730 18,18,18,128,128,0,0,0,
1731 0,0,0,0,0,1,0,0,
1732 0,0,0,0,0,0,0,0,
1733 0,0,0,0,0,0,0,0,
1734 0,0,0,0,0,0,0,0,
1735 1,0,0,0,0,0,0,0,
1736 0,0,18,0,0,0,0,0,
1737 0,0,20,20,0,18,0,0,
1738 0,20,18,0,0,0,0,0,
1739 18,18,18,18,18,18,18,18,
1740 18,18,18,18,18,18,18,18,
1741 18,18,18,18,18,18,18,0,
1742 18,18,18,18,18,18,18,18,
1743 18,18,18,18,18,18,18,18,
1744 18,18,18,18,18,18,18,18,
1745 18,18,18,18,18,18,18,0,
1746 18,18,18,18,18,18,18,18
1747 };
1748
1749
1750
1751
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that have no strerror() of their own (SunOS4 is
one such system). The message is looked up in the sys_errlist[] array, whose
number of entries is given by sys_nerr.

Argument:
  n         the error number

Returns:    the corresponding entry of sys_errlist[], or a fixed
            "unknown error number" text when n is outside 0..sys_nerr-1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr) ? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1771
1772
1773
1774 /*************************************************
1775 * Print newline configuration *
1776 *************************************************/
1777
1778 /*
1779 Arguments:
1780 rc the return code from PCRE_CONFIG_NEWLINE
1781 isc TRUE if called from "-C newline"
1782 Returns: nothing
1783 */
1784
1785 static void
1786 print_newline_config(int rc, BOOL isc)
1787 {
1788 const char *s = NULL;
1789 if (!isc) printf(" Newline sequence is ");
1790 switch(rc)
1791 {
1792 case CHAR_CR: s = "CR"; break;
1793 case CHAR_LF: s = "LF"; break;
1794 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1795 case -1: s = "ANY"; break;
1796 case -2: s = "ANYCRLF"; break;
1797
1798 default:
1799 printf("a non-standard value: 0x%04x\n", rc);
1800 return;
1801 }
1802
1803 printf("%s\n", s);
1804 }
1805
1806
1807
1808 /*************************************************
1809 * JIT memory callback *
1810 *************************************************/
1811
1812 static pcre_jit_stack* jit_callback(void *arg)
1813 {
1814 jit_was_used = TRUE;
1815 return (pcre_jit_stack *)arg;
1816 }
1817
1818
1819 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1820 /*************************************************
1821 * Convert UTF-8 string to value *
1822 *************************************************/
1823
1824 /* This function takes one or more bytes that represents a UTF-8 character,
1825 and returns the value of the character.
1826
1827 Argument:
1828 utf8bytes a pointer to the byte vector
1829 vptr a pointer to an int to receive the value
1830
1831 Returns: > 0 => the number of bytes consumed
1832 -6 to 0 => malformed UTF-8 character at offset = (-return)
1833 */
1834
1835 static int
1836 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1837 {
1838 pcre_uint32 c = *utf8bytes++;
1839 pcre_uint32 d = c;
1840 int i, j, s;
1841
1842 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1843 {
1844 if ((d & 0x80) == 0) break;
1845 d <<= 1;
1846 }
1847
1848 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1849 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1850
1851 /* i now has a value in the range 1-5 */
1852
1853 s = 6*i;
1854 d = (c & utf8_table3[i]) << s;
1855
1856 for (j = 0; j < i; j++)
1857 {
1858 c = *utf8bytes++;
1859 if ((c & 0xc0) != 0x80) return -(j+1);
1860 s -= 6;
1861 d |= (c & 0x3f) << s;
1862 }
1863
1864 /* Check that encoding was the correct unique one */
1865
1866 for (j = 0; j < utf8_table1_size; j++)
1867 if (d <= (pcre_uint32)utf8_table1[j]) break;
1868 if (j != i) return -(i+1);
1869
1870 /* Valid value */
1871
1872 *vptr = d;
1873 return i+1;
1874 }
1875 #endif /* NOUTF || SUPPORT_PCRE16 */
1876
1877
1878
1879 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1880 /*************************************************
1881 * Convert character value to UTF-8 *
1882 *************************************************/
1883
1884 /* This function takes an integer value in the range 0 - 0x7fffffff
1885 and encodes it as a UTF-8 character in 0 to 6 bytes.
1886
1887 Arguments:
1888 cvalue the character value
1889 utf8bytes pointer to buffer for result - at least 6 bytes long
1890
1891 Returns: number of characters placed in the buffer
1892 */
1893
1894 static int
1895 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1896 {
1897 register int i, j;
1898 if (cvalue > 0x7fffffffu)
1899 return -1;
1900 for (i = 0; i < utf8_table1_size; i++)
1901 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1902 utf8bytes += i;
1903 for (j = i; j > 0; j--)
1904 {
1905 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1906 cvalue >>= 6;
1907 }
1908 *utf8bytes = utf8_table2[i] | cvalue;
1909 return i + 1;
1910 }
1911 #endif
1912
1913
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* Convert a byte string to 16-bit units, leaving the result (with a
terminating zero unit) in buffer16. The buffer is replaced by a larger one
when it holds fewer than 2*len + 2 bytes, which is room for one 16-bit unit
per input byte plus the terminator.

A regex in non-UTF mode is simply widened byte by byte. In every other case
the input is decoded as UTF-8; values of 0x10000 and above become a surrogate
pair when utf is set and are an error otherwise.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
  pcre_uint16 *out;

  /* Make sure the output buffer is big enough. */

  if (buffer16_size < 2*len + 2)
  {
    free(buffer16);
    buffer16_size = 2*len + 2;
    buffer16 = (pcre_uint16 *)malloc(buffer16_size);
    if (buffer16 == NULL)
    {
      fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
      exit(1);
    }
  }

  out = buffer16;

  if (utf || data)
  {
    pcre_uint32 c = 0;

    while (len > 0)
    {
      int used = utf82ord(p, &c);

      if (used <= 0) return -1;
      if (c > 0x10ffff) return -2;

      p += used;
      len -= used;

      if (c >= 0x10000)
      {
        /* Needs a surrogate pair, which is only available in UTF mode. */
        if (!utf) return -3;
        c -= 0x10000;
        *out++ = 0xD800 | (c >> 10);
        *out++ = 0xDC00 | (c & 0x3ff);
      }
      else
      {
        *out++ = c;
      }
    }
  }
  else
  {
    /* Non-UTF regex: each byte becomes one 16-bit unit. */
    for (; len > 0; len--) *out++ = *p++;
  }

  *out = 0;
  return out - buffer16;
}
#endif
1992
1993 #ifdef SUPPORT_PCRE32
1994 /*************************************************
1995 * Convert a string to 32-bit *
1996 *************************************************/
1997
1998 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1999 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
2000 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
2001 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
2002 result is always left in buffer32.
2003
2004 Note that this function does not object to surrogate values. This is
2005 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
2006 for the purpose of testing that they are correctly faulted.
2007
2008 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
2009 in UTF-8 so that values greater than 255 can be handled.
2010
2011 Arguments:
2012 data TRUE if converting a data line; FALSE for a regex
2013 p points to a byte string
2014 utf true if UTF-8 (to be converted to UTF-32)
2015 len number of bytes in the string (excluding trailing zero)
2016
2017 Returns: number of 32-bit data items used (excluding trailing zero)
2018 OR -1 if a UTF-8 string is malformed
2019 OR -2 if a value > 0x10ffff is encountered
2020 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
2021 */
2022
2023 static int
2024 to32(int data, pcre_uint8 *p, int utf, int len)
2025 {
2026 pcre_uint32 *pp;
2027
2028 if (buffer32_size < 4*len + 4)
2029 {
2030 if (buffer32 != NULL) free(buffer32);
2031 buffer32_size = 4*len + 4;
2032 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
2033 if (buffer32 == NULL)
2034 {
2035 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
2036 exit(1);
2037 }
2038 }
2039
2040 pp = buffer32;
2041
2042 if (!utf && !data)
2043 {
2044 while (len-- > 0) *pp++ = *p++;
2045 }
2046
2047 else
2048 {
2049 pcre_uint32 c = 0;
2050 while (len > 0)
2051 {
2052 int chlen = utf82ord(p, &c);
2053 if (chlen <= 0) return -1;
2054 if (utf)
2055 {
2056 if (c > 0x10ffff) return -2;
2057 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
2058 }
2059
2060 p += chlen;
2061 len -= chlen;
2062 *pp++ = c;
2063 }
2064 }
2065
2066 *pp = 0;
2067 return pp - buffer32;
2068 }
2069
/* Check that a 32-bit character string is valid UTF-32. A code unit is
rejected if it is greater than 0x10ffff, is a surrogate (0xd800-0xdfff), or is
a non-character (low 16 bits 0xfffe or 0xffff, or in the range 0xfdd0-0xfdef).

Arguments:
  string       points to the string
  length       length of string, or -1 if the string is zero-terminated

Returns:       TRUE  if the string is a valid UTF-32 string
               FALSE otherwise
*/

#ifdef SUPPORT_UTF
static BOOL
valid_utf32(pcre_uint32 *string, int length)
{
  pcre_uint32 *p;
  pcre_uint32 c;

  /* A negative length means the string is zero-terminated, so measure it
  first. Without this step the scan below would not look at any character and
  every such string would be reported as valid. */

  if (length < 0)
  {
    for (p = string; *p != 0; p++) { }
    length = (int)(p - string);
  }

  for (p = string; length-- > 0; p++)
  {
    c = *p;

    if (c > 0x10ffffu)
      return FALSE;

    /* A surrogate */
    if ((c & 0xfffff800u) == 0xd800u)
      return FALSE;

    /* Non-character */
    if ((c & 0xfffeu) == 0xfffeu || (c >= 0xfdd0u && c <= 0xfdefu))
      return FALSE;
  }

  return TRUE;
}
#endif /* SUPPORT_UTF */
2106
2107 #endif
2108
2109 /*************************************************
2110 * Read or extend an input line *
2111 *************************************************/
2112
2113 /* Input lines are read into buffer, but both patterns and data lines can be
2114 continued over multiple input lines. In addition, if the buffer fills up, we
2115 want to automatically expand it so as to be able to handle extremely large
2116 lines that are needed for certain stress tests. When the input buffer is
2117 expanded, the other two buffers must also be expanded likewise, and the
2118 contents of pbuffer, which are a copy of the input for callouts, must be
2119 preserved (for when expansion happens for a data line). This is not the most
2120 optimal way of handling this, but hey, this is just a test program!
2121
2122 Arguments:
2123 f the file to read
2124 start where in buffer to start (this *must* be within buffer)
2125 prompt for stdin or readline()
2126
2127 Returns: pointer to the start of new data
2128 could be a copy of start, or could be moved
2129 NULL if no data read and EOF reached
2130 */
2131
2132 static pcre_uint8 *
2133 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
2134 {
2135 pcre_uint8 *here = start;
2136
2137 for (;;)
2138 {
2139 size_t rlen = (size_t)(buffer_size - (here - buffer));
2140
2141 if (rlen > 1000)
2142 {
2143 int dlen;
2144
2145 /* If libreadline or libedit support is required, use readline() to read a
2146 line if the input is a terminal. Note that readline() removes the trailing
2147 newline, so we must put it back again, to be compatible with fgets(). */
2148
2149 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2150 if (isatty(fileno(f)))
2151 {
2152 size_t len;
2153 char *s = readline(prompt);
2154 if (s == NULL) return (here == start)? NULL : start;
2155 len = strlen(s);
2156 if (len > 0) add_history(s);
2157 if (len > rlen - 1) len = rlen - 1;
2158 memcpy(here, s, len);
2159 here[len] = '\n';
2160 here[len+1] = 0;
2161 free(s);
2162 }
2163 else
2164 #endif
2165
2166 /* Read the next line by normal means, prompting if the file is stdin. */
2167
2168 {
2169 if (f == stdin) printf("%s", prompt);
2170 if (fgets((char *)here, rlen, f) == NULL)
2171 return (here == start)? NULL : start;
2172 }
2173
2174 dlen = (int)strlen((char *)here);
2175 if (dlen > 0 && here[dlen - 1] == '\n') return start;
2176 here += dlen;
2177 }
2178
2179 else
2180 {
2181 int new_buffer_size = 2*buffer_size;
2182 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
2183 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
2184
2185 if (new_buffer == NULL || new_pbuffer == NULL)
2186 {
2187 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
2188 exit(1);
2189 }
2190
2191 memcpy(new_buffer, buffer, buffer_size);
2192 memcpy(new_pbuffer, pbuffer, buffer_size);
2193
2194 buffer_size = new_buffer_size;
2195
2196 start = new_buffer + (start - buffer);
2197 here = new_buffer + (here - buffer);
2198
2199 free(buffer);
2200 free(pbuffer);
2201
2202 buffer = new_buffer;
2203 pbuffer = new_pbuffer;
2204 }
2205 }
2206
2207 return NULL; /* Control never gets here */
2208 }
2209
2210
2211
2212 /*************************************************
2213 * Read number from string *
2214 *************************************************/
2215
2216 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
2217 around with conditional compilation, just do the job by hand. It is only used
2218 for unpicking arguments, so just keep it simple.
2219
2220 Arguments:
2221 str string to be converted
2222 endptr where to put the end pointer
2223
2224 Returns: the unsigned long
2225 */
2226
2227 static int
2228 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
2229 {
2230 int result = 0;
2231 while(*str != 0 && isspace(*str)) str++;
2232 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
2233 *endptr = str;
2234 return(result);
2235 }
2236
2237
2238
2239 /*************************************************
2240 * Print one character *
2241 *************************************************/
2242
2243 /* Print a single character either literally, or as a hex escape. */
2244
2245 static int pchar(pcre_uint32 c, FILE *f)
2246 {
2247 int n = 0;
2248 if (PRINTOK(c))
2249 {
2250 if (f != NULL) fprintf(f, "%c", c);
2251 return 1;
2252 }
2253
2254 if (c < 0x100)
2255 {
2256 if (use_utf)
2257 {
2258 if (f != NULL) fprintf(f, "\\x{%02x}", c);
2259 return 6;
2260 }
2261 else
2262 {
2263 if (f != NULL) fprintf(f, "\\x%02x", c);
2264 return 4;
2265 }
2266 }
2267
2268 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2269 return n >= 0 ? n : 0;
2270 }
2271
2272
2273
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Print a string of 8-bit units through pchar(). In UTF mode (use_utf set)
each well-formed UTF-8 sequence that fits inside the remaining length is
printed as one character; any other byte is printed on its own. If handed a
NULL file, just counts chars without printing.

Arguments:
  p          the string
  length     number of bytes, or negative to use strlen()
  f          the output stream, or NULL

Returns:     the sum of the values returned by pchar()
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
  pcre_uint32 c = 0;
  int total = 0;
  int remaining = length;

  if (remaining < 0)
    remaining = strlen((char *)p);

  while (remaining > 0)
  {
    int used = 0;     /* bytes taken by a decoded UTF-8 character, if any */

#if !defined NOUTF
    if (use_utf)
    {
      int rc = utf82ord(p, &c);
      if (rc > 0 && rc <= remaining) used = rc;   /* Mustn't run over the end */
    }
#endif

    /* Not UTF mode, or no usable sequence here: take a single byte. */

    if (used == 0)
    {
      c = *p;
      used = 1;
    }

    p += used;
    remaining -= used;
    total += pchar(c, f);
  }

  return total;
}
#endif
2312
2313
2314
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Returns the number of 16-bit units that precede the first zero unit. */

static int strlen16(PCRE_SPTR16 p)
{
  PCRE_SPTR16 end = p;

  while (*end != 0) end++;
  return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
2327
2328
2329
#ifdef SUPPORT_PCRE32
/*************************************************
*    Find length of 0-terminated 32-bit string   *
*************************************************/

/* Returns the number of 32-bit units that precede the first zero unit. */

static int strlen32(PCRE_SPTR32 p)
{
  PCRE_SPTR32 end = p;

  while (*end != 0) end++;
  return (int)(end - p);
}
#endif  /* SUPPORT_PCRE32 */
2342
2343
2344
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Print a string of 16-bit units through pchar(). In UTF mode (use_utf set) a
high surrogate (0xD800-0xDBFF) that is followed, within the given length, by a
low surrogate (0xDC00-0xDFFF) is combined with it and printed as a single
character; every other unit is printed on its own. If handed a NULL file,
just counts chars without printing.

Arguments:
  p          the string
  length     number of 16-bit units, or negative to use strlen16()
  f          the output stream, or NULL

Returns:     the sum of the values returned by pchar()
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
  int yield = 0;

  if (length < 0)
    length = strlen16(p);

  while (length-- > 0)
  {
    pcre_uint32 c = *p++ & 0xffff;
#if !defined NOUTF
    if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
      pcre_uint32 d = *p & 0xffff;

      /* The low surrogate range includes its upper bound 0xDFFF, so the
      comparison must be inclusive. */

      if (d >= 0xDC00 && d <= 0xDFFF)
      {
        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
        length--;
        p++;
      }
    }
#endif
    yield += pchar(c, f);
  }

  return yield;
}
#endif  /* SUPPORT_PCRE16 */
2381
2382
2383
#ifdef SUPPORT_PCRE32
/*************************************************
*           Print 32-bit character string        *
*************************************************/

/* Print a string of 32-bit units through pchar(). When utf is set, each unit
is masked with UTF32_MASK (its low 21 bits are kept) before being printed. If
handed a NULL file, just counts chars without printing.

Arguments:
  p          the string
  length     number of 32-bit units, or negative to use strlen32()
  utf        TRUE to apply UTF32_MASK to each unit
  f          the output stream, or NULL

Returns:     the sum of the values returned by pchar()
*/

#define UTF32_MASK (0x1fffffu)

static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
{
  int total = 0;
  int remaining = (length < 0) ? strlen32(p) : length;

  for (; remaining > 0; remaining--)
  {
    pcre_uint32 c = *p++;

    if (utf) c &= UTF32_MASK;
    total += pchar(c, f);
  }

  return total;
}
#endif  /* SUPPORT_PCRE32 */
2411
2412
2413
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy a run of alphanumeric characters from the input to the name buffer,
follow it with two zero bytes, and write a message to outfile if the compiled
pattern has no group of that name.

Arguments:
  p          points to the name in the input text
  pp         points to the current position in the name buffer; on return it
               is moved to just after the first of the two zero bytes
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
  pcre_uint8 *out = *pp;

  for (; isalnum(*p); p++) *out++ = *p;

  /* Terminate the name, and leave a second zero after it. The next name, if
  any, is written over that second zero. */

  out[0] = 0;
  out[1] = 0;
  out++;

  if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
  {
    fprintf(outfile, "no parentheses with name \"");
    PCHARSV(*pp, 0, -1, outfile);
    fprintf(outfile, "\"\n");
  }

  *pp = out;
  return p;
}
#endif  /* SUPPORT_PCRE8 */
2437
2438
2439
#ifdef SUPPORT_PCRE16
/*************************************************
*     Read a capture name (16-bit) and check it  *
*************************************************/

/* Copy a run of alphanumeric characters from the input to the 16-bit name
buffer, follow it with two zero units, and write a message to outfile if the
compiled pattern has no group of that name. Note that the text being read is
8-bit; each byte is widened to one 16-bit unit.

Arguments:
  p          points to the name in the input text
  pp         points to the current position in the name buffer; on return it
               is moved to just after the first of the two zero units
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
  pcre_uint16 *out = *pp;

  for (; isalnum(*p); p++) *out++ = *p;

  /* Terminate the name, and leave a second zero after it. The next name, if
  any, is written over that second zero. */

  out[0] = 0;
  out[1] = 0;
  out++;

  if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
  {
    fprintf(outfile, "no parentheses with name \"");
    PCHARSV(*pp, 0, -1, outfile);
    fprintf(outfile, "\"\n");
  }

  *pp = out;
  return p;
}
#endif  /* SUPPORT_PCRE16 */
2464
2465
2466
#ifdef SUPPORT_PCRE32
/*************************************************
*     Read a capture name (32-bit) and check it  *
*************************************************/

/* Copy a run of alphanumeric characters from the input to the 32-bit name
buffer, follow it with two zero units, and write a message to outfile if the
compiled pattern has no group of that name. Note that the text being read is
8-bit; each byte is widened to one 32-bit unit.

Arguments:
  p          points to the name in the input text
  pp         points to the current position in the name buffer; on return it
               is moved to just after the first of the two zero units
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
{
  pcre_uint32 *out = *pp;

  for (; isalnum(*p); p++) *out++ = *p;

  /* Terminate the name, and leave a second zero after it. The next name, if
  any, is written over that second zero. */

  out[0] = 0;
  out[1] = 0;
  out++;

  if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
  {
    fprintf(outfile, "no parentheses with name \"");
    PCHARSV(*pp, 0, -1, outfile);
    fprintf(outfile, "\"\n");
  }

  *pp = out;
  return p;
}
#endif  /* SUPPORT_PCRE32 */
2491
2492
2493
2494 /*************************************************
2495 * Callout function *
2496 *************************************************/
2497
2498 /* Called from PCRE as a result of the (?C) item. Prints where we are in the
2499 match. Returns the callout data value if one is supplied and is non-zero;
2500 otherwise 1 once the failing callout has been hit often enough, else 0. */
2501
2502 static int callout(pcre_callout_block *cb)
2503 {
2504 FILE *f = (first_callout | callout_extra)? outfile : NULL; /* NULL: measure widths only, print nothing */
2505 int i, pre_start, post_start, subject_length;
2506
2507 if (callout_extra)
2508 {
2509 fprintf(f, "Callout %d: last capture = %d\n",
2510 cb->callout_number, cb->capture_last);
2511
2512 for (i = 0; i < cb->capture_top * 2; i += 2) /* offsets come in (start, end) pairs */
2513 {
2514 if (cb->offset_vector[i] < 0)
2515 fprintf(f, "%2d: <unset>\n", i/2);
2516 else
2517 {
2518 fprintf(f, "%2d: ", i/2);
2519 PCHARSV(cb->subject, cb->offset_vector[i],
2520 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2521 fprintf(f, "\n");
2522 }
2523 }
2524 }
2525
2526 /* Re-print the subject in canonical form, the first time or if giving full
2527 details. On subsequent calls in the same match, we use pchars just to find the
2528 printed lengths of the substrings. */
2529
2530 if (f != NULL) fprintf(f, "--->");
2531
2532 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2533 PCHARS(post_start, cb->subject, cb->start_match,
2534 cb->current_position - cb->start_match, f);
2535
2536 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL); /* width of the whole subject; never printed */
2537
2538 PCHARSV(cb->subject, cb->current_position,
2539 cb->subject_length - cb->current_position, f);
2540
2541 if (f != NULL) fprintf(f, "\n");
2542
2543 /* Always print appropriate indicators, with callout number if not already
2544 shown. For automatic callouts, show the pattern offset. */
2545
2546 if (cb->callout_number == 255)
2547 {
2548 fprintf(outfile, "%+3d ", cb->pattern_position);
2549 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2550 }
2551 else
2552 {
2553 if (callout_extra) fprintf(outfile, " ");
2554 else fprintf(outfile, "%3d ", cb->callout_number);
2555 }
2556
2557 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2558 fprintf(outfile, "^"); /* marks the start of the match */
2559
2560 if (post_start > 0)
2561 {
2562 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2563 fprintf(outfile, "^"); /* marks the current position */
2564 }
2565
2566 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2567 fprintf(outfile, " ");
2568
2569 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2570 pbuffer + cb->pattern_position);
2571
2572 fprintf(outfile, "\n");
2573 first_callout = 0; /* from now on the subject is re-printed only if callout_extra is set */
2574
2575 if (cb->mark != last_callout_mark) /* report the mark only when it has changed */
2576 {
2577 if (cb->mark == NULL)
2578 fprintf(outfile, "Latest Mark: <unset>\n");
2579 else
2580 {
2581 fprintf(outfile, "Latest Mark: ");
2582 PCHARSV(cb->mark, 0, -1, outfile);
2583 putc('\n', outfile);
2584 }
2585 last_callout_mark = cb->mark;
2586 }
2587
2588 if (cb->callout_data != NULL)
2589 {
2590 int callout_data = *((int *)(cb->callout_data));
2591 if (callout_data != 0)
2592 {
2593 fprintf(outfile, "Callout data = %d\n", callout_data);
2594 return callout_data;
2595 }
2596 }
2597
2598 return (cb->callout_number != callout_fail_id)? 0 : /* only callout_fail_id is counted */
2599 (++callout_count >= callout_fail_count)? 1 : 0;
2600 }
2601
2602
2603 /*************************************************
2604 * Local malloc functions *
2605 *************************************************/
2606
2607 /* Alternative malloc function, to test functionality and save the size of a
2608 compiled re, which is the first store request that pcre_compile() makes. The
2609 show_malloc variable is set only during matching. */
2610
2611 static void *new_malloc(size_t size)
2612 {
2613 void *block = malloc(size);
2614 gotten_store = size;
2615 if (first_gotten_store == 0) first_gotten_store = size;
2616 if (show_malloc)
2617 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2618 return block;
2619 }
2620
2621 static void new_free(void *block)
2622 {
2623 if (show_malloc)
2624 fprintf(outfile, "free %p\n", block);
2625 free(block);
2626 }
2627
2628 /* For recursion malloc/free, to test stacking calls */
2629
2630 static void *stack_malloc(size_t size)
2631 {
2632 void *block = malloc(size);
2633 if (show_malloc)
2634 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2635 return block;
2636 }
2637
2638 static void stack_free(void *block)
2639 {
2640 if (show_malloc)
2641 fprintf(outfile, "stack_free %p\n", block);
2642 free(block);
2643 }
2644
2645
2646 /*************************************************
2647 * Call pcre_fullinfo() *
2648 *************************************************/
2649
2650 /* Get one piece of information from the pcre_fullinfo() function. When only
2651 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2652 value, but the code is defensive.
2653
2654 Arguments:
2655 re compiled regex
2656 study study data
2657 option PCRE_INFO_xxx option
2658 ptr where to put the data
2659
2660 Returns: 0 when OK, < 0 on error
2661 */
2662
2663 static int
2664 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2665 {
2666 int rc;
2667
2668 if (pcre_mode == PCRE32_MODE)
2669 #ifdef SUPPORT_PCRE32
2670 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2671 #else
2672 rc = PCRE_ERROR_BADMODE;
2673 #endif
2674 else if (pcre_mode == PCRE16_MODE)
2675 #ifdef SUPPORT_PCRE16
2676 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2677 #else
2678 rc = PCRE_ERROR_BADMODE;
2679 #endif
2680 else
2681 #ifdef SUPPORT_PCRE8
2682 rc = pcre_fullinfo(re, study, option, ptr);
2683 #else
2684 rc = PCRE_ERROR_BADMODE;
2685 #endif
2686
2687 if (rc < 0)
2688 {
2689 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2690 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2691 if (rc == PCRE_ERROR_BADMODE)
2692 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2693 "%d-bit mode\n", 8 * CHAR_SIZE,
2694 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2695 }
2696
2697 return rc;
2698 }
2699
2700
2701
2702 /*************************************************
2703 * Swap byte functions *
2704 *************************************************/
2705
2706 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2707 value, respectively.
2708
2709 Arguments:
2710 value any number
2711
2712 Returns: the byte swapped value
2713 */
2714
2715 static pcre_uint32
2716 swap_uint32(pcre_uint32 value)
2717 {
2718 return ((value & 0x000000ff) << 24) |
2719 ((value & 0x0000ff00) << 8) |
2720 ((value & 0x00ff0000) >> 8) |
2721 (value >> 24);
2722 }
2723
2724 static pcre_uint16
2725 swap_uint16(pcre_uint16 value)
2726 {
2727 return (value >> 8) | (value << 8);
2728 }
2729
2730
2731
2732 /*************************************************
2733 * Flip bytes in a compiled pattern *
2734 *************************************************/
2735
2736 /* This function is called if the 'F' option was present on a pattern that is
2737 to be written to a file. We flip the bytes of all the integer fields in the
2738 regex data block and the study block. In 16-bit mode this also flips relevant
2739 bytes in the pattern itself. This is to make it possible to test PCRE's
2740 ability to reload byte-flipped patterns, e.g. those compiled on a different
2741 architecture. */
2742
#if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
/* Byte-flip, in place, a compiled pattern held in the 8/16-bit header layout.

The header fields (and the study block, when one is attached) are always
flipped. When pcretest is running in 16-bit mode the name table and every
16-bit code unit of the compiled pattern are flipped as well, walking the
pattern opcode by opcode until OP_END.

Arguments:
  ere      the compiled pattern, in host byte order on entry
  extra    the associated pcre_extra block, or NULL

Returns:   nothing
*/
static void
regexflip8_or_16(pcre *ere, pcre_extra *extra)
{
real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
#ifdef SUPPORT_PCRE16
int op;
/* These must be captured now, while the header is still in host order. */
pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;
#ifdef SUPPORT_UTF
BOOL utf = (re->options & PCRE_UTF16) != 0;
BOOL utf16_char = FALSE;
#endif /* SUPPORT_UTF */
#endif /* SUPPORT_PCRE16 */

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);

/* The study_data field of a pcre_extra block is meaningful only when the
PCRE_EXTRA_STUDY_DATA bit is set in its flags, so do not touch it otherwise. */

if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
in the name table, if present, and then in the pattern itself. */

#ifdef SUPPORT_PCRE16
if (pcre_mode != PCRE16_MODE) return;

while(TRUE)
  {
  /* Swap previous characters. On the first pass this is the name table;
  afterwards it is the operand units of the opcode just processed. */
  while (length-- > 0)
    {
    *ptr = swap_uint16(*ptr);
    ptr++;
    }
#ifdef SUPPORT_UTF
  if (utf16_char)
    {
    /* ptr[-1] is the character operand, but it has just been flipped. Flip it
    back before testing for a high surrogate; testing the flipped value would
    miss real surrogate pairs and misfire on characters whose low byte is in
    the range 0xd8-0xdb. */
    if ((swap_uint16(ptr[-1]) & 0xfc00) == 0xd800)
      {
      /* We know that there is only one extra character in UTF-16. */
      *ptr = swap_uint16(*ptr);
      ptr++;
      }
    }
  utf16_char = FALSE;
#endif /* SUPPORT_UTF */

  /* Get next opcode. It is read in host order before being flipped. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint16(op);

  switch (op)
    {
    case OP_END:
    return;

#ifdef SUPPORT_UTF
    /* Opcodes whose final operand is a character; in UTF-16 mode that
    character may be followed by one extra code unit. */
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    if (utf) utf16_char = TRUE;
#endif
    /* Fall through. */

    default:
    length = OP_lengths16[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint16);
    length = 0;
    break;

    case OP_XCLASS:
    /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
    if (LINK_SIZE > 1)
      length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
        - (1 + LINK_SIZE + 1));
    else
      length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint16(*ptr);
    ptr++;
    if (LINK_SIZE > 1)
      {
      *ptr = swap_uint16(*ptr);
      ptr++;
      }

    /* The unit after the size holds the XCLASS flag bits. */
    op = *ptr;
    *ptr = swap_uint16(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint16);
      length -= 32/sizeof(pcre_uint16);
      }
    break;
    }
  }
/* Control should never reach here in 16 bit mode. */
#endif /* SUPPORT_PCRE16 */
}
#endif /* SUPPORT_PCRE[8|16] */
2923
2924
2925
#if defined SUPPORT_PCRE32
/* Byte-flip, in place, a compiled pattern held in the 32-bit header layout.

The header fields, the study block (when one is attached), the name table and
every 32-bit code unit of the compiled pattern are flipped, walking the pattern
opcode by opcode until OP_END.

Arguments:
  ere      the compiled pattern, in host byte order on entry
  extra    the associated pcre_extra block, or NULL

Returns:   nothing
*/
static void
regexflip_32(pcre *ere, pcre_extra *extra)
{
real_pcre32 *re = (real_pcre32 *)ere;
int op;
/* These must be captured now, while the header is still in host order. */
pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);

/* The study_data field of a pcre_extra block is meaningful only when the
PCRE_EXTRA_STUDY_DATA bit is set in its flags, so do not touch it otherwise. */

if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 32-bit mode we must swap bytes
in the name table, if present, and then in the pattern itself. */

while(TRUE)
  {
  /* Swap previous characters. On the first pass this is the name table;
  afterwards it is the operand units of the opcode just processed. */
  while (length-- > 0)
    {
    *ptr = swap_uint32(*ptr);
    ptr++;
    }

  /* Get next opcode. It is read in host order before being flipped. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint32(op);

  switch (op)
    {
    case OP_END:
    return;

    default:
    length = OP_lengths32[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint32);
    length = 0;
    break;

    case OP_XCLASS:
    /* LINK_SIZE can only be 1 in 32-bit mode. */
    length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint32(*ptr);
    ptr++;

    /* The unit after the size holds the XCLASS flag bits. */
    op = *ptr;
    *ptr = swap_uint32(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint32);
      length -= 32/sizeof(pcre_uint32);
      }
    break;
    }
  }
/* Control should never reach here in 32 bit mode. */
}

#endif /* SUPPORT_PCRE32 */
3015
3016
3017
3018 static void
3019 regexflip(pcre *ere, pcre_extra *extra)
3020 {
3021 #if defined SUPPORT_PCRE32
3022 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
3023 regexflip_32(ere, extra);
3024 #endif
3025 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
3026 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
3027 regexflip8_or_16(ere, extra);
3028 #endif
3029 }
3030
3031
3032
3033 /*************************************************
3034 * Check match or recursion limit *
3035 *************************************************/
3036
3037 static int
3038 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
3039 int start_offset, int options, int *use_offsets, int use_size_offsets,
3040 int flag, unsigned long int *limit, int errnumber, const char *msg)
3041 {
3042 int count;
3043 int min = 0;
3044 int mid = 64;
3045 int max = -1;
3046
3047 extra->flags |= flag;
3048
3049 for (;;)
3050 {
3051 *limit = mid;
3052
3053 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
3054 use_offsets, use_size_offsets);
3055
3056 if (count == errnumber)
3057 {
3058 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
3059 min = mid;
3060 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
3061 }
3062
3063 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
3064 count == PCRE_ERROR_PARTIAL)
3065 {
3066 if (mid == min + 1)
3067 {
3068 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
3069 break;
3070 }
3071 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
3072 max = mid;
3073 mid = (min + mid)/2;
3074 }
3075 else break; /* Some other error */
3076 }
3077
3078 extra->flags &= ~flag;
3079 return count;
3080 }
3081
3082
3083
3084 /*************************************************
3085 * Case-independent strncmp() function *
3086 *************************************************/
3087
/* Compares at most n characters of two zero-terminated strings, ignoring
case. As with strncmp(), the comparison stops at the first difference or when
both strings end, whichever comes first, so nothing beyond a terminating zero
is ever read.

Arguments:
  s          first string
  t          second string
  n          maximum number of characters to compare

Returns:     < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
while (n-- > 0)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;    /* Both strings ended together */
  }
return 0;
}
3107
3108
3109
3110 /*************************************************
3111 * Check newline indicator *
3112 *************************************************/
3113
3114 /* This is used both at compile and run-time to check for <xxx> escapes. Print
3115 a message and return 0 if there is no match.
3116
3117 Arguments:
3118 p points after the leading '<'
3119 f file for error message
3120
3121 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
3122 */
3123
3124 static int
3125 check_newline(pcre_uint8 *p, FILE *f)
3126 {
3127 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
3128 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
3129 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
3130 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
3131 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
3132 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
3133 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
3134 fprintf(f, "Unknown newline type at: <%s\n", p);
3135 return 0;
3136 }
3137
3138
3139
3140 /*************************************************
3141 * Usage function *
3142 *************************************************/
3143
/* Writes the command-line help text to stdout. Lines for options that depend
on how pcretest was built (8/16/32-bit libraries, DFA, POSIX, readline) are
emitted only when the corresponding support is compiled in. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE8
printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf(" -32 use the 32-bit library\n");
#endif
printf(" -b show compiled code\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option\n");
printf(" and exit with its value. The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" pcre32 32 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
printf(" ebcdic compiled for an EBCDIC environment [0, 1]\n");
printf(" ebcdic-nl EBCDIC code for LF in hex, or 0 if not EBCDIC\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
" -M find MATCH_LIMIT minimum for each subject\n"
" -m output memory used information\n"
" -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
" -s+ force each pattern to be studied, using JIT if available\n"
" -s++ ditto, verifying when JIT was actually used\n"
" -s+n force each pattern to be studied, using JIT if available,\n"
" where 1 <= n <= 7 selects JIT options\n"
" -s++n ditto, verifying when JIT was actually used\n"
" -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
}
3198
3199
3200
3201 /*************************************************
3202 * Main Program *
3203 *************************************************/
3204
3205 /* Read lines from named file or stdin and write to named file or stdout; lines
3206 consist of a regular expression, in delimiters and optionally followed by
3207 options, followed by a set of test data, terminated by an empty line. */
3208
3209 int main(int argc, char **argv)
3210 {
3211 FILE *infile = stdin;
3212 const char *version;
3213 int options = 0;
3214 int study_options = 0;
3215 int default_find_match_limit = FALSE;
3216 int op = 1;
3217 int timeit = 0;
3218 int timeitm = 0;
3219 int showinfo = 0;
3220 int showstore = 0;
3221 int force_study = -1;
3222 int force_study_options = 0;
3223 int quiet = 0;
3224 int size_offsets = 45;
3225 int size_offsets_max;
3226 int *offsets = NULL;
3227 int debug = 0;
3228 int done = 0;
3229 int all_use_dfa = 0;
3230 int verify_jit = 0;
3231 int yield = 0;
3232 #ifdef SUPPORT_PCRE32
3233 int mask_utf32 = 0;
3234 #endif
3235 int stack_size;
3236 pcre_uint8 *dbuffer = NULL;
3237 size_t dbuffer_size = 1u << 14;
3238
3239 #if !defined NOPOSIX
3240 int posix = 0;
3241 #endif
3242 #if !defined NODFA
3243 int *dfa_workspace = NULL;
3244 #endif
3245
3246 pcre_jit_stack *jit_stack = NULL;
3247
3248 /* These vectors store, end-to-end, a list of zero-terminated captured
3249 substring names, each list itself being terminated by an empty name. Assume
3250 that 1024 is plenty long enough for the few names we'll be testing. It is
3251 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
3252 for the actual memory, to ensure alignment. */
3253
3254 pcre_uint32 copynames[1024];
3255 pcre_uint32 getnames[1024];
3256
3257 #ifdef SUPPORT_PCRE32
3258 pcre_uint32 *cn32ptr;
3259 pcre_uint32 *gn32ptr;
3260 #endif
3261
3262 #ifdef SUPPORT_PCRE16
3263 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
3264 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
3265 pcre_uint16 *cn16ptr;
3266 pcre_uint16 *gn16ptr;
3267 #endif
3268
3269 #ifdef SUPPORT_PCRE8
3270 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
3271 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
3272 pcre_uint8 *cn8ptr;
3273 pcre_uint8 *gn8ptr;
3274 #endif
3275
3276 /* Get buffers from malloc() so that valgrind will check their misuse when
3277 debugging. They grow automatically when very long lines are read. The 16-
3278 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3279
3280 buffer = (pcre_uint8 *)malloc(buffer_size);
3281 pbuffer = (pcre_uint8 *)malloc(buffer_size);
3282
3283 /* The outfile variable is static so that new_malloc can use it. */
3284
3285 outfile = stdout;
3286
3287 /* The following _setmode() stuff is some Windows magic that tells its runtime
3288 library to translate CRLF into a single LF character. At least, that's what
3289 I've been told: never having used Windows I take this all on trust. Originally
3290 it set 0x8000, but then I was advised that _O_BINARY was better. */
3291
3292 #if defined(_WIN32) || defined(WIN32)
3293 _setmode( _fileno( stdout ), _O_BINARY );
3294 #endif
3295
3296 /* Get the version number: both pcre_version() and pcre16_version() give the
3297 same answer. We just need to ensure that we call one that is available. */
3298
3299 #if defined SUPPORT_PCRE8
3300 version = pcre_version();
3301 #elif defined SUPPORT_PCRE16
3302 version = pcre16_version();
3303 #elif defined SUPPORT_PCRE32
3304 version = pcre32_version();
3305 #endif
3306
3307 /* Scan options */
3308
3309 while (argc > 1 && argv[op][0] == '-')
3310 {
3311 pcre_uint8 *endptr;
3312 char *arg = argv[op];
3313
3314 if (strcmp(arg, "-m") == 0) showstore = 1;
3315 else if (strcmp(arg, "-s") == 0) force_study = 0;
3316
3317 else if (strncmp(arg, "-s+", 3) == 0)
3318 {
3319 arg += 3;
3320 if (*arg == '+') { arg++; verify_jit = TRUE; }
3321 force_study = 1;
3322 if (*arg == 0)
3323 force_study_options = jit_study_bits[6];
3324 else if (*arg >= '1' && *arg <= '7')
3325 force_study_options = jit_study_bits[*arg - '1'];
3326 else goto BAD_ARG;
3327 }
3328 else if (strcmp(arg, "-8") == 0)
3329 {
3330 #ifdef SUPPORT_PCRE8
3331 pcre_mode = PCRE8_MODE;
3332 #else
3333 printf("** This version of PCRE was built without 8-bit support\n");
3334 exit(1);
3335 #endif
3336 }
3337 else if (strcmp(arg, "-16") == 0)
3338 {
3339 #ifdef SUPPORT_PCRE16
3340 pcre_mode = PCRE16_MODE;
3341 #else
3342 printf("** This version of PCRE was built without 16-bit support\n");
3343 exit(1);
3344 #endif
3345 }
3346 else if (strcmp(arg, "-32") == 0 || strcmp(arg, "-32+") == 0)
3347 {
3348 #ifdef SUPPORT_PCRE32
3349 pcre_mode = PCRE32_MODE;
3350 mask_utf32 = (strcmp(arg, "-32+") == 0);
3351 #else
3352 printf("** This version of PCRE was built without 32-bit support\n");
3353 exit(1);
3354 #endif
3355 }
3356 else if (strcmp(arg, "-q") == 0) quiet = 1;
3357 else if (strcmp(arg, "-b") == 0) debug = 1;
3358 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3359 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3360 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3361 #if !defined NODFA
3362 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3363 #endif
3364 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3365 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3366 *endptr == 0))
3367 {
3368 op++;
3369 argc--;
3370 }
3371 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
3372 {
3373 int both = arg[2] == 0;
3374 int temp;
3375 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3376 *endptr == 0))
3377 {
3378 timeitm = temp;
3379 op++;
3380 argc--;
3381 }
3382 else timeitm = LOOPREPEAT;
3383 if (both) timeit = timeitm;
3384 }
3385 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3386 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3387 *endptr == 0))
3388 {
3389 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
3390 printf("PCRE: -S not supported on this OS\n");
3391 exit(1);
3392 #else
3393 int rc;
3394 struct rlimit rlim;
3395 getrlimit(RLIMIT_STACK, &rlim);
3396 rlim.rlim_cur = stack_size * 1024 * 1024;
3397 rc = setrlimit(RLIMIT_STACK, &rlim);
3398 if (rc != 0)
3399 {
3400 printf("PCRE: setrlimit() failed with error %d\n", rc);
3401 exit(1);
3402 }
3403 op++;
3404 argc--;
3405 #endif
3406 }
3407 #if !defined NOPOSIX
3408 else if (strcmp(arg, "-p") == 0) posix = 1;
3409 #endif
3410 else if (strcmp(arg, "-C") == 0)
3411 {
3412 int rc;
3413 unsigned long int lrc;
3414
3415 if (argc > 2)
3416 {
3417 if (strcmp(argv[op + 1], "linksize") == 0)
3418 {
3419 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3420 printf("%d\n", rc);
3421 yield = rc;
3422 }
3423 else if (strcmp(argv[op + 1], "pcre8") == 0)
3424 {
3425 #ifdef SUPPORT_PCRE8
3426 printf("1\n");
3427 yield = 1;
3428 #else
3429 printf("0\n");
3430 yield = 0;
3431 #endif
3432 }
3433 else if (strcmp(argv[op + 1], "pcre16") == 0)
3434 {
3435 #ifdef SUPPORT_PCRE16
3436 printf("1\n");
3437 yield = 1;
3438 #else
3439 printf("0\n");
3440 yield = 0;
3441 #endif
3442 }
3443 else if (strcmp(argv[op + 1], "pcre32") == 0)
3444 {
3445 #ifdef SUPPORT_PCRE32
3446 printf("1\n");
3447 yield = 1;
3448 #else
3449 printf("0\n");
3450 yield = 0;
3451 #endif
3452 goto EXIT;
3453 }
3454 if (strcmp(argv[op + 1], "utf") == 0)
3455 {
3456 #ifdef SUPPORT_PCRE8
3457 if (pcre_mode == PCRE8_MODE)
3458 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3459 #endif
3460 #ifdef SUPPORT_PCRE16
3461 if (pcre_mode == PCRE16_MODE)
3462 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3463 #endif
3464 #ifdef SUPPORT_PCRE32
3465 if (pcre_mode == PCRE32_MODE)
3466 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3467 #endif
3468 printf("%d\n", rc);
3469 yield = rc;
3470 goto EXIT;
3471 }
3472 else if (strcmp(argv[op + 1], "ucp") == 0)
3473 {
3474 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3475 printf("%d\n", rc);
3476 yield = rc;
3477 }
3478 else if (strcmp(argv[op + 1], "jit") == 0)
3479 {
3480 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3481 printf("%d\n", rc);
3482 yield = rc;
3483 }
3484 else if (strcmp(argv[op + 1], "newline") == 0)
3485 {
3486 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3487 print_newline_config(rc, TRUE);
3488 }
3489 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3490 {
3491 #ifdef EBCDIC
3492 printf("1\n");
3493 yield = 1;
3494 #else
3495 printf("0\n");
3496 #endif
3497 }
3498 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3499 {
3500 #ifdef EBCDIC
3501 printf("0x%02x\n", CHAR_LF);
3502 #else
3503 printf("0\n");
3504 #endif
3505 }
3506 else
3507 {
3508 printf("Unknown -C option: %s\n", argv[op + 1]);
3509 }
3510 goto EXIT;
3511 }
3512
3513 /* No argument for -C: output all configuration information. */
3514
3515 printf("PCRE version %s\n", version);
3516 printf("Compiled with\n");
3517
3518 #ifdef EBCDIC
3519 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3520 #endif
3521
3522 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3523 are set, either both UTFs are supported or both are not supported. */
3524
3525 #ifdef SUPPORT_PCRE8
3526 printf(" 8-bit support\n");
3527 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3528 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3529 #endif
3530 #ifdef SUPPORT_PCRE16
3531 printf(" 16-bit support\n");
3532 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3533 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3534 #endif
3535 #ifdef SUPPORT_PCRE32
3536 printf(" 32-bit support\n");
3537 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3538 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3539 #endif
3540
3541 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3542 printf(" %sUnicode properties support\n", rc? "" : "No ");
3543 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3544 if (rc)
3545 {
3546 const char *arch;
3547 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3548 printf(" Just-in-time compiler support: %s\n", arch);
3549 }
3550 else
3551 printf(" No just-in-time compiler support\n");
3552 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3553 print_newline_config(rc, FALSE);
3554 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3555 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3556 "all Unicode newlines");
3557 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3558 printf(" Internal link size = %d\n", rc);
3559 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3560 printf(" POSIX malloc threshold = %d\n", rc);
3561 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3562 printf(" Default match limit = %ld\n", lrc);
3563 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3564 printf(" Default recursion depth limit = %ld\n", lrc);
3565 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3566 printf(" Match recursion uses %s", rc? "stack" : "heap");
3567 if (showstore)
3568 {
3569 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3570 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3571 }
3572 printf("\n");
3573 goto EXIT;
3574 }
3575 else if (strcmp(arg, "-help") == 0 ||
3576 strcmp(arg, "--help") == 0)
3577 {
3578 usage();
3579 goto EXIT;
3580 }
3581 else
3582 {
3583 BAD_ARG:
3584 printf("** Unknown or malformed option %s\n", arg);
3585 usage();
3586 yield = 1;
3587 goto EXIT;
3588 }
3589 op++;
3590 argc--;
3591 }
3592
3593 /* Get the store for the offsets vector, and remember what it was */
3594
3595 size_offsets_max = size_offsets;
3596 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3597 if (offsets == NULL)
3598 {
3599 printf("** Failed to get %d bytes of memory for offsets vector\n",
3600 (int)(size_offsets_max * sizeof(int)));
3601 yield = 1;
3602 goto EXIT;
3603 }
3604
3605 /* Sort out the input and output files */
3606
3607 if (argc > 1)
3608 {
3609 infile = fopen(argv[op], INPUT_MODE);
3610 if (infile == NULL)
3611 {
3612 printf("** Failed to open %s\n", argv[op]);
3613 yield = 1;
3614 goto EXIT;
3615 }
3616 }
3617
3618 if (argc > 2)
3619 {
3620 outfile = fopen(argv[op+1], OUTPUT_MODE);
3621 if (outfile == NULL)
3622 {
3623 printf("** Failed to open %s\n", argv[op+1]);
3624 yield = 1;
3625 goto EXIT;
3626 }
3627 }
3628
3629 /* Set alternative malloc function */
3630
3631 #ifdef SUPPORT_PCRE8
3632 pcre_malloc = new_malloc;
3633 pcre_free = new_free;
3634 pcre_stack_malloc = stack_malloc;
3635 pcre_stack_free = stack_free;
3636 #endif
3637
3638 #ifdef SUPPORT_PCRE16
3639 pcre16_malloc = new_malloc;
3640 pcre16_free = new_free;
3641 pcre16_stack_malloc = stack_malloc;
3642 pcre16_stack_free = stack_free;
3643 #endif
3644
3645 #ifdef SUPPORT_PCRE32
3646 pcre32_malloc = new_malloc;
3647 pcre32_free = new_free;
3648 pcre32_stack_malloc = stack_malloc;
3649 pcre32_stack_free = stack_free;
3650 #endif
3651
3652 /* Heading line unless quiet, then prompt for first regex if stdin */
3653
3654 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3655
3656 /* Main loop */
3657
3658 while (!done)
3659 {
3660 pcre *re = NULL;
3661 pcre_extra *extra = NULL;
3662
3663 #if !defined NOPOSIX /* There are still compilers that require no indent */
3664 regex_t preg;
3665 int do_posix = 0;
3666 #endif
3667
3668 const char *error;
3669 pcre_uint8 *markptr;
3670 pcre_uint8 *p, *pp, *ppp;
3671 pcre_uint8 *to_file = NULL;
3672 const pcre_uint8 *tables = NULL;
3673 unsigned long int get_options;
3674 unsigned long int true_size, true_study_size = 0;
3675 size_t size, regex_gotten_store;
3676 int do_allcaps = 0;
3677 int do_mark = 0;
3678 int do_study = 0;
3679 int no_force_study = 0;
3680 int do_debug = debug;
3681 int do_G = 0;
3682 int do_g = 0;
3683 int do_showinfo = showinfo;
3684 int do_showrest = 0;
3685 int do_showcaprest = 0;
3686 int do_flip = 0;
3687 int erroroffset, len, delimiter, poffset;
3688
3689 #if !defined NODFA
3690 int dfa_matched = 0;
3691 #endif
3692
3693 use_utf = 0;
3694 debug_lengths = 1;
3695
3696 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3697 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3698 fflush(outfile);
3699
3700 p = buffer;
3701 while (isspace(*p)) p++;
3702 if (*p == 0) continue;
3703
3704 /* See if the pattern is to be loaded pre-compiled from a file. */
3705
3706 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3707 {
3708 pcre_uint32 magic;
3709 pcre_uint8 sbuf[8];
3710 FILE *f;
3711
3712 p++;
3713 if (*p == '!')
3714 {
3715 do_debug = TRUE;
3716 do_showinfo = TRUE;
3717 p++;
3718 }
3719
3720 pp = p + (int)strlen((char *)p);
3721 while (isspace(pp[-1])) pp--;
3722 *pp = 0;
3723
3724 f = fopen((char *)p, "rb");
3725 if (f == NULL)
3726 {
3727 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3728 continue;
3729 }
3730
3731 first_gotten_store = 0;
3732 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3733
3734 true_size =
3735 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3736 true_study_size =
3737 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3738
3739 re = (pcre *)new_malloc(true_size);
3740 if (re == NULL)
3741 {
3742 printf("** Failed to get %d bytes of memory for pcre object\n",
3743 (int)true_size);
3744 yield = 1;
3745 goto EXIT;
3746 }
3747 regex_gotten_store = first_gotten_store;
3748
3749 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3750
3751 magic = REAL_PCRE_MAGIC(re);
3752 if (magic != MAGIC_NUMBER)
3753 {
3754 if (swap_uint32(magic) == MAGIC_NUMBER)
3755 {
3756 do_flip = 1;
3757 }
3758 else
3759 {
3760 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3761 new_free(re);
3762 fclose(f);
3763 continue;
3764 }
3765 }
3766
3767 /* We hide the byte-invert info for little and big endian tests. */
3768 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3769 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3770
3771 /* Now see if there is any following study data. */
3772
3773 if (true_study_size != 0)
3774 {
3775 pcre_study_data *psd;
3776
3777 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3778 extra->flags = PCRE_EXTRA_STUDY_DATA;
3779
3780 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3781 extra->study_data = psd;
3782
3783 if (fread(psd, 1, true_study_size, f) != true_study_size)
3784 {
3785 FAIL_READ:
3786 fprintf(outfile, "Failed to read data from %s\n", p);
3787 if (extra != NULL)
3788 {
3789 PCRE_FREE_STUDY(extra);
3790 }
3791 new_free(re);
3792 fclose(f);
3793 continue;
3794 }
3795 fprintf(outfile, "Study data loaded from %s\n", p);
3796 do_study = 1; /* To get the data output if requested */
3797 }
3798 else fprintf(outfile, "No study data\n");
3799
3800 /* Flip the necessary bytes. */
3801 if (do_flip)
3802 {
3803 int rc;
3804 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3805 if (rc == PCRE_ERROR_BADMODE)
3806 {
3807 /* Simulate the result of the function call below. */
3808 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3809 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3810 PCRE_INFO_OPTIONS);
3811 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3812 "%d-bit mode\n", 8 * CHAR_SIZE,
3813 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3814 new_free(re);
3815 fclose(f);
3816 continue;
3817 }
3818 }
3819
3820 /* Need to know if UTF-8 for printing data strings. */
3821
3822 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3823 {
3824 new_free(re);
3825 fclose(f);
3826 continue;
3827 }
3828 use_utf = (get_options & PCRE_UTF8) != 0;
3829
3830 fclose(f);
3831 goto SHOW_INFO;
3832 }
3833
3834 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3835 the pattern; if it isn't complete, read more. */
3836
3837 delimiter = *p++;
3838
3839 if (isalnum(delimiter) || delimiter == '\\')
3840 {
3841 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3842 goto SKIP_DATA;
3843 }
3844
3845 pp = p;
3846 poffset = (int)(p - buffer);
3847
3848 for(;;)
3849 {
3850 while (*pp != 0)
3851 {
3852 if (*pp == '\\' && pp[1] != 0) pp++;
3853 else if (*pp == delimiter) break;
3854 pp++;
3855 }
3856 if (*pp != 0) break;
3857 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3858 {
3859 fprintf(outfile, "** Unexpected EOF\n");
3860 done = 1;
3861 goto CONTINUE;
3862 }
3863 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3864 }
3865
3866 /* The buffer may have moved while being extended; reset the start of data
3867 pointer to the correct relative point in the buffer. */
3868
3869 p = buffer + poffset;
3870
3871 /* If the first character after the delimiter is backslash, make
3872 the pattern end with backslash. This is purely to provide a way
3873 of testing for the error message when a pattern ends with backslash. */
3874
3875 if (pp[1] == '\\') *pp++ = '\\';
3876
3877 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3878 for callouts. */
3879
3880 *pp++ = 0;
3881 strcpy((char *)pbuffer, (char *)p);
3882
3883 /* Look for options after final delimiter */
3884
3885 options = 0;
3886 study_options = force_study_options;
3887 log_store = showstore; /* default from command line */
3888
3889 while (*pp != 0)
3890 {
3891 switch (*pp++)
3892 {
3893 case 'f': options |= PCRE_FIRSTLINE; break;
3894 case 'g': do_g = 1; break;
3895 case 'i': options |= PCRE_CASELESS; break;
3896 case 'm': options |= PCRE_MULTILINE; break;
3897 case 's': options |= PCRE_DOTALL; break;
3898 case 'x': options |= PCRE_EXTENDED; break;
3899
3900 case '+':
3901 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3902 break;
3903
3904 case '=': do_allcaps = 1; break;
3905 case 'A': options |= PCRE_ANCHORED; break;
3906 case 'B': do_debug = 1; break;
3907 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3908 case 'D': do_debug = do_showinfo = 1; break;
3909 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3910 case 'F': do_flip = 1; break;
3911 case 'G': do_G = 1; break;
3912 case 'I': do_showinfo = 1; break;
3913 case 'J': options |= PCRE_DUPNAMES; break;
3914 case 'K': do_mark = 1; break;
3915 case 'M': log_store = 1; break;
3916 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3917
3918 #if !defined NOPOSIX
3919 case 'P': do_posix = 1; break;
3920 #endif
3921
3922 case 'S':
3923 do_study = 1;
3924 for (;;)
3925 {
3926 switch (*pp++)
3927 {
3928 case 'S':
3929 do_study = 0;
3930 no_force_study = 1;
3931 break;
3932
3933 case '!':
3934 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3935 break;
3936
3937 case '+':
3938 if (*pp == '+')
3939 {
3940 verify_jit = TRUE;
3941 pp++;
3942 }
3943 if (*pp >= '1' && *pp <= '7')
3944 study_options |= jit_study_bits[*pp++ - '1'];
3945 else
3946 study_options |= jit_study_bits[6];
3947 break;
3948
3949 case '-':
3950 study_options &= ~PCRE_STUDY_ALLJIT;
3951 break;
3952
3953 default:
3954 pp--;
3955 goto ENDLOOP;
3956 }
3957 }
3958 ENDLOOP:
3959 break;
3960
3961 case 'U': options |= PCRE_UNGREEDY; break;
3962 case 'W': options |= PCRE_UCP; break;
3963 case 'X': options |= PCRE_EXTRA; break;
3964 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3965 case 'Z': debug_lengths = 0; break;
3966 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3967 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3968
3969 case 'T':
3970 switch (*pp++)
3971 {
3972 case '0': tables = tables0; break;
3973 case '1': tables = tables1; break;
3974
3975 case '\r':
3976 case '\n':
3977 case ' ':
3978 case 0:
3979 fprintf(outfile, "** Missing table number after /T\n");
3980 goto SKIP_DATA;
3981
3982 default:
3983 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3984 goto SKIP_DATA;
3985 }
3986 break;
3987
3988 case 'L':
3989 ppp = pp;
3990 /* The '\r' test here is so that it works on Windows. */
3991 /* The test for 0 (NUL) is just in case this is an unterminated line. */
3992 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3993 *ppp = 0;
3994 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3995 {
3996 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3997 goto SKIP_DATA;
3998 }
3999 locale_set = 1;
4000 tables = PCRE_MAKETABLES;
4001 pp = ppp;
4002 break;
4003
4004 case '>':
4005 to_file = pp;
4006 while (*pp != 0) pp++;
4007 while (isspace(pp[-1])) pp--;
4008 *pp = 0;
4009 break;
4010
4011 case '<':
4012 {
4013 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
4014 {
4015 options |= PCRE_JAVASCRIPT_COMPAT;
4016 pp += 3;
4017 }
4018 else
4019 {
4020 int x = check_newline(pp, outfile);
4021 if (x == 0) goto SKIP_DATA;
4022 options |= x;
4023 while (*pp++ != '>');
4024 }
4025 }
4026 break;
4027
4028 case '\r': /* So that it works in Windows */
4029 case '\n':
4030 case ' ':
4031 break;
4032
4033 default:
4034 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
4035 goto SKIP_DATA;
4036 }
4037 }
4038
4039 /* Handle compiling via the POSIX interface, which doesn't support the
4040 timing, showing, or debugging options, nor the ability to pass over
4041 local character tables. Neither does it have 16-bit support. */
4042
4043 #if !defined NOPOSIX
4044 if (posix || do_posix)
4045 {
4046 int rc;
4047 int cflags = 0;
4048
4049 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
4050 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
4051 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
4052 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
4053 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
4054 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
4055 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
4056
4057 first_gotten_store = 0;
4058 rc = regcomp(&preg, (char *)p, cflags);
4059
4060 /* Compilation failed; go back for another re, skipping to blank line
4061 if non-interactive. */
4062
4063 if (rc != 0)
4064 {
4065 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4066 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
4067 goto SKIP_DATA;
4068 }
4069 }
4070
4071 /* Handle compiling via the native interface */
4072
4073 else
4074 #endif /* !defined NOPOSIX */
4075
4076 {
4077 /* In 16- or 32-bit mode, convert the input. */
4078
4079 #ifdef SUPPORT_PCRE16
4080 if (pcre_mode == PCRE16_MODE)
4081 {
4082 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
4083 {
4084 case -1:
4085 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4086 "converted to UTF-16\n");
4087 goto SKIP_DATA;
4088
4089 case -2:
4090 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4091 "cannot be converted to UTF-16\n");
4092 goto SKIP_DATA;
4093
4094 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
4095 fprintf(outfile, "**Failed: character value greater than 0xffff "
4096 "cannot be converted to 16-bit in non-UTF mode\n");
4097 goto SKIP_DATA;
4098
4099 default:
4100 break;
4101 }
4102 p = (pcre_uint8 *)buffer16;
4103 }
4104 #endif
4105
4106 #ifdef SUPPORT_PCRE32
4107 if (pcre_mode == PCRE32_MODE)
4108 {
4109 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
4110 {
4111 case -1:
4112 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4113 "converted to UTF-32\n");
4114 goto SKIP_DATA;
4115
4116 case -2:
4117 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4118 "cannot be converted to UTF-32\n");
4119 goto SKIP_DATA;
4120
4121 case -3:
4122 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4123 goto SKIP_DATA;
4124
4125 default:
4126 break;
4127 }
4128 p = (pcre_uint8 *)buffer32;
4129 }
4130 #endif
4131
4132 /* Compile many times when timing */
4133
4134 if (timeit > 0)
4135 {
4136 register int i;
4137 clock_t time_taken;
4138 clock_t start_time = clock();
4139 for (i = 0; i < timeit; i++)
4140 {
4141 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4142 if (re != NULL) free(re);
4143 }
4144 time_taken = clock() - start_time;
4145 fprintf(outfile, "Compile time %.4f milliseconds\n",
4146 (((double)time_taken * 1000.0) / (double)timeit) /
4147 (double)CLOCKS_PER_SEC);
4148 }
4149
4150 first_gotten_store = 0;
4151 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4152
4153 /* Compilation failed; go back for another re, skipping to blank line
4154 if non-interactive. */
4155
4156 if (re == NULL)
4157 {
4158 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
4159 SKIP_DATA:
4160 if (infile != stdin)
4161 {
4162 for (;;)
4163 {
4164 if (extend_inputline(infile, buffer, NULL) == NULL)
4165 {
4166 done = 1;
4167 goto CONTINUE;
4168 }
4169 len = (int)strlen((char *)buffer);
4170 while (len > 0 && isspace(buffer[len-1])) len--;
4171 if (len == 0) break;
4172 }
4173 fprintf(outfile, "\n");
4174 }
4175 goto CONTINUE;
4176 }
4177
4178 /* Compilation succeeded. It is now possible to set the UTF-8 option from
4179 within the regex; check for this so that we know how to process the data
4180 lines. */
4181
4182 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
4183 goto SKIP_DATA;
4184 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
4185
4186 /* Extract the size for possible writing before possibly flipping it,
4187 and remember the store that was got. */
4188
4189 true_size = REAL_PCRE_SIZE(re);
4190 regex_gotten_store = first_gotten_store;
4191
4192 /* Output code size information if requested */
4193
4194 if (log_store)
4195 {
4196 int name_count, name_entry_size, real_pcre_size;
4197
4198 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
4199 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
4200 real_pcre_size = 0;
4201 #ifdef SUPPORT_PCRE8
4202 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
4203 real_pcre_size = sizeof(real_pcre);
4204 #endif
4205 #ifdef SUPPORT_PCRE16
4206 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
4207 real_pcre_size = sizeof(real_pcre16);
4208 #endif
4209 #ifdef SUPPORT_PCRE32
4210 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
4211 real_pcre_size = sizeof(real_pcre32);
4212 #endif
4213 fprintf(outfile, "Memory allocation (code space): %d\n",
4214 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
4215 }
4216
4217 /* If -s or /S was present, study the regex to generate additional info to
4218 help with the matching, unless the pattern has the SS option, which
4219 suppresses the effect of /S (used for a few test patterns where studying is
4220 never sensible). */
4221
4222 if (do_study || (force_study >= 0 && !no_force_study))
4223 {
4224 if (timeit > 0)
4225 {
4226 register int i;
4227 clock_t time_taken;
4228 clock_t start_time = clock();
4229 for (i = 0; i < timeit; i++)
4230 {
4231 PCRE_STUDY(extra, re, study_options, &error);
4232 }
4233 time_taken = clock() - start_time;
4234 if (extra != NULL)
4235 {
4236 PCRE_FREE_STUDY(extra);
4237 }
4238 fprintf(outfile, " Study time %.4f milliseconds\n",
4239 (((double)time_taken * 1000.0) / (double)timeit) /
4240 (double)CLOCKS_PER_SEC);
4241 }
4242 PCRE_STUDY(extra, re, study_options, &error);
4243 if (error != NULL)
4244 fprintf(outfile, "Failed to study: %s\n", error);
4245 else if (extra != NULL)
4246 {
4247 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
4248 if (log_store)
4249 {
4250 size_t jitsize;
4251 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
4252 jitsize != 0)
4253 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
4254 }
4255 }
4256 }
4257
4258 /* If /K was present, we set up for handling MARK data. */
4259
4260 if (do_mark)
4261 {
4262 if (extra == NULL)
4263 {
4264 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4265 extra->flags = 0;
4266 }
4267 extra->mark = &markptr;
4268 extra->flags |= PCRE_EXTRA_MARK;
4269 }
4270
4271 /* Extract and display information from the compiled data if required. */
4272
4273 SHOW_INFO:
4274
4275 if (do_debug)
4276 {
4277 fprintf(outfile, "------------------------------------------------------------------\n");
4278 PCRE_PRINTINT(re, outfile, debug_lengths);
4279 }
4280
4281 /* We already have the options in get_options (see above) */
4282
4283 if (do_showinfo)
4284 {
4285 unsigned long int all_options;
4286 pcre_uint32 first_char, need_char;
4287 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4288 hascrorlf, maxlookbehind;
4289 int nameentrysize, namecount;
4290 const pcre_uint8 *nametable;
4291
4292 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4293 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4294 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4295 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4296 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4297 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4298 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4299 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4300 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4301 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4302 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4303 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4304 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4305 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4306 != 0)
4307 goto SKIP_DATA;
4308
4309 if (size != regex_gotten_store) fprintf(outfile,
4310 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
4311 (int)size, (int)regex_gotten_store);
4312
4313 fprintf(outfile, "Capturing subpattern count = %d\n", count);
4314 if (backrefmax > 0)
4315 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4316
4317 if (namecount > 0)
4318 {
4319 fprintf(outfile, "Named capturing subpatterns:\n");
4320 while (namecount-- > 0)
4321 {
4322 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4323 int length = (int)STRLEN(nametable + imm2_size);
4324 fprintf(outfile, " ");
4325 PCHARSV(nametable, imm2_size, length, outfile);
4326 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4327 #ifdef SUPPORT_PCRE32
4328 if (pcre_mode == PCRE32_MODE)
4329 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4330 #endif
4331 #ifdef SUPPORT_PCRE16
4332 if (pcre_mode == PCRE16_MODE)
4333 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4334 #endif
4335 #ifdef SUPPORT_PCRE8
4336 if (pcre_mode == PCRE8_MODE)
4337 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4338 #endif
4339 nametable += nameentrysize * CHAR_SIZE;
4340 }
4341 }
4342
4343 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4344 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4345
4346 all_options = REAL_PCRE_OPTIONS(re);
4347 if (do_flip) all_options = swap_uint32(all_options);
4348
4349 if (get_options == 0) fprintf(outfile, "No options\n");
4350 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4351 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4352 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4353 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4354 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4355 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4356 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4357 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4358 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4359 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4360 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4361 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4362 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4363 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4364 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4365 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4366 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4367 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
4368
4369 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4370
4371 switch (get_options & PCRE_NEWLINE_BITS)
4372 {
4373 case PCRE_NEWLINE_CR:
4374 fprintf(outfile, "Forced newline sequence: CR\n");
4375 break;
4376
4377 case PCRE_NEWLINE_LF:
4378 fprintf(outfile, "Forced newline sequence: LF\n");
4379 break;
4380
4381 case PCRE_NEWLINE_CRLF:
4382 fprintf(outfile, "Forced newline sequence: CRLF\n");
4383 break;
4384
4385 case PCRE_NEWLINE_ANYCRLF:
4386 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4387 break;
4388
4389 case PCRE_NEWLINE_ANY:
4390 fprintf(outfile, "Forced newline sequence: ANY\n");
4391 break;
4392
4393 default:
4394 break;
4395 }
4396
4397 if (first_char_set == 2)
4398 {
4399 fprintf(outfile, "First char at start or follows newline\n");
4400 }
4401 else if (first_char_set == 1)
4402 {
4403 const char *caseless =
4404 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4405 "" : " (caseless)";
4406
4407 if (PRINTOK(first_char))
4408 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4409 else
4410 {
4411 fprintf(outfile, "First char = ");
4412 pchar(first_char, outfile);
4413 fprintf(outfile, "%s\n", caseless);
4414 }
4415 }
4416 else
4417 {
4418 fprintf(outfile, "No first char\n");
4419 }
4420
4421 if (need_char_set == 0)
4422 {
4423 fprintf(outfile, "No need char\n");
4424 }
4425 else
4426 {
4427 const char *caseless =
4428 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4429 "" : " (caseless)";
4430
4431 if (PRINTOK(need_char))
4432 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4433 else
4434 {
4435 fprintf(outfile, "Need char = ");
4436 pchar(need_char, outfile);
4437 fprintf(outfile, "%s\n", caseless);
4438 }
4439 }
4440
4441 if (maxlookbehind > 0)
4442 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4443
4444 /* Don't output study size; at present it is in any case a fixed
4445 value, but it varies, depending on the computer architecture, and
4446 so messes up the test suite. (And with the /F option, it might be
4447 flipped.) If study was forced by an external -s, don't show this
4448 information unless -i or -d was also present. This means that, except
4449 when auto-callouts are involved, the output from runs with and without
4450 -s should be identical. */
4451
4452 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4453 {
4454 if (extra == NULL)
4455 fprintf(outfile, "Study returned NULL\n");
4456 else
4457 {
4458 pcre_uint8 *start_bits = NULL;
4459 int minlength;
4460
4461 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4462 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4463
4464 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4465 {
4466 if (start_bits == NULL)
4467 fprintf(outfile, "No set of starting bytes\n");
4468 else
4469 {
4470 int i;
4471 int c = 24;
4472 fprintf(outfile, "Starting byte set: ");
4473 for (i = 0; i < 256; i++)
4474 {
4475 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4476 {
4477 if (c > 75)
4478 {
4479 fprintf(outfile, "\n ");
4480 c = 2;
4481 }
4482 if (PRINTOK(i) && i != ' ')
4483 {
4484 fprintf(outfile, "%c ", i);
4485 c += 2;
4486 }
4487 else
4488 {
4489 fprintf(outfile, "\\x%02x ", i);
4490 c += 5;
4491 }
4492 }
4493 }
4494 fprintf(outfile, "\n");
4495 }
4496 }
4497 }
4498
4499 /* Show this only if the JIT was set by /S, not by -s. */
4500
4501 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4502 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4503 {
4504 int jit;
4505 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4506 {
4507 if (jit)
4508 fprintf(outfile, "JIT study was successful\n");
4509 else
4510 #ifdef SUPPORT_JIT
4511 fprintf(outfile, "JIT study was not successful\n");
4512 #else
4513 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4514 #endif
4515 }
4516 }
4517 }
4518 }
4519
4520 /* If the '>' option was present, we write out the regex to a file, and
4521 that is all. The first 8 bytes of the file are the regex length and then
4522 the study length, in big-endian order. */
4523
4524 if (to_file != NULL)
4525 {
4526 FILE *f = fopen((char *)to_file, "wb");
4527 if (f == NULL)
4528 {
4529 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4530 }
4531 else
4532 {
4533 pcre_uint8 sbuf[8];
4534
4535 if (do_flip) regexflip(re, extra);
4536 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4537 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4538 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4539 sbuf[3] = (pcre_uint8)((true_size) & 255);
4540 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4541 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4542 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4543 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4544
4545 if (fwrite(sbuf, 1, 8, f) < 8 ||
4546 fwrite(re, 1, true_size, f) < true_size)
4547 {
4548 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4549 }
4550 else
4551 {
4552 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4553
4554 /* If there is study data, write it. */
4555
4556 if (extra != NULL)
4557 {
4558 if (fwrite(extra->study_data, 1, true_study_size, f) <
4559 true_study_size)
4560 {
4561 fprintf(outfile, "Write error on %s: %s\n", to_file,
4562 strerror(errno));
4563 }
4564 else fprintf(outfile, "Study data written to %s\n", to_file);
4565 }
4566 }
4567 fclose(f);
4568 }
4569
4570 new_free(re);
4571 if (extra != NULL)
4572 {
4573 PCRE_FREE_STUDY(extra);
4574 }
4575 if (locale_set)
4576 {
4577 new_free((void *)tables);
4578 setlocale(LC_CTYPE, "C");
4579 locale_set = 0;
4580 }
4581 continue; /* With next regex */
4582 }
4583 } /* End of non-POSIX compile */
4584
4585 /* Read data lines and test them */
4586
4587 for (;;)
4588 {
4589 #ifdef SUPPORT_PCRE8
4590 pcre_uint8 *q8;
4591 #endif
4592 #ifdef SUPPORT_PCRE16
4593 pcre_uint16 *q16;
4594 #endif
4595 #ifdef SUPPORT_PCRE32
4596 pcre_uint32 *q32;
4597 #endif
4598 pcre_uint8 *bptr;
4599 int *use_offsets = offsets;
4600 int use_size_offsets = size_offsets;
4601 int callout_data = 0;
4602 int callout_data_set = 0;
4603 int count;
4604 pcre_uint32 c;
4605 int copystrings = 0;
4606 int find_match_limit = default_find_match_limit;
4607 int getstrings = 0;
4608 int getlist = 0;
4609 int gmatched = 0;
4610 int start_offset = 0;
4611 int start_offset_sign = 1;
4612 int g_notempty = 0;
4613 int use_dfa = 0;
4614
4615 *copynames = 0;
4616 *getnames = 0;
4617
4618 #ifdef SUPPORT_PCRE32
4619 cn32ptr = copynames;
4620 gn32ptr = getnames;
4621 #endif
4622 #ifdef SUPPORT_PCRE16
4623 cn16ptr = copynames16;
4624 gn16ptr = getnames16;
4625 #endif
4626 #ifdef SUPPORT_PCRE8
4627 cn8ptr = copynames8;
4628 gn8ptr = getnames8;
4629 #endif
4630
4631 SET_PCRE_CALLOUT(callout);
4632 first_callout = 1;
4633 last_callout_mark = NULL;
4634 callout_extra = 0;
4635 callout_count = 0;
4636 callout_fail_count = 999999;
4637 callout_fail_id = -1;
4638 show_malloc = 0;
4639 options = 0;
4640
4641 if (extra != NULL) extra->flags &=
4642 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4643
4644 len = 0;
4645 for (;;)
4646 {
4647 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4648 {
4649 if (len > 0) /* Reached EOF without hitting a newline */
4650 {
4651 fprintf(outfile, "\n");
4652 break;
4653 }
4654 done = 1;
4655 goto CONTINUE;
4656 }
4657 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4658 len = (int)strlen((char *)buffer);
4659 if (buffer[len-1] == '\n') break;
4660 }
4661
4662 while (len > 0 && isspace(buffer[len-1])) len--;
4663 buffer[len] = 0;
4664 if (len == 0) break;
4665
4666 p = buffer;
4667 while (isspace(*p)) p++;
4668
4669 #ifndef NOUTF
4670 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4671 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4672 if (use_utf)
4673 {
4674 pcre_uint8 *q;
4675 pcre_uint32 cc;
4676 int n = 1;
4677
4678 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4679 if (n <= 0)
4680 {
4681 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4682 goto NEXT_DATA;
4683 }
4684 }
4685 #endif
4686
4687 /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4688 the number of pcre_uchar units that will be needed. */
4689 if (dbuffer == NULL || (size_t)len >= dbuffer_size)
4690 {
4691 dbuffer_size *= 2;
4692 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4693 if (dbuffer == NULL)
4694 {
4695 fprintf(stderr, "pcretest: malloc(%d) failed\n", dbuffer_size);
4696 exit(1);
4697 }
4698 }
4699
4700 #ifdef SUPPORT_PCRE8
4701 q8 = (pcre_uint8 *) dbuffer;
4702 #endif
4703 #ifdef SUPPORT_PCRE16
4704 q16 = (pcre_uint16 *) dbuffer;
4705 #endif
4706 #ifdef SUPPORT_PCRE32
4707 q32 = (pcre_uint32 *) dbuffer;
4708 #endif
4709
4710 while ((c = *p++) != 0)
4711 {
4712 int i = 0;
4713 int n = 0;
4714
4715 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4716 In non-UTF mode, allow the value of the byte to fall through to later,
4717 where values greater than 127 are turned into UTF-8 when running in
4718 16-bit or 32-bit mode. */
4719
4720 if (c != '\\')
4721 {
4722 #ifndef NOUTF
4723 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4724 #endif
4725 }
4726
4727 /* Handle backslash escapes */
4728
4729 else switch ((c = *p++))
4730 {
4731 case 'a': c = 7; break;
4732 case 'b': c = '\b'; break;
4733 case 'e': c = 27; break;
4734 case 'f': c = '\f'; break;
4735 case 'n': c = '\n'; break;
4736 case 'r': c = '\r'; break;
4737 case 't': c = '\t'; break;
4738 case 'v': c = '\v'; break;
4739
4740 case '0': case '1': case '2': case '3':
4741 case '4': case '5': case '6': case '7':
4742 c -= '0';
4743 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4744 c = c * 8 + *p++ - '0';
4745 break;
4746
4747 case 'x':
4748 if (*p == '{')
4749 {
4750 pcre_uint8 *pt = p;
4751 c = 0;
4752
4753 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4754 when isxdigit() is a macro that refers to its argument more than
4755 once. This is banned by the C Standard, but apparently happens in at
4756 least one MacOS environment. */
4757
4758 for (pt++; isxdigit(*pt); pt++)
4759 {
4760 if (++i == 9)
4761 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4762 "using only the first eight.\n");
4763 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4764 }
4765 if (*pt == '}')
4766 {
4767 p = pt + 1;
4768 break;
4769 }
4770 /* Not correct form for \x{...}; fall through */
4771 }
4772
4773 /* \x without {} always defines just one byte in 8-bit mode. This
4774 allows UTF-8 characters to be constructed byte by byte, and also allows
4775 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4776 Otherwise, pass it down to later code so that it can be turned into
4777 UTF-8 when running in 16/32-bit mode. */
4778
4779 c = 0;
4780 while (i++ < 2 && isxdigit(*p))
4781 {
4782 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4783 p++;
4784 }
4785 #if !defined NOUTF && defined SUPPORT_PCRE8
4786 if (use_utf && (pcre_mode == PCRE8_MODE))
4787 {
4788 *q8++ = c;
4789 continue;
4790 }
4791 #endif
4792 break;
4793
4794 case 0: /* \ followed by EOF allows for an empty line */
4795 p--;
4796 continue;
4797
4798 case '>':
4799 if (*p == '-')
4800 {
4801 start_offset_sign = -1;
4802 p++;
4803 }
4804 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4805 start_offset *= start_offset_sign;
4806 continue;
4807
4808 case 'A': /* Option setting */
4809 options |= PCRE_ANCHORED;
4810 continue;
4811
4812 case 'B':
4813 options |= PCRE_NOTBOL;
4814 continue;
4815
4816 case 'C':
4817 if (isdigit(*p)) /* Set copy string */
4818 {
4819 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4820 copystrings |= 1 << n;
4821 }
4822 else if (isalnum(*p))
4823 {
4824 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4825 }
4826 else if (*p == '+')
4827 {
4828 callout_extra = 1;
4829 p++;
4830 }
4831 else if (*p == '-')
4832 {
4833 SET_PCRE_CALLOUT(NULL);
4834 p++;
4835 }
4836 else if (*p == '!')
4837 {
4838 callout_fail_id = 0;
4839 p++;
4840 while(isdigit(*p))
4841 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4842 callout_fail_count = 0;
4843 if (*p == '!')
4844 {
4845 p++;
4846 while(isdigit(*p))
4847 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4848 }
4849 }
4850 else if (*p == '*')
4851 {
4852 int sign = 1;
4853 callout_data = 0;
4854 if (*(++p) == '-') { sign = -1; p++; }
4855 while(isdigit(*p))
4856 callout_data = callout_data * 10 + *p++ - '0';
4857 callout_data *= sign;
4858 callout_data_set = 1;
4859 }
4860 continue;
4861
4862 #if !defined NODFA
4863 case 'D':
4864 #if !defined NOPOSIX
4865 if (posix || do_posix)
4866 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4867 else
4868 #endif
4869 use_dfa = 1;
4870 continue;
4871 #endif
4872
4873 #if !defined NODFA
4874 case 'F':
4875 options |= PCRE_DFA_SHORTEST;
4876 continue;
4877 #endif
4878
4879 case 'G':
4880 if (isdigit(*p))
4881 {
4882 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4883 getstrings |= 1 << n;
4884 }
4885 else if (isalnum(*p))
4886 {
4887 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4888 }
4889 continue;
4890
4891 case 'J':
4892 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4893 if (extra != NULL
4894 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4895 && extra->executable_jit != NULL)
4896 {
4897 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4898 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4899 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4900 }
4901 continue;
4902
4903 case 'L':
4904 getlist = 1;
4905 continue;
4906
4907 case 'M':
4908 find_match_limit = 1;
4909 continue;
4910
4911 case 'N':
4912 if ((options & PCRE_NOTEMPTY) != 0)
4913 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4914 else
4915 options |= PCRE_NOTEMPTY;
4916 continue;
4917
4918 case 'O':
4919 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4920 if (n > size_offsets_max)
4921 {
4922 size_offsets_max = n;
4923 free(offsets);
4924 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4925 if (offsets == NULL)
4926 {
4927 printf("** Failed to get %d bytes of memory for offsets vector\n",
4928 (int)(size_offsets_max * sizeof(int)));
4929 yield = 1;
4930 goto EXIT;
4931 }
4932 }
4933 use_size_offsets = n;
4934 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4935 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4936 continue;
4937
4938 case 'P':
4939 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4940 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4941 continue;
4942
4943 case 'Q':
4944 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4945 if (extra == NULL)
4946 {
4947 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4948 extra->flags = 0;
4949 }
4950 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4951 extra->match_limit_recursion = n;
4952 continue;
4953
4954 case 'q':
4955 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4956 if (extra == NULL)
4957 {
4958 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4959 extra->flags = 0;
4960 }
4961 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4962 extra->match_limit = n;
4963 continue;
4964
4965 #if !defined NODFA
4966 case 'R':
4967 options |= PCRE_DFA_RESTART;
4968 continue;
4969 #endif
4970
4971 case 'S':
4972 show_malloc = 1;
4973 continue;
4974
4975 case 'Y':
4976 options |= PCRE_NO_START_OPTIMIZE;
4977 continue;
4978
4979 case 'Z':
4980 options |= PCRE_NOTEOL;
4981 continue;
4982
4983 case '?':
4984 options |= PCRE_NO_UTF8_CHECK;
4985 continue;
4986
4987 case '<':
4988 {
4989 int x = check_newline(p, outfile);
4990 if (x == 0) goto NEXT_DATA;
4991 options |= x;
4992 while (*p++ != '>');
4993 }
4994 continue;
4995 }
4996
4997 /* We now have a character value in c that may be greater than 255. In
4998 16-bit or 32-bit mode, we always convert characters to UTF-8 so that
4999 values greater than 255 can be passed to non-UTF 16- or 32-bit strings.
5000 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
5001 than 127 in UTF mode must have come from \x{...} or octal constructs
5002 because values from \x.. get this far only in non-UTF mode. */
5003
5004 #ifdef SUPPORT_PCRE8
5005 if (pcre_mode == PCRE8_MODE)
5006 {
5007 #ifndef NOUTF
5008 if (use_utf)
5009 {
5010 q8 += ord2utf8(c, q8);
5011 }
5012 else
5013 #endif
5014 {
5015 if (c > 0xffu)
5016 {
5017 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
5018 "and UTF-8 mode is not enabled.\n", c);
5019 fprintf(outfile, "** Truncation will probably give the wrong "
5020 "result.\n");
5021 }
5022
5023 *q8++ = c;
5024 }
5025 }
5026 #endif
5027 #ifdef SUPPORT_PCRE16
5028 if (pcre_mode == PCRE16_MODE)
5029 {
5030 #ifndef NOUTF
5031 if (use_utf)
5032 {
5033 if (c > 0x10ffffu)
5034 {
5035 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
5036 "cannot be converted to UTF-16\n");
5037 goto NEXT_DATA;
5038 }
5039 else if (c >= 0x10000u)
5040 {
5041 c-= 0x10000u;
5042 *q16++ = 0xD800 | (c >> 10);
5043 *q16++ = 0xDC00 | (c & 0x3ff);
5044 }
5045 else
5046 *q16++ = c;
5047 }
5048 else
5049 #endif
5050 {
5051 if (c > 0xffffu)
5052 {
5053 fprintf(outfile, "** Character value is greater than 0xffff "
5054 "and UTF-16 mode is not enabled.\n");
5055 fprintf(outfile, "** Truncation will probably give the wrong "
5056 "result.\n");
5057 }
5058
5059 *q16++ = c;
5060 }
5061 }
5062 #endif
5063 #ifdef SUPPORT_PCRE32
5064 if (pcre_mode == PCRE32_MODE)
5065 {
5066 *q32++ = c;
5067 }
5068 #endif
5069
5070 }
5071
5072 /* Reached end of subject string */
5073
5074 #ifdef SUPPORT_PCRE8
5075 if (pcre_mode == PCRE8_MODE)
5076 {
5077 *q8 = 0;
5078 len = (int)(q8 - (pcre_uint8 *)dbuffer);
5079 }
5080 #endif
5081 #ifdef SUPPORT_PCRE16
5082 if (pcre_mode == PCRE16_MODE)
5083 {
5084 *q16 = 0;
5085 len = (int)(q16 - (pcre_uint16 *)dbuffer);
5086 }
5087 #endif
5088 #ifdef SUPPORT_PCRE32
5089 if (pcre_mode == PCRE32_MODE)
5090 {
5091 *q32 = 0;
5092 len = (int)(q32 - (pcre_uint32 *)dbuffer);
5093 }
5094 #endif
5095
5096 #if defined SUPPORT_UTF && defined SUPPORT_PCRE32
5097 /* If we're requested to test UTF-32 masking of high bits, change the data
5098 string to have high bits set, unless the string is invalid UTF-32.
5099 Since the JIT doesn't support this yet, only do it when not JITing. */
5100 if (use_utf && mask_utf32 && (study_options & PCRE_STUDY_ALLJIT) == 0 &&
5101 valid_utf32((pcre_uint32 *)dbuffer, len))
5102 {
5103 for (q32 = (pcre_uint32 *)dbuffer; *q32; q32++)
5104 *q32 |= ~(pcre_uint32)UTF32_MASK;
5105
5106 /* Need to pass NO_UTF32_CHECK so the high bits are allowed */
5107 options |= PCRE_NO_UTF32_CHECK;
5108 }
5109 #endif
5110
5111 /* Move the data to the end of the buffer so that a read over the end of
5112 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
5113 we are using the POSIX interface, we must include the terminating zero. */
5114
5115 bptr = dbuffer;
5116
5117 #if !defined NOPOSIX
5118 if (posix || do_posix)
5119 {
5120 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
5121 bptr += dbuffer_size - len - 1;
5122 }
5123 else
5124 #endif
5125 {
5126 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
5127 }
5128
5129 if ((all_use_dfa || use_dfa) && find_match_limit)
5130 {
5131 printf("**Match limit not relevant for DFA matching: ignored\n");
5132 find_match_limit = 0;
5133 }
5134
5135 /* Handle matching via the POSIX interface, which does not
5136 support timing or playing with the match limit or callout data. */
5137
5138 #if !defined NOPOSIX
5139 if (posix || do_posix)
5140 {
5141 int rc;
5142 int eflags = 0;
5143 regmatch_t *pmatch = NULL;
5144 if (use_size_offsets > 0)
5145 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
5146 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
5147 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
5148 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
5149
5150 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
5151
5152 if (rc != 0)
5153 {
5154 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
5155 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
5156 }
5157 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
5158 {
5159 fprintf(outfile, "Matched with REG_NOSUB\n");
5160 }
5161 else
5162 {
5163 size_t i;
5164 for (i = 0; i < (size_t)use_size_offsets; i++)
5165 {
5166 if (pmatch[i].rm_so >= 0)
5167 {
5168 fprintf(outfile, "%2d: ", (int)i);
5169 PCHARSV(dbuffer, pmatch[i].rm_so,
5170 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
5171 fprintf(outfile, "\n");
5172 if (do_showcaprest || (i == 0 && do_showrest))
5173 {
5174 fprintf(outfile, "%2d+ ", (int)i);
5175 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
5176 outfile);
5177 fprintf(outfile, "\n");
5178 }
5179 }
5180 }
5181 }
5182 free(pmatch);
5183 goto NEXT_DATA;
5184 }
5185
5186 #endif /* !defined NOPOSIX */
5187
5188 /* Handle matching via the native interface - repeats for /g and /G */
5189
5190 /* Ensure that there is a JIT callback if we want to verify that JIT was
5191 actually used. If jit_stack == NULL, no stack has yet been assigned. */
5192
5193 if (verify_jit && jit_stack == NULL && extra != NULL)
5194 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
5195
5196 for (;; gmatched++) /* Loop for /g or /G */
5197 {
5198 markptr = NULL;
5199 jit_was_used = FALSE;
5200
5201 if (timeitm > 0)
5202 {
5203 register int i;
5204 clock_t time_taken;
5205 clock_t start_time = clock();
5206
5207 #if !defined NODFA
5208 if (all_use_dfa || use_dfa)
5209 {
5210 if ((options & PCRE_DFA_RESTART) != 0)
5211 {
5212 fprintf(outfile, "Timing DFA restarts is not supported\n");
5213 break;
5214 }
5215 if (dfa_workspace == NULL)
5216 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5217 for (i = 0; i < timeitm; i++)
5218 {
5219 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5220 (options | g_notempty), use_offsets, use_size_offsets,
5221 dfa_workspace, DFA_WS_DIMENSION);
5222 }
5223 }
5224 else
5225 #endif
5226
5227 for (i = 0; i < timeitm; i++)
5228 {
5229 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5230 (options | g_notempty), use_offsets, use_size_offsets);
5231 }
5232 time_taken = clock() - start_time;
5233 fprintf(outfile, "Execute time %.4f milliseconds\n",
5234 (((double)time_taken * 1000.0) / (double)timeitm) /
5235 (double)CLOCKS_PER_SEC);
5236 }
5237
5238 /* If find_match_limit is set, we want to do repeated matches with
5239 varying limits in order to find the minimum value for the match limit and
5240 for the recursion limit. The match limits are relevant only to the normal
5241 running of pcre_exec(), so disable the JIT optimization. This makes it
5242 possible to run the same set of tests with and without JIT externally
5243 requested. */
5244
5245 if (find_match_limit)
5246 {
5247 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
5248 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5249 extra->flags = 0;
5250
5251 (void)check_match_limit(re, extra, bptr, len, start_offset,
5252 options|g_notempty, use_offsets, use_size_offsets,
5253 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
5254 PCRE_ERROR_MATCHLIMIT, "match()");
5255
5256 count = check_match_limit(re, extra, bptr, len, start_offset,
5257 options|g_notempty, use_offsets, use_size_offsets,
5258 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
5259 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
5260 }
5261
5262 /* If callout_data is set, use the interface with additional data */
5263
5264 else if (callout_data_set)
5265 {
5266 if (extra == NULL)
5267 {
5268 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5269 extra->flags = 0;
5270 }
5271 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
5272 extra->callout_data = &callout_data;
5273 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5274 options | g_notempty, use_offsets, use_size_offsets);
5275 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5276 }
5277
5278 /* The normal case is just to do the match once, with the default
5279 value of match_limit. */
5280
5281 #if !defined NODFA
5282 else if (all_use_dfa || use_dfa)
5283 {
5284 if (dfa_workspace == NULL)
5285 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5286 if (dfa_matched++ == 0)
5287 dfa_workspace[0] = -1; /* To catch bad restart */
5288 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5289 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5290 DFA_WS_DIMENSION);
5291 if (count == 0)
5292 {
5293 fprintf(outfile, "Matched, but too many subsidiary matches\n");
5294 count = use_size_offsets/2;
5295 }
5296 }
5297 #endif
5298
5299 else
5300 {
5301 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5302 options | g_notempty, use_offsets, use_size_offsets);
5303 if (count == 0)
5304 {
5305 fprintf(outfile, "Matched, but too many substrings\n");
5306 count = use_size_offsets/3;
5307 }
5308 }
5309
5310 /* Matched */
5311
5312 if (count >= 0)
5313 {
5314 int i, maxcount;
5315 void *cnptr, *gnptr;
5316
5317 #if !defined NODFA
5318 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5319 #endif
5320 maxcount = use_size_offsets/3;
5321
5322 /* This is a check against a lunatic return value. */
5323
5324 if (count > maxcount)
5325 {
5326 fprintf(outfile,
5327 "** PCRE error: returned count %d is too big for offset size %d\n",
5328 count, use_size_offsets);
5329 count = use_size_offsets/3;
5330 if (do_g || do_G)
5331 {
5332 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5333 do_g = do_G = FALSE; /* Break g/G loop */
5334 }
5335 }
5336
5337 /* do_allcaps requests showing of all captures in the pattern, to check
5338 unset ones at the end. */
5339
5340 if (do_allcaps)
5341 {
5342 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5343 goto SKIP_DATA;
5344 count++; /* Allow for full match */
5345 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5346 }
5347
5348 /* Output the captured substrings */
5349
5350 for (i = 0; i < count * 2; i += 2)
5351 {
5352 if (use_offsets[i] < 0)
5353 {
5354 if (use_offsets[i] != -1)
5355 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5356 use_offsets[i], i);
5357 if (use_offsets[i+1] != -1)
5358 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5359 use_offsets[i+1], i+1);
5360 fprintf(outfile, "%2d: <unset>\n", i/2);
5361 }
5362 else
5363 {
5364 fprintf(outfile, "%2d: ", i/2);
5365 PCHARSV(bptr, use_offsets[i],
5366 use_offsets[i+1] - use_offsets[i], outfile);
5367 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5368 fprintf(outfile, "\n");
5369 if (do_showcaprest || (i == 0 && do_showrest))
5370 {
5371 fprintf(outfile, "%2d+ ", i/2);
5372 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5373 outfile);
5374 fprintf(outfile, "\n");
5375 }
5376 }
5377 }
5378
5379 if (markptr != NULL)
5380 {
5381 fprintf(outfile, "MK: ");
5382 PCHARSV(markptr, 0, -1, outfile);
5383 fprintf(outfile, "\n");
5384 }
5385
5386 for (i = 0; i < 32; i++)
5387 {
5388 if ((copystrings & (1 << i)) != 0)
5389 {
5390 int rc;
5391 char copybuffer[256];
5392 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5393 copybuffer, sizeof(copybuffer));
5394 if (rc < 0)
5395 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5396 else
5397 {
5398 fprintf(outfile, "%2dC ", i);
5399 PCHARSV(copybuffer, 0, rc, outfile);
5400 fprintf(outfile, " (%d)\n", rc);
5401 }
5402 }
5403 }
5404
5405 cnptr = copynames;
5406 for (;;)
5407 {
5408 int rc;
5409 char copybuffer[256];
5410
5411 #ifdef SUPPORT_PCRE32
5412 if (pcre_mode == PCRE32_MODE)
5413 {
5414 if (*(pcre_uint32 *)cnptr == 0) break;
5415 }
5416 #endif
5417 #ifdef SUPPORT_PCRE16
5418 if (pcre_mode == PCRE16_MODE)
5419 {
5420 if (*(pcre_uint16 *)cnptr == 0) break;
5421 }
5422 #endif
5423 #ifdef SUPPORT_PCRE8
5424 if (pcre_mode == PCRE8_MODE)
5425 {
5426 if (*(pcre_uint8 *)cnptr == 0) break;
5427 }
5428 #endif
5429
5430 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5431 cnptr, copybuffer, sizeof(copybuffer));
5432
5433 if (rc < 0)
5434 {
5435 fprintf(outfile, "copy substring ");
5436 PCHARSV(cnptr, 0, -1, outfile);
5437 fprintf(outfile, " failed %d\n", rc);
5438 }
5439 else
5440 {
5441 fprintf(outfile, " C ");
5442 PCHARSV(copybuffer, 0, rc, outfile);
5443 fprintf(outfile, " (%d) ", rc);
5444 PCHARSV(cnptr, 0, -1, outfile);
5445 putc('\n', outfile);
5446 }
5447
5448 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5449 }
5450
5451 for (i = 0; i < 32; i++)
5452 {
5453 if ((getstrings & (1 << i)) != 0)
5454 {
5455 int rc;
5456 const char *substring;
5457 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5458 if (rc < 0)
5459 fprintf(outfile, "get substring %d failed %d\n", i, rc);
5460 else
5461 {
5462 fprintf(outfile, "%2dG ", i);
5463 PCHARSV(substring, 0, rc, outfile);
5464 fprintf(outfile, " (%d)\n", rc);
5465 PCRE_FREE_SUBSTRING(substring);
5466 }
5467 }
5468 }
5469
5470 gnptr = getnames;
5471 for (;;)
5472 {
5473 int rc;
5474 const char *substring;
5475
5476 #ifdef SUPPORT_PCRE32
5477 if (pcre_mode == PCRE32_MODE)
5478 {
5479 if (*(pcre_uint32 *)gnptr == 0) break;
5480 }
5481 #endif
5482 #ifdef SUPPORT_PCRE16
5483 if (pcre_mode == PCRE16_MODE)
5484 {
5485 if (*(pcre_uint16 *)gnptr == 0) break;
5486 }
5487 #endif
5488 #ifdef SUPPORT_PCRE8
5489 if (pcre_mode == PCRE8_MODE)
5490 {
5491 if (*(pcre_uint8 *)gnptr == 0) break;
5492 }
5493 #endif
5494
5495 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5496 gnptr, &substring);
5497 if (rc < 0)
5498 {
5499 fprintf(outfile, "get substring ");
5500 PCHARSV(gnptr, 0, -1, outfile);
5501 fprintf(outfile, " failed %d\n", rc);
5502 }
5503 else
5504 {
5505 fprintf(outfile, " G ");
5506 PCHARSV(substring, 0, rc, outfile);
5507 fprintf(outfile, " (%d) ", rc);
5508 PCHARSV(gnptr, 0, -1, outfile);
5509 PCRE_FREE_SUBSTRING(substring);
5510 putc('\n', outfile);
5511 }
5512
5513 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5514 }
5515
5516 if (getlist)
5517 {
5518 int rc;
5519 const char **stringlist;
5520 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5521 if (rc < 0)
5522 fprintf(outfile, "get substring list failed %d\n", rc);
5523 else
5524 {
5525 for (i = 0; i < count; i++)
5526 {
5527 fprintf(outfile, "%2dL ", i);
5528 PCHARSV(stringlist[i], 0, -1, outfile);
5529 putc('\n', outfile);
5530 }
5531 if (stringlist[i] != NULL)
5532 fprintf(outfile, "string list not terminated by NULL\n");
5533 PCRE_FREE_SUBSTRING_LIST(stringlist);
5534 }
5535 }
5536 }
5537
5538 /* There was a partial match */
5539
5540 else if (count == PCRE_ERROR_PARTIAL)
5541 {
5542 if (markptr == NULL) fprintf(outfile, "Partial match");
5543 else
5544 {
5545 fprintf(outfile, "Partial match, mark=");
5546 PCHARSV(markptr, 0, -1, outfile);
5547 }
5548 if (use_size_offsets > 1)
5549 {
5550 fprintf(outfile, ": ");
5551 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5552 outfile);
5553 }
5554 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5555 fprintf(outfile, "\n");
5556 break; /* Out of the /g loop */
5557 }
5558
5559 /* Failed to match. If this is a /g or /G loop and we previously set
5560 g_notempty after a null match, this is not necessarily the end. We want
5561 to advance the start offset, and continue. We won't be at the end of the
5562 string - that was checked before setting g_notempty.
5563
5564 Complication arises in the case when the newline convention is "any",
5565 "crlf", or "anycrlf". If the previous match was at the end of a line
5566 terminated by CRLF, an advance of one character just passes the \r,
5567 whereas we should prefer the longer newline sequence, as does the code in
5568 pcre_exec(). Fudge the offset value to achieve this. We check for a
5569 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5570 find the default.
5571
5572 Otherwise, in the case of UTF-8 matching, the advance must be one
5573 character, not one byte. */
5574
5575 else
5576 {
5577 if (g_notempty != 0)
5578 {
5579 int onechar = 1;
5580 unsigned int obits = REAL_PCRE_OPTIONS(re);
5581 use_offsets[0] = start_offset;
5582 if ((obits & PCRE_NEWLINE_BITS) == 0)
5583 {
5584 int d;
5585 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5586 /* Note that these values are always the ASCII ones, even in
5587 EBCDIC environments. CR = 13, NL = 10. */
5588 obits = (d == 13)? PCRE_NEWLINE_CR :
5589 (d == 10)? PCRE_NEWLINE_LF :
5590 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5591 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5592 (d == -1)? PCRE_NEWLINE_ANY : 0;
5593 }
5594 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5595 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5596 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5597 &&
5598 start_offset < len - 1 && (
5599 #ifdef SUPPORT_PCRE8
5600 (pcre_mode == PCRE8_MODE &&
5601 bptr[start_offset] == '\r' &&
5602 bptr[start_offset + 1] == '\n') ||
5603 #endif
5604 #ifdef SUPPORT_PCRE16
5605 (pcre_mode == PCRE16_MODE &&
5606 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5607 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5608 #endif
5609 #ifdef SUPPORT_PCRE32
5610 (pcre_mode == PCRE32_MODE &&
5611 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5612 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5613 #endif
5614 0))
5615 onechar++;
5616 else if (use_utf)
5617 {
5618 while (start_offset + onechar < len)
5619 {
5620 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5621 onechar++;
5622 }
5623 }
5624 use_offsets[1] = start_offset + onechar;
5625 }
5626 else
5627 {
5628 switch(count)
5629 {
5630 case PCRE_ERROR_NOMATCH:
5631 if (gmatched == 0)
5632 {
5633 if (markptr == NULL)
5634 {
5635 fprintf(outfile, "No match");
5636 }
5637 else
5638 {
5639 fprintf(outfile, "No match, mark = ");
5640 PCHARSV(markptr, 0, -1, outfile);
5641 }
5642 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5643 putc('\n', outfile);
5644 }
5645 break;
5646
5647 case PCRE_ERROR_BADUTF8:
5648 case PCRE_ERROR_SHORTUTF8:
5649 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5650 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5651 8 * CHAR_SIZE);
5652 if (use_size_offsets >= 2)
5653 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5654 use_offsets[1]);
5655 fprintf(outfile, "\n");
5656 break;
5657
5658 case PCRE_ERROR_BADUTF8_OFFSET:
5659 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5660 8 * CHAR_SIZE);
5661 break;
5662
5663 default:
5664 if (count < 0 &&
5665 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5666 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5667 else
5668 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5669 break;
5670 }
5671
5672 break; /* Out of the /g loop */
5673 }
5674 }
5675
5676 /* If not /g or /G we are done */
5677
5678 if (!do_g && !do_G) break;
5679
5680 /* If we have matched an empty string, first check to see if we are at
5681 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5682 Perl's /g option does. This turns out to be rather cunning. First we set
5683 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5684 same point. If this fails (picked up above) we advance to the next
5685 character. */
5686
5687 g_notempty = 0;
5688
5689 if (use_offsets[0] == use_offsets[1])
5690 {
5691 if (use_offsets[0] == len) break;
5692 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5693 }
5694
5695 /* For /g, update the start offset, leaving the rest alone */
5696
5697 if (do_g) start_offset = use_offsets[1];
5698
5699 /* For /G, update the pointer and length */
5700
5701 else
5702 {
5703 bptr += use_offsets[1] * CHAR_SIZE;
5704 len -= use_offsets[1];
5705 }
5706 } /* End of loop for /g and /G */
5707
5708 NEXT_DATA: continue;
5709 } /* End of loop for data lines */
5710
5711 CONTINUE:
5712
5713 #if !defined NOPOSIX
5714 if (posix || do_posix) regfree(&preg);
5715 #endif
5716
5717 if (re != NULL) new_free(re);
5718 if (extra != NULL)
5719 {
5720 PCRE_FREE_STUDY(extra);
5721 }
5722 if (locale_set)
5723 {
5724 new_free((void *)tables);
5725 setlocale(LC_CTYPE, "C");
5726 locale_set = 0;
5727 }
5728 if (jit_stack != NULL)
5729 {
5730 PCRE_JIT_STACK_FREE(jit_stack);
5731 jit_stack = NULL;
5732 }
5733 }
5734
5735 if (infile == stdin) fprintf(outfile, "\n");
5736
5737 EXIT:
5738
5739 if (infile != NULL && infile != stdin) fclose(infile);
5740 if (outfile != NULL && outfile != stdout) fclose(outfile);
5741
5742 free(buffer);
5743 free(dbuffer);
5744 free(pbuffer);
5745 free(offsets);
5746
5747 #ifdef SUPPORT_PCRE16
5748 if (buffer16 != NULL) free(buffer16);
5749 #endif
5750 #ifdef SUPPORT_PCRE32
5751 if (buffer32 != NULL) free(buffer32);
5752 #endif
5753
5754 #if !defined NODFA
5755 if (dfa_workspace != NULL)
5756 free(dfa_workspace);
5757 #endif
5758
5759 return yield;
5760 }
5761
5762 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5