/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1117 - (show annotations)
Tue Oct 16 15:57:27 2012 UTC (7 years, 1 month ago) by chpe
File MIME type: text/plain
File size: 157095 byte(s)
pcre32: pcretest: Add -32+ option

Add -32+ option that selects 32-bit mode like -32, but additionally
modifies the characters in the data strings to have the bits > 21 set,
to test that the masking works.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136 /* When exactly one library width is supported, define the matching COMPILE_PCREn macro before pcre_internal.h is included. */
137 #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138 /* Configure internal macros to 32 bit mode. */
139 #define COMPILE_PCRE32
140 #endif
141 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142 /* Configure internal macros to 16 bit mode. */
143 #define COMPILE_PCRE16
144 #endif
145 #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146 /* Configure internal macros to 8 bit mode. */
147 #define COMPILE_PCRE8
148 #endif
149
150 #include "pcre_internal.h"
151
152 /* The pcre_printint() function, which prints the internal form of a compiled
153 regex, is held in a separate file so that (a) it can be compiled in either
154 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155 when that is compiled in debug mode. */
156
157 #ifdef SUPPORT_PCRE8
158 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159 #endif /* SUPPORT_PCRE8 */
160 #ifdef SUPPORT_PCRE16
161 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths); /* declared with pcre *, not pcre16 * */
162 #endif /* SUPPORT_PCRE16 */
163 #ifdef SUPPORT_PCRE32
164 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths); /* declared with pcre *, not pcre32 * */
165 #endif /* SUPPORT_PCRE32 */
166
167 /* We need access to some of the data tables that PCRE uses. So as not to have
168 to keep two copies, we include the source files here, changing the names of the
169 external symbols to prevent clashes. */
170
171 #define PCRE_INCLUDED
172
173 #include "pcre_tables.c"
174 #include "pcre_ucd.c"
175
176 /* The definition of the macro PRINTABLE, which determines whether to print an
177 output character as-is or as a hex value when showing compiled patterns, is
178 the same as in the printint.src file. We use it here in cases when the locale
179 has not been explicitly changed, so as to get consistent output from systems
180 that differ in their output from isprint() even in the "C" locale. */
181
182 #ifdef EBCDIC
183 #define PRINTABLE(c) ((c) >= 64 && (c) < 255) /* EBCDIC build: codes 64..254 are shown as-is */
184 #else
185 #define PRINTABLE(c) ((c) >= 32 && (c) < 127) /* otherwise: codes 32..126 are shown as-is */
186 #endif
187 /* PRINTOK uses isprint() once locale_set is non-zero, else the fixed PRINTABLE range. NOTE(review): isprint() needs an unsigned char value or EOF - confirm callers never pass wider values. */
188 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189
190 /* Posix support is disabled in 16 or 32 bit only mode. */
191 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192 #define NOPOSIX
193 #endif
194
195 /* It is possible to compile this test program without including support for
196 testing the POSIX interface, though this is not available via the standard
197 Makefile. */
198
199 #if !defined NOPOSIX
200 #include "pcreposix.h"
201 #endif
202
203 /* It is also possible, originally for the benefit of a version that was
204 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206 automatically cut out the UTF support if PCRE is built without it. */
207
208 #ifndef SUPPORT_UTF
209 #ifndef NOUTF
210 #define NOUTF
211 #endif
212 #endif
213
214 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
215 for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
216 only from one place and is handled differently). I couldn't dream up any way of
217 using a single macro to do this in a generic way, because of the many different
218 argument requirements. We know that at least one of SUPPORT_PCRE8,
219 SUPPORT_PCRE16 and SUPPORT_PCRE32 must be set. First define macros for each individual mode; then
220 use these in the definitions of generic macros.
221
222 **** Special note about the PCHARSxxx macros: the address of the string to be
223 printed is always given as two arguments: a base address followed by an offset.
224 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
225 offset is in units of this size. If the string were given as base+offset in one
226 argument, the casting might be incorrectly applied. */
227
228 #ifdef SUPPORT_PCRE8
229 /* Print len units of 8-bit data starting at p + offset to f via pchars(); lv receives the value pchars() returns. */
230 #define PCHARS8(lv, p, offset, len, f) \
231 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
232 /* As PCHARS8, but the value returned by pchars() is explicitly discarded. */
233 #define PCHARSV8(p, offset, len, f) \
234 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235 /* Only cn8 is handed to read_capture_name8(); cn16 and cn32 are accepted and ignored. p is set to the returned pointer. */
236 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237 p = read_capture_name8(p, cn8, re)
238 /* strlen() of p viewed as a char string, converted to int. */
239 #define STRLEN8(p) ((int)strlen((char *)p))
240 /* Store callout in the 8-bit library's pcre_callout variable. */
241 #define SET_PCRE_CALLOUT8(callout) \
242 pcre_callout = callout
243 /* Straight pass-through to pcre_assign_jit_stack(). */
244 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245 pcre_assign_jit_stack(extra, callback, userdata)
246 /* Compile pat (viewed as char *) with pcre_compile() and store the result in re. */
247 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248 re = pcre_compile((char *)pat, options, error, erroffset, tables)
249 /* rc receives the result of pcre_copy_named_substring(); size is passed through unchanged. */
250 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251 namesptr, cbuffer, size) \
252 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253 (char *)namesptr, cbuffer, size)
254 /* rc receives the result of pcre_copy_substring() for substring number i; size is passed through unchanged. */
255 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257 /* count receives the result of pcre_dfa_exec() on the subject bptr. */
258 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259 offsets, size_offsets, workspace, size_workspace) \
260 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261 offsets, size_offsets, workspace, size_workspace)
262 /* count receives the result of pcre_exec() on the subject bptr. */
263 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264 offsets, size_offsets) \
265 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266 offsets, size_offsets)
267 /* Straight pass-through to pcre_free_study(). */
268 #define PCRE_FREE_STUDY8(extra) \
269 pcre_free_study(extra)
270 /* Straight pass-through to pcre_free_substring(). */
271 #define PCRE_FREE_SUBSTRING8(substring) \
272 pcre_free_substring(substring)
273 /* Straight pass-through to pcre_free_substring_list(). */
274 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275 pcre_free_substring_list(listptr)
276 /* rc receives the result of pcre_get_named_substring(); subsptr is passed without a cast. */
277 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278 getnamesptr, subsptr) \
279 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280 (char *)getnamesptr, subsptr)
281 /* Set n to the result of pcre_get_stringnumber() for name ptr in pattern re. The second parameter is the compiled pattern; it was misnamed rc and unused, so the body captured whatever re was in scope at the call site. */
282 #define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
283 n = pcre_get_stringnumber(re, (char *)ptr)
284 /* rc receives the result of pcre_get_substring() for substring number i. */
285 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287 /* rc receives the result of pcre_get_substring_list(). */
288 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290 /* rc receives the result of pcre_pattern_to_host_byte_order(). */
291 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293 /* Print the compiled form of re to outfile using pcre_printint(). */
294 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295 pcre_printint(re, outfile, debug_lengths)
296 /* extra receives the result of pcre_study(). */
297 #define PCRE_STUDY8(extra, re, options, error) \
298 extra = pcre_study(re, options, error)
299 /* Expression macro: yields the value returned by pcre_jit_stack_alloc(). */
300 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301 pcre_jit_stack_alloc(startsize, maxsize)
302 /* Straight pass-through to pcre_jit_stack_free(). */
303 #define PCRE_JIT_STACK_FREE8(stack) \
304 pcre_jit_stack_free(stack)
305
306 #endif /* SUPPORT_PCRE8 */
307
308 /* -----------------------------------------------------------*/
309
310 #ifdef SUPPORT_PCRE16
311 /* Print len units of 16-bit data starting at p + offset to f via pchars16(); lv receives the value pchars16() returns. */
312 #define PCHARS16(lv, p, offset, len, f) \
313 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
314 /* As PCHARS16, but the value returned by pchars16() is explicitly discarded. */
315 #define PCHARSV16(p, offset, len, f) \
316 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
317 /* Only cn16 is handed to read_capture_name16(); cn8 and cn32 are accepted and ignored. */
318 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319 p = read_capture_name16(p, cn16, re)
320 /* Result of strlen16() on p viewed as a 16-bit string, converted to int. */
321 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322 /* Store callout in pcre16_callout, cast to the 16-bit callout function type. */
323 #define SET_PCRE_CALLOUT16(callout) \
324 pcre16_callout = (int (*)(pcre16_callout_block *))callout
325 /* extra and callback are cast to their pcre16 types before the call. */
326 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327 pcre16_assign_jit_stack((pcre16_extra *)extra, \
328 (pcre16_jit_callback)callback, userdata)
329 /* Compile with pcre16_compile(); the result is cast back to pcre * so every mode stores it in the same variable type. */
330 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332 tables)
333 /* size is halved before being passed on (presumably converting a byte count to 16-bit units - confirm at call sites). */
334 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335 namesptr, cbuffer, size) \
336 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338 /* As above: size is halved before being passed on as the buffer length. */
339 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341 (PCRE_UCHAR16 *)cbuffer, size/2)
342 /* count receives the result of pcre16_dfa_exec(); re and extra are cast to their pcre16 types. */
343 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344 offsets, size_offsets, workspace, size_workspace) \
345 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347 workspace, size_workspace)
348 /* count receives the result of pcre16_exec(); re and extra are cast to their pcre16 types. */
349 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350 offsets, size_offsets) \
351 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352 len, start_offset, options, offsets, size_offsets)
353 /* Calls pcre16_free_study() with extra cast to pcre16_extra *. */
354 #define PCRE_FREE_STUDY16(extra) \
355 pcre16_free_study((pcre16_extra *)extra)
356 /* Calls pcre16_free_substring() with substring cast to PCRE_SPTR16. */
357 #define PCRE_FREE_SUBSTRING16(substring) \
358 pcre16_free_substring((PCRE_SPTR16)substring)
359 /* Calls pcre16_free_substring_list() with listptr cast to PCRE_SPTR16 *. */
360 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362 /* subsptr is converted through void * before the cast to PCRE_SPTR16 *. */
363 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364 getnamesptr, subsptr) \
365 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367 /* Set n to the result of pcre16_get_stringnumber() for name ptr in pattern re. The second parameter is the compiled pattern (it was misnamed rc and unused); it is cast to pcre16 * like the other 16-bit macros do. */
368 #define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
369 n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
370 /* rc receives the result of pcre16_get_substring(); subsptr is converted through void * before the cast. */
371 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373 (PCRE_SPTR16 *)(void*)subsptr)
374 /* rc receives the result of pcre16_get_substring_list(); listptr is converted through void * before the cast. */
375 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377 (PCRE_SPTR16 **)(void*)listptr)
378 /* rc receives the result of pcre16_pattern_to_host_byte_order(). */
379 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381 tables)
382 /* re is passed uncast to pcre16_printint(). */
383 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384 pcre16_printint(re, outfile, debug_lengths)
385 /* extra receives the result of pcre16_study(), cast back to pcre_extra *. */
386 #define PCRE_STUDY16(extra, re, options, error) \
387 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388 /* Expression macro: yields the pcre16_jit_stack_alloc() result cast to pcre_jit_stack *. */
389 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391 /* Calls pcre16_jit_stack_free() with stack cast to pcre16_jit_stack *. */
392 #define PCRE_JIT_STACK_FREE16(stack) \
393 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394
395 #endif /* SUPPORT_PCRE16 */
396
397 /* -----------------------------------------------------------*/
398
399 #ifdef SUPPORT_PCRE32
400 /* Print len units of 32-bit data starting at p + offset to f via pchars32(); unlike the 8/16-bit forms, the file-scope use_utf flag is passed as well. */
401 #define PCHARS32(lv, p, offset, len, f) \
402 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
403 /* As PCHARS32, but the value returned by pchars32() is explicitly discarded. */
404 #define PCHARSV32(p, offset, len, f) \
405 (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
406 /* Only cn32 is handed to read_capture_name32(); cn8 and cn16 are accepted and ignored. */
407 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408 p = read_capture_name32(p, cn32, re)
409 /* Result of strlen32() on p viewed as a 32-bit string, converted to int. */
410 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411 /* Store callout in pcre32_callout, cast to the 32-bit callout function type. */
412 #define SET_PCRE_CALLOUT32(callout) \
413 pcre32_callout = (int (*)(pcre32_callout_block *))callout
414 /* extra and callback are cast to their pcre32 types before the call. */
415 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416 pcre32_assign_jit_stack((pcre32_extra *)extra, \
417 (pcre32_jit_callback)callback, userdata)
418 /* Compile with pcre32_compile(); the result is cast back to pcre * so every mode stores it in the same variable type. */
419 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421 tables)
422 /* rc receives the result of pcre32_copy_named_substring(). A 32-bit unit is 4 bytes, so the buffer length passed is size/4; the former size/2 was copied from the 16-bit macro and overstated the capacity. NOTE(review): assumes size is a byte count - confirm at call sites. */
423 #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
424 namesptr, cbuffer, size) \
425 rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
426 count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
427 /* rc receives the result of pcre32_copy_substring(). A 32-bit unit is 4 bytes, so the buffer length passed is size/4; the former size/2 was copied from the 16-bit macro and overstated the capacity. NOTE(review): assumes size is a byte count - confirm at call sites. */
428 #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
429 rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
430 (PCRE_UCHAR32 *)cbuffer, (size)/4)
431 /* count receives the result of pcre32_dfa_exec(); re and extra are cast to their pcre32 types. */
432 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433 offsets, size_offsets, workspace, size_workspace) \
434 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436 workspace, size_workspace)
437 /* count receives the result of pcre32_exec(); re and extra are cast to their pcre32 types. */
438 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439 offsets, size_offsets) \
440 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441 len, start_offset, options, offsets, size_offsets)
442 /* Calls pcre32_free_study() with extra cast to pcre32_extra *. */
443 #define PCRE_FREE_STUDY32(extra) \
444 pcre32_free_study((pcre32_extra *)extra)
445 /* Calls pcre32_free_substring() with substring cast to PCRE_SPTR32. */
446 #define PCRE_FREE_SUBSTRING32(substring) \
447 pcre32_free_substring((PCRE_SPTR32)substring)
448 /* Calls pcre32_free_substring_list() with listptr cast to PCRE_SPTR32 *. */
449 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451 /* subsptr is converted through void * before the cast to PCRE_SPTR32 *. */
452 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453 getnamesptr, subsptr) \
454 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456 /* Set n to the result of pcre32_get_stringnumber() for name ptr in pattern re. The second parameter is the compiled pattern (it was misnamed rc and unused); it is cast to pcre32 * like the other 32-bit macros do. */
457 #define PCRE_GET_STRINGNUMBER32(n, re, ptr) \
458 n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)ptr)
459 /* rc receives the result of pcre32_get_substring(); subsptr is converted through void * before the cast. */
460 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462 (PCRE_SPTR32 *)(void*)subsptr)
463 /* rc receives the result of pcre32_get_substring_list(); listptr is converted through void * before the cast. */
464 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466 (PCRE_SPTR32 **)(void*)listptr)
467 /* rc receives the result of pcre32_pattern_to_host_byte_order(). */
468 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470 tables)
471 /* re is passed uncast to pcre32_printint(). */
472 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473 pcre32_printint(re, outfile, debug_lengths)
474 /* extra receives the result of pcre32_study(), cast back to pcre_extra *. */
475 #define PCRE_STUDY32(extra, re, options, error) \
476 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477 /* Expression macro: yields the pcre32_jit_stack_alloc() result cast to pcre_jit_stack *. */
478 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480 /* Calls pcre32_jit_stack_free() with stack cast to pcre32_jit_stack *. */
481 #define PCRE_JIT_STACK_FREE32(stack) \
482 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483
484 #endif /* SUPPORT_PCRE32 */
485
486
487 /* ----- Two or more modes are supported; a runtime test is needed, except for
488 pcre_config(), and the JIT stack functions, when it doesn't matter which
489 version is called. ----- */
490
491 enum { /* values compared against pcre_mode by the generic macros */
492 PCRE8_MODE, /* 0: CHAR_SIZE (1 << pcre_mode) evaluates to 1 */
493 PCRE16_MODE, /* 1: CHAR_SIZE evaluates to 2 */
494 PCRE32_MODE /* 2: CHAR_SIZE evaluates to 4 */
495 };
496
497 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498
499 #define CHAR_SIZE (1 << pcre_mode) /* 1, 2 or 4 for PCRE8_MODE, PCRE16_MODE, PCRE32_MODE */
500 /* Each generic macro below tests pcre_mode at run time and expands to the 32-, 16- or 8-bit macro; the 8-bit form is the final else branch. */
501 #define PCHARS(lv, p, offset, len, f) \
502 if (pcre_mode == PCRE32_MODE) \
503 PCHARS32(lv, p, offset, len, f); \
504 else if (pcre_mode == PCRE16_MODE) \
505 PCHARS16(lv, p, offset, len, f); \
506 else \
507 PCHARS8(lv, p, offset, len, f)
508 /* As PCHARS, without a result variable. */
509 #define PCHARSV(p, offset, len, f) \
510 if (pcre_mode == PCRE32_MODE) \
511 PCHARSV32(p, offset, len, f); \
512 else if (pcre_mode == PCRE16_MODE) \
513 PCHARSV16(p, offset, len, f); \
514 else \
515 PCHARSV8(p, offset, len, f)
516 /* All three name buffers are supplied; each mode-specific macro uses only its own. */
517 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518 if (pcre_mode == PCRE32_MODE) \
519 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520 else if (pcre_mode == PCRE16_MODE) \
521 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522 else \
523 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524 /* Sets the callout variable of the library selected by pcre_mode. */
525 #define SET_PCRE_CALLOUT(callout) \
526 if (pcre_mode == PCRE32_MODE) \
527 SET_PCRE_CALLOUT32(callout); \
528 else if (pcre_mode == PCRE16_MODE) \
529 SET_PCRE_CALLOUT16(callout); \
530 else \
531 SET_PCRE_CALLOUT8(callout)
532 /* STRLEN is an expression (nested conditional), unlike the statement-form macros around it. */
533 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534 /* Assigns the JIT stack through the library selected by pcre_mode. */
535 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536 if (pcre_mode == PCRE32_MODE) \
537 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538 else if (pcre_mode == PCRE16_MODE) \
539 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540 else \
541 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542 /* Compiles pat with the library selected by pcre_mode; re receives the result. */
543 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544 if (pcre_mode == PCRE32_MODE) \
545 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546 else if (pcre_mode == PCRE16_MODE) \
547 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548 else \
549 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
550 /* No run-time test here: PCRE_CONFIG is always the 8-bit pcre_config in this branch. */
551 #define PCRE_CONFIG pcre_config
552 /* Run-time dispatch to the 32-, 16- or 8-bit named-substring copy macro. */
553 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554 namesptr, cbuffer, size) \
555 if (pcre_mode == PCRE32_MODE) \
556 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557 namesptr, cbuffer, size); \
558 else if (pcre_mode == PCRE16_MODE) \
559 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560 namesptr, cbuffer, size); \
561 else \
562 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
563 namesptr, cbuffer, size)
564 /* Run-time dispatch to the 32-, 16- or 8-bit numbered-substring copy macro. */
565 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566 if (pcre_mode == PCRE32_MODE) \
567 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568 else if (pcre_mode == PCRE16_MODE) \
569 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570 else \
571 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572 /* Run-time dispatch to the 32-, 16- or 8-bit DFA matching macro. */
573 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574 offsets, size_offsets, workspace, size_workspace) \
575 if (pcre_mode == PCRE32_MODE) \
576 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577 offsets, size_offsets, workspace, size_workspace); \
578 else if (pcre_mode == PCRE16_MODE) \
579 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580 offsets, size_offsets, workspace, size_workspace); \
581 else \
582 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets, workspace, size_workspace)
584 /* Run-time dispatch to the 32-, 16- or 8-bit matching macro. */
585 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets) \
587 if (pcre_mode == PCRE32_MODE) \
588 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets); \
590 else if (pcre_mode == PCRE16_MODE) \
591 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets); \
593 else \
594 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
595 offsets, size_offsets)
596 /* Frees study data through the library selected by pcre_mode. */
597 #define PCRE_FREE_STUDY(extra) \
598 if (pcre_mode == PCRE32_MODE) \
599 PCRE_FREE_STUDY32(extra); \
600 else if (pcre_mode == PCRE16_MODE) \
601 PCRE_FREE_STUDY16(extra); \
602 else \
603 PCRE_FREE_STUDY8(extra)
604 /* Frees one substring through the library selected by pcre_mode. */
605 #define PCRE_FREE_SUBSTRING(substring) \
606 if (pcre_mode == PCRE32_MODE) \
607 PCRE_FREE_SUBSTRING32(substring); \
608 else if (pcre_mode == PCRE16_MODE) \
609 PCRE_FREE_SUBSTRING16(substring); \
610 else \
611 PCRE_FREE_SUBSTRING8(substring)
612 /* Frees a substring list through the library selected by pcre_mode. */
613 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614 if (pcre_mode == PCRE32_MODE) \
615 PCRE_FREE_SUBSTRING_LIST32(listptr); \
616 else if (pcre_mode == PCRE16_MODE) \
617 PCRE_FREE_SUBSTRING_LIST16(listptr); \
618 else \
619 PCRE_FREE_SUBSTRING_LIST8(listptr)
620 /* Run-time dispatch to the 32-, 16- or 8-bit named-substring extraction macro. */
621 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622 getnamesptr, subsptr) \
623 if (pcre_mode == PCRE32_MODE) \
624 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625 getnamesptr, subsptr); \
626 else if (pcre_mode == PCRE16_MODE) \
627 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628 getnamesptr, subsptr); \
629 else \
630 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
631 getnamesptr, subsptr)
632 /* The second argument is forwarded unchanged to the mode-specific macro. */
633 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634 if (pcre_mode == PCRE32_MODE) \
635 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636 else if (pcre_mode == PCRE16_MODE) \
637 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638 else \
639 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640 /* Run-time dispatch to the 32-, 16- or 8-bit numbered-substring extraction macro. */
641 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642 if (pcre_mode == PCRE32_MODE) \
643 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644 else if (pcre_mode == PCRE16_MODE) \
645 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646 else \
647 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648 /* Run-time dispatch to the 32-, 16- or 8-bit substring-list macro. */
649 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650 if (pcre_mode == PCRE32_MODE) \
651 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652 else if (pcre_mode == PCRE16_MODE) \
653 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654 else \
655 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656 /* Expression form (nested conditional) so the allocated stack can be used as a value. */
657 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658 (pcre_mode == PCRE32_MODE ? \
659 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660 : pcre_mode == PCRE16_MODE ? \
661 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663 /* Frees a JIT stack through the library selected by pcre_mode. */
664 #define PCRE_JIT_STACK_FREE(stack) \
665 if (pcre_mode == PCRE32_MODE) \
666 PCRE_JIT_STACK_FREE32(stack); \
667 else if (pcre_mode == PCRE16_MODE) \
668 PCRE_JIT_STACK_FREE16(stack); \
669 else \
670 PCRE_JIT_STACK_FREE8(stack)
671 /* Expression form: calls the maketables function of the library selected by pcre_mode. */
672 #define PCRE_MAKETABLES \
673 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674 /* Run-time dispatch to the 32-, 16- or 8-bit byte-order conversion macro. */
675 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676 if (pcre_mode == PCRE32_MODE) \
677 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678 else if (pcre_mode == PCRE16_MODE) \
679 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680 else \
681 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682 /* Run-time dispatch to the 32-, 16- or 8-bit pattern printing macro. */
683 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684 if (pcre_mode == PCRE32_MODE) \
685 PCRE_PRINTINT32(re, outfile, debug_lengths); \
686 else if (pcre_mode == PCRE16_MODE) \
687 PCRE_PRINTINT16(re, outfile, debug_lengths); \
688 else \
689 PCRE_PRINTINT8(re, outfile, debug_lengths)
690 /* Run-time dispatch to the 32-, 16- or 8-bit study macro. */
691 #define PCRE_STUDY(extra, re, options, error) \
692 if (pcre_mode == PCRE32_MODE) \
693 PCRE_STUDY32(extra, re, options, error); \
694 else if (pcre_mode == PCRE16_MODE) \
695 PCRE_STUDY16(extra, re, options, error); \
696 else \
697 PCRE_STUDY8(extra, re, options, error)
698
699 /* ----- Only 8-bit mode is supported ----- */
700
701 #elif defined SUPPORT_PCRE8
702 #define CHAR_SIZE 1 /* constant here; no pcre_mode test is made in this branch */
703 #define PCHARS PCHARS8
704 #define PCHARSV PCHARSV8
705 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
706 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
707 #define STRLEN STRLEN8
708 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
709 #define PCRE_COMPILE PCRE_COMPILE8
710 #define PCRE_CONFIG pcre_config /* names the library function directly; there is no PCRE_CONFIG8 macro */
711 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
712 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
713 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
714 #define PCRE_EXEC PCRE_EXEC8
715 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
716 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
717 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
718 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
719 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
720 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
721 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
722 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
723 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
724 #define PCRE_MAKETABLES pcre_maketables() /* expands to a call, so it is used without parentheses */
725 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
726 #define PCRE_PRINTINT PCRE_PRINTINT8
727 #define PCRE_STUDY PCRE_STUDY8
728
729 /* ----- Only 16-bit mode is supported ----- */
730
731 #elif defined SUPPORT_PCRE16
732 #define CHAR_SIZE 2 /* constant here; no pcre_mode test is made in this branch */
733 #define PCHARS PCHARS16
734 #define PCHARSV PCHARSV16
735 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
736 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
737 #define STRLEN STRLEN16
738 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
739 #define PCRE_COMPILE PCRE_COMPILE16
740 #define PCRE_CONFIG pcre16_config /* names the library function directly; there is no PCRE_CONFIG16 macro */
741 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
742 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
743 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
744 #define PCRE_EXEC PCRE_EXEC16
745 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
746 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
747 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
748 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
749 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
750 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
751 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
752 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
753 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
754 #define PCRE_MAKETABLES pcre16_maketables() /* expands to a call, so it is used without parentheses */
755 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756 #define PCRE_PRINTINT PCRE_PRINTINT16
757 #define PCRE_STUDY PCRE_STUDY16
758
759 /* ----- Only 32-bit mode is supported ----- */
760
761 #elif defined SUPPORT_PCRE32
762 #define CHAR_SIZE 4 /* constant here; no pcre_mode test is made in this branch */
763 #define PCHARS PCHARS32
764 #define PCHARSV PCHARSV32
765 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
766 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
767 #define STRLEN STRLEN32
768 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
769 #define PCRE_COMPILE PCRE_COMPILE32
770 #define PCRE_CONFIG pcre32_config /* names the library function directly; there is no PCRE_CONFIG32 macro */
771 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
773 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
774 #define PCRE_EXEC PCRE_EXEC32
775 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
776 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
777 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
778 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
779 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
780 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
781 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
782 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
783 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
784 #define PCRE_MAKETABLES pcre32_maketables() /* expands to a call, so it is used without parentheses */
785 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786 #define PCRE_PRINTINT PCRE_PRINTINT32
787 #define PCRE_STUDY PCRE_STUDY32
788
789 #endif
790
791 /* ----- End of mode-specific function call macros ----- */
792
793
794 /* Other parameters */
795
796 #ifndef CLOCKS_PER_SEC /* supply a value when the C library headers do not */
797 #ifdef CLK_TCK
798 #define CLOCKS_PER_SEC CLK_TCK /* reuse the CLK_TCK name when it exists */
799 #else
800 #define CLOCKS_PER_SEC 100 /* last resort when neither macro is defined */
801 #endif
802 #endif
803
804 #if !defined NODFA
805 #define DFA_WS_DIMENSION 1000 /* only defined when NODFA is not set; presumably the DFA workspace dimension - confirm at point of use */
806 #endif
807
808 /* This is the default loop count for timing. */
809
810 #define LOOPREPEAT 500000
811
812 /* Static variables */
813
814 static FILE *outfile;
815 static int log_store = 0;
816 static int callout_count;
817 static int callout_extra;
818 static int callout_fail_count;
819 static int callout_fail_id;
820 static int debug_lengths;
821 static int first_callout;
822 static int jit_was_used; /* set to TRUE by jit_callback() */
823 static int locale_set = 0;
824 static int show_malloc;
825 static int use_utf; /* when non-zero, pchar() uses \x{..} escapes below 0x100 and pchars() decodes UTF-8 */
826 static size_t gotten_store;
827 static size_t first_gotten_store = 0;
828 static const unsigned char *last_callout_mark = NULL;
829
830 /* The buffers grow automatically if very long input lines are encountered. */
831
832 static int buffer_size = 50000; /* size in bytes of buffer and pbuffer; doubled by extend_inputline() */
833 static pcre_uint8 *buffer = NULL; /* input line buffer filled by extend_inputline() */
834 static pcre_uint8 *pbuffer = NULL; /* kept the same size as buffer; contents preserved when both are grown */
835
836 /* Another buffer is needed for translation to 16/32-bit character strings. It
837 will be obtained and extended as required. */
838
839 #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
840
841 /* We need the table of operator lengths that is used for 16/32-bit compiling, in
842 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
843 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
844 appropriately for the 16/32-bit world. Just as a safety check, make sure that
845 COMPILE_PCRE[16|32] is *not* set. */
846
847 #ifdef COMPILE_PCRE16
848 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
849 #endif
850
851 #ifdef COMPILE_PCRE32
852 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
853 #endif
854
855 #if LINK_SIZE == 2 /* remap the 8-bit LINK_SIZE: 2 becomes 1, 3 or 4 becomes 2 */
856 #undef LINK_SIZE
857 #define LINK_SIZE 1
858 #elif LINK_SIZE == 3 || LINK_SIZE == 4
859 #undef LINK_SIZE
860 #define LINK_SIZE 2
861 #else
862 #error LINK_SIZE must be either 2, 3, or 4
863 #endif
864
865 #undef IMM2_SIZE /* likewise forced to 1 for the wide-character opcode length tables */
866 #define IMM2_SIZE 1
867
868 #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
869
870 #ifdef SUPPORT_PCRE16
871 static int buffer16_size = 0; /* size of buffer16 in bytes; 0 until to16() first allocates it */
872 static pcre_uint16 *buffer16 = NULL; /* output of to16(); freed and reallocated when too small */
873 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS }; /* opcode lengths expanded from the OP_LENGTHS macro */
874 #endif /* SUPPORT_PCRE16 */
875
876 #ifdef SUPPORT_PCRE32
877 static int buffer32_size = 0; /* size of buffer32 in bytes; 0 until to32() first allocates it */
878 static pcre_uint32 *buffer32 = NULL; /* output of to32(); freed and reallocated when too small */
879 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS }; /* opcode lengths expanded from the OP_LENGTHS macro */
880 #endif /* SUPPORT_PCRE32 */
881
882 /* If we have 8-bit support, default to it; if there is also
883 16-or 32-bit support, it can be changed by an option. If there is no 8-bit support,
884 there must be 16-or 32-bit support, so default to 16-bit mode if it is available and to 32-bit mode otherwise. */
885
886 #if defined SUPPORT_PCRE8
887 static int pcre_mode = PCRE8_MODE;
888 #elif defined SUPPORT_PCRE16
889 static int pcre_mode = PCRE16_MODE;
890 #elif defined SUPPORT_PCRE32
891 static int pcre_mode = PCRE32_MODE;
892 #endif
893
894 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
895
896 static int jit_study_bits[] = /* seven entries; presumably entry n-1 serves digit n - confirm at point of use */
897 {
898 PCRE_STUDY_JIT_COMPILE,
899 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
900 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
901 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
902 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
903 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
904 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
905 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
906 };
907
908 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
909 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) /* all three JIT study flags combined */
910
911 /* Textual explanations for runtime error codes */
912
913 static const char *errtexts[] = { /* presumably indexed by the negated error code - confirm at point of use; NULL entries have no generic text (see per-entry notes) */
914 NULL, /* 0 is no error */
915 NULL, /* NOMATCH is handled specially */
916 "NULL argument passed",
917 "bad option value",
918 "magic number missing",
919 "unknown opcode - pattern overwritten?",
920 "no more memory",
921 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
922 "match limit exceeded",
923 "callout error code",
924 NULL, /* BADUTF8/16 is handled specially */
925 NULL, /* BADUTF8/16 offset is handled specially */
926 NULL, /* PARTIAL is handled specially */
927 "not used - internal error",
928 "internal error - pattern overwritten?",
929 "bad count value",
930 "item unsupported for DFA matching",
931 "backreference condition or recursion test not supported for DFA matching",
932 "match limit not supported for DFA matching",
933 "workspace size exceeded in DFA matching",
934 "too much recursion for DFA matching",
935 "recursion limit exceeded",
936 "not used - internal error",
937 "invalid combination of newline options",
938 "bad offset value",
939 NULL, /* SHORTUTF8/16 is handled specially */
940 "nested recursion at the same subject position",
941 "JIT stack limit reached",
942 "pattern compiled in wrong mode: 8-bit/16-bit error",
943 "pattern compiled with other endianness",
944 "invalid data in workspace for DFA restart"
945 };
946
947
948 /*************************************************
949 * Alternate character tables *
950 *************************************************/
951
952 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
953 using the default tables of the library. However, the T option can be used to
954 select alternate sets of tables, for different kinds of testing. Note also that
955 the L (locale) option also adjusts the tables. */
956
957 /* This is the set of tables distributed as default with PCRE. It recognizes
958 only ASCII characters. */
959
960 static const pcre_uint8 tables0[] = {
961
962 /* This table is a lower casing table. */
963
964 0, 1, 2, 3, 4, 5, 6, 7,
965 8, 9, 10, 11, 12, 13, 14, 15,
966 16, 17, 18, 19, 20, 21, 22, 23,
967 24, 25, 26, 27, 28, 29, 30, 31,
968 32, 33, 34, 35, 36, 37, 38, 39,
969 40, 41, 42, 43, 44, 45, 46, 47,
970 48, 49, 50, 51, 52, 53, 54, 55,
971 56, 57, 58, 59, 60, 61, 62, 63,
972 64, 97, 98, 99,100,101,102,103,
973 104,105,106,107,108,109,110,111,
974 112,113,114,115,116,117,118,119,
975 120,121,122, 91, 92, 93, 94, 95,
976 96, 97, 98, 99,100,101,102,103,
977 104,105,106,107,108,109,110,111,
978 112,113,114,115,116,117,118,119,
979 120,121,122,123,124,125,126,127,
980 128,129,130,131,132,133,134,135,
981 136,137,138,139,140,141,142,143,
982 144,145,146,147,148,149,150,151,
983 152,153,154,155,156,157,158,159,
984 160,161,162,163,164,165,166,167,
985 168,169,170,171,172,173,174,175,
986 176,177,178,179,180,181,182,183,
987 184,185,186,187,188,189,190,191,
988 192,193,194,195,196,197,198,199,
989 200,201,202,203,204,205,206,207,
990 208,209,210,211,212,213,214,215,
991 216,217,218,219,220,221,222,223,
992 224,225,226,227,228,229,230,231,
993 232,233,234,235,236,237,238,239,
994 240,241,242,243,244,245,246,247,
995 248,249,250,251,252,253,254,255,
996
997 /* This table is a case flipping table. */
998
999 0, 1, 2, 3, 4, 5, 6, 7,
1000 8, 9, 10, 11, 12, 13, 14, 15,
1001 16, 17, 18, 19, 20, 21, 22, 23,
1002 24, 25, 26, 27, 28, 29, 30, 31,
1003 32, 33, 34, 35, 36, 37, 38, 39,
1004 40, 41, 42, 43, 44, 45, 46, 47,
1005 48, 49, 50, 51, 52, 53, 54, 55,
1006 56, 57, 58, 59, 60, 61, 62, 63,
1007 64, 97, 98, 99,100,101,102,103,
1008 104,105,106,107,108,109,110,111,
1009 112,113,114,115,116,117,118,119,
1010 120,121,122, 91, 92, 93, 94, 95,
1011 96, 65, 66, 67, 68, 69, 70, 71,
1012 72, 73, 74, 75, 76, 77, 78, 79,
1013 80, 81, 82, 83, 84, 85, 86, 87,
1014 88, 89, 90,123,124,125,126,127,
1015 128,129,130,131,132,133,134,135,
1016 136,137,138,139,140,141,142,143,
1017 144,145,146,147,148,149,150,151,
1018 152,153,154,155,156,157,158,159,
1019 160,161,162,163,164,165,166,167,
1020 168,169,170,171,172,173,174,175,
1021 176,177,178,179,180,181,182,183,
1022 184,185,186,187,188,189,190,191,
1023 192,193,194,195,196,197,198,199,
1024 200,201,202,203,204,205,206,207,
1025 208,209,210,211,212,213,214,215,
1026 216,217,218,219,220,221,222,223,
1027 224,225,226,227,228,229,230,231,
1028 232,233,234,235,236,237,238,239,
1029 240,241,242,243,244,245,246,247,
1030 248,249,250,251,252,253,254,255,
1031
1032 /* This table contains bit maps for various character classes. Each map is 32
1033 bytes long and the bits run from the least significant end of each byte. The
1034 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1035 graph, print, punct, and cntrl. Other classes are built from combinations. */
1036
1037 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space: characters 9-13 and 32 */
1038 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1039 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1040 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1041
1042 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit: 0-9, A-F, a-f */
1043 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1044 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1045 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1046
1047 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit: 0-9 */
1048 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1049 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1050 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1051
1052 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper: A-Z */
1053 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1054 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1055 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1056
1057 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower: a-z */
1058 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1059 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1060 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1061
1062 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word: 0-9, A-Z, '_', a-z */
1063 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1064 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1065 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1066
1067 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph: characters 33-126 */
1068 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1069 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1070 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1071
1072 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print: characters 32-126 */
1073 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1074 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1075 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1076
1077 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
1078 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1079 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1080 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1081
1082 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl: characters 0-31 and 127 */
1083 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1084 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1085 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1086
1087 /* This table identifies various classes of character by individual bits:
1088 0x01 white space character
1089 0x02 letter
1090 0x04 decimal digit
1091 0x08 hexadecimal digit
1092 0x10 alphanumeric or '_'
1093 0x80 regular expression metacharacter or binary zero
1094 */
1095
1096 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1097 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1098 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1099 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1100 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1101 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1102 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1103 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1104 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1105 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1106 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1107 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1108 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1109 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1110 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1111 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1112 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1113 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1114 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1115 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1116 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1117 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1118 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1119 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1120 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1121 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1122 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1123 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1124 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1125 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1126 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1127 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1128
1129 /* This is a set of tables that came originally from a Windows user. It seems to
1130 be at least an approximation of ISO 8859. In particular, there are characters
1131 greater than 128 that are marked as spaces, letters, etc. */
1132
1133 static const pcre_uint8 tables1[] = { /* same four-part layout as tables0, judging by size and content */
1134 0,1,2,3,4,5,6,7, /* part 1: lower casing table (256 bytes) */
1135 8,9,10,11,12,13,14,15,
1136 16,17,18,19,20,21,22,23,
1137 24,25,26,27,28,29,30,31,
1138 32,33,34,35,36,37,38,39,
1139 40,41,42,43,44,45,46,47,
1140 48,49,50,51,52,53,54,55,
1141 56,57,58,59,60,61,62,63,
1142 64,97,98,99,100,101,102,103,
1143 104,105,106,107,108,109,110,111,
1144 112,113,114,115,116,117,118,119,
1145 120,121,122,91,92,93,94,95,
1146 96,97,98,99,100,101,102,103,
1147 104,105,106,107,108,109,110,111,
1148 112,113,114,115,116,117,118,119,
1149 120,121,122,123,124,125,126,127,
1150 128,129,130,131,132,133,134,135,
1151 136,137,138,139,140,141,142,143,
1152 144,145,146,147,148,149,150,151,
1153 152,153,154,155,156,157,158,159,
1154 160,161,162,163,164,165,166,167,
1155 168,169,170,171,172,173,174,175,
1156 176,177,178,179,180,181,182,183,
1157 184,185,186,187,188,189,190,191,
1158 224,225,226,227,228,229,230,231, /* 192-222 map to 224-254, except 215 */
1159 232,233,234,235,236,237,238,239,
1160 240,241,242,243,244,245,246,215,
1161 248,249,250,251,252,253,254,223,
1162 224,225,226,227,228,229,230,231,
1163 232,233,234,235,236,237,238,239,
1164 240,241,242,243,244,245,246,247,
1165 248,249,250,251,252,253,254,255,
1166 0,1,2,3,4,5,6,7, /* part 2: case flipping table (256 bytes) */
1167 8,9,10,11,12,13,14,15,
1168 16,17,18,19,20,21,22,23,
1169 24,25,26,27,28,29,30,31,
1170 32,33,34,35,36,37,38,39,
1171 40,41,42,43,44,45,46,47,
1172 48,49,50,51,52,53,54,55,
1173 56,57,58,59,60,61,62,63,
1174 64,97,98,99,100,101,102,103,
1175 104,105,106,107,108,109,110,111,
1176 112,113,114,115,116,117,118,119,
1177 120,121,122,91,92,93,94,95,
1178 96,65,66,67,68,69,70,71,
1179 72,73,74,75,76,77,78,79,
1180 80,81,82,83,84,85,86,87,
1181 88,89,90,123,124,125,126,127,
1182 128,129,130,131,132,133,134,135,
1183 136,137,138,139,140,141,142,143,
1184 144,145,146,147,148,149,150,151,
1185 152,153,154,155,156,157,158,159,
1186 160,161,162,163,164,165,166,167,
1187 168,169,170,171,172,173,174,175,
1188 176,177,178,179,180,181,182,183,
1189 184,185,186,187,188,189,190,191,
1190 224,225,226,227,228,229,230,231,
1191 232,233,234,235,236,237,238,239,
1192 240,241,242,243,244,245,246,215,
1193 248,249,250,251,252,253,254,223,
1194 192,193,194,195,196,197,198,199,
1195 200,201,202,203,204,205,206,207,
1196 208,209,210,211,212,213,214,247,
1197 216,217,218,219,220,221,222,255,
1198 0,62,0,0,1,0,0,0, /* part 3: ten 32-byte class bitmaps; by position this one is space */
1199 0,0,0,0,0,0,0,0,
1200 32,0,0,0,1,0,0,0,
1201 0,0,0,0,0,0,0,0,
1202 0,0,0,0,0,0,255,3, /* xdigit (by position) */
1203 126,0,0,0,126,0,0,0,
1204 0,0,0,0,0,0,0,0,
1205 0,0,0,0,0,0,0,0,
1206 0,0,0,0,0,0,255,3, /* digit (by position) */
1207 0,0,0,0,0,0,0,0,
1208 0,0,0,0,0,0,12,2,
1209 0,0,0,0,0,0,0,0,
1210 0,0,0,0,0,0,0,0, /* upper (by position) */
1211 254,255,255,7,0,0,0,0,
1212 0,0,0,0,0,0,0,0,
1213 255,255,127,127,0,0,0,0,
1214 0,0,0,0,0,0,0,0, /* lower (by position) */
1215 0,0,0,0,254,255,255,7,
1216 0,0,0,0,0,4,32,4,
1217 0,0,0,128,255,255,127,255,
1218 0,0,0,0,0,0,255,3, /* word (by position) */
1219 254,255,255,135,254,255,255,7,
1220 0,0,0,0,0,4,44,6,
1221 255,255,127,255,255,255,127,255,
1222 0,0,0,0,254,255,255,255, /* graph (by position) */
1223 255,255,255,255,255,255,255,127,
1224 0,0,0,0,254,255,255,255,
1225 255,255,255,255,255,255,255,255,
1226 0,2,0,0,255,255,255,255, /* print (by position) */
1227 255,255,255,255,255,255,255,127,
1228 0,0,0,0,255,255,255,255,
1229 255,255,255,255,255,255,255,255,
1230 0,0,0,0,254,255,0,252, /* punct (by position) */
1231 1,0,0,248,1,0,0,120,
1232 0,0,0,0,254,255,255,255,
1233 0,0,128,0,0,0,128,0,
1234 255,255,255,255,0,0,0,0, /* cntrl (by position) */
1235 0,0,0,0,0,0,0,128,
1236 255,255,255,255,0,0,0,0,
1237 0,0,0,0,0,0,0,0,
1238 128,0,0,0,0,0,0,0, /* part 4: character type flags, one byte per character (bit meanings as documented for tables0) */
1239 0,1,1,0,1,1,0,0,
1240 0,0,0,0,0,0,0,0,
1241 0,0,0,0,0,0,0,0,
1242 1,0,0,0,128,0,0,0,
1243 128,128,128,128,0,0,128,0,
1244 28,28,28,28,28,28,28,28,
1245 28,28,0,0,0,0,0,128,
1246 0,26,26,26,26,26,26,18,
1247 18,18,18,18,18,18,18,18,
1248 18,18,18,18,18,18,18,18,
1249 18,18,18,128,128,0,128,16,
1250 0,26,26,26,26,26,26,18,
1251 18,18,18,18,18,18,18,18,
1252 18,18,18,18,18,18,18,18,
1253 18,18,18,128,128,0,0,0,
1254 0,0,0,0,0,1,0,0, /* characters 128 and up: some carry space, letter or digit flags */
1255 0,0,0,0,0,0,0,0,
1256 0,0,0,0,0,0,0,0,
1257 0,0,0,0,0,0,0,0,
1258 1,0,0,0,0,0,0,0,
1259 0,0,18,0,0,0,0,0,
1260 0,0,20,20,0,18,0,0,
1261 0,20,18,0,0,0,0,0,
1262 18,18,18,18,18,18,18,18,
1263 18,18,18,18,18,18,18,18,
1264 18,18,18,18,18,18,18,0,
1265 18,18,18,18,18,18,18,18,
1266 18,18,18,18,18,18,18,18,
1267 18,18,18,18,18,18,18,18,
1268 18,18,18,18,18,18,18,0,
1269 18,18,18,18,18,18,18,18
1270 };
1271
1272
1273
1274
1275 #ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement strerror() for C libraries that do not supply one (SunOS4 is the
example the original author gives). The message is looked up in the
traditional sys_errlist[] table, which holds sys_nerr entries.

Argument:
  n            an error number

Returns:       the sys_errlist[] entry for n, or a fixed "unknown" message
               when n lies outside 0 .. sys_nerr-1
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1293 #endif /* HAVE_STRERROR */
1294
1295
1296
1297 /*************************************************
1298 * Print newline configuration *
1299 *************************************************/
1300
1301 /*
1302 Arguments:
1303 rc the return code from PCRE_CONFIG_NEWLINE
1304 isc TRUE if called from "-C newline"
1305 Returns: nothing
1306 */
1307
1308 static void
1309 print_newline_config(int rc, BOOL isc)
1310 {
1311 const char *s = NULL;
1312 if (!isc) printf(" Newline sequence is ");
1313 switch(rc)
1314 {
1315 case CHAR_CR: s = "CR"; break;
1316 case CHAR_LF: s = "LF"; break;
1317 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1318 case -1: s = "ANY"; break;
1319 case -2: s = "ANYCRLF"; break;
1320
1321 default:
1322 printf("a non-standard value: 0x%04x\n", rc);
1323 return;
1324 }
1325
1326 printf("%s\n", s);
1327 }
1328
1329
1330
1331 /*************************************************
1332 * JIT memory callback *
1333 *************************************************/
1334
1335 static pcre_jit_stack* jit_callback(void *arg)
1336 {
1337 jit_was_used = TRUE;
1338 return (pcre_jit_stack *)arg;
1339 }
1340
1341
1342 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1343 /*************************************************
1344 * Convert UTF-8 string to value *
1345 *************************************************/
1346
1347 /* This function takes one or more bytes that represents a UTF-8 character,
1348 and returns the value of the character.
1349
1350 Argument:
1351 utf8bytes a pointer to the byte vector
1352 vptr a pointer to an int to receive the value
1353
1354 Returns: > 0 => the number of bytes consumed
1355 -6 to 0 => malformed UTF-8 character at offset = (-return)
1356 */
1357
1358 static int
1359 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1360 {
1361 pcre_uint32 c = *utf8bytes++;
1362 pcre_uint32 d = c;
1363 int i, j, s;
1364
1365 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1366 {
1367 if ((d & 0x80) == 0) break;
1368 d <<= 1;
1369 }
1370
1371 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1372 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1373
1374 /* i now has a value in the range 1-5 */
1375
1376 s = 6*i;
1377 d = (c & utf8_table3[i]) << s;
1378
1379 for (j = 0; j < i; j++)
1380 {
1381 c = *utf8bytes++;
1382 if ((c & 0xc0) != 0x80) return -(j+1);
1383 s -= 6;
1384 d |= (c & 0x3f) << s;
1385 }
1386
1387 /* Check that encoding was the correct unique one */
1388
1389 for (j = 0; j < utf8_table1_size; j++)
1390 if (d <= utf8_table1[j]) break;
1391 if (j != i) return -(i+1);
1392
1393 /* Valid value */
1394
1395 *vptr = d;
1396 return i+1;
1397 }
1398 #endif /* NOUTF || SUPPORT_PCRE16 */
1399
1400
1401
1402 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1403 /*************************************************
1404 * Convert character value to UTF-8 *
1405 *************************************************/
1406
1407 /* This function takes an integer value in the range 0 - 0x7fffffff
1408 and encodes it as a UTF-8 character in 0 to 6 bytes.
1409
1410 Arguments:
1411 cvalue the character value
1412 utf8bytes pointer to buffer for result - at least 6 bytes long
1413
1414 Returns: number of characters placed in the buffer
1415 */
1416
1417 static int
1418 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1419 {
1420 register int i, j;
1421 if (cvalue > 0x7fffffffu)
1422 return -1;
1423 for (i = 0; i < utf8_table1_size; i++)
1424 if (cvalue <= utf8_table1[i]) break;
1425 utf8bytes += i;
1426 for (j = i; j > 0; j--)
1427 {
1428 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1429 cvalue >>= 6;
1430 }
1431 *utf8bytes = utf8_table2[i] | cvalue;
1432 return i + 1;
1433 }
1434 #endif
1435
1436
1437 #ifdef SUPPORT_PCRE16
1438 /*************************************************
1439 * Convert a string to 16-bit *
1440 *************************************************/
1441
1442 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1443 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1444 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1445 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1446 result is always left in buffer16.
1447
1448 Note that this function does not object to surrogate values. This is
1449 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1450 for the purpose of testing that they are correctly faulted.
1451
1452 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1453 in UTF-8 so that values greater than 255 can be handled.
1454
1455 Arguments:
1456 data TRUE if converting a data line; FALSE for a regex
1457 p points to a byte string
1458 utf true if UTF-8 (to be converted to UTF-16)
1459 len number of bytes in the string (excluding trailing zero)
1460
1461 Returns: number of 16-bit data items used (excluding trailing zero)
1462 OR -1 if a UTF-8 string is malformed
1463 OR -2 if a value > 0x10ffff is encountered
1464 OR -3 if a value > 0xffff is encountered when not in UTF mode
1465 */
1466
1467 static int
1468 to16(int data, pcre_uint8 *p, int utf, int len)
1469 {
1470 pcre_uint16 *pp;
1471
1472 if (buffer16_size < 2*len + 2)
1473 {
1474 if (buffer16 != NULL) free(buffer16);
1475 buffer16_size = 2*len + 2;
1476 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1477 if (buffer16 == NULL)
1478 {
1479 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1480 exit(1);
1481 }
1482 }
1483
1484 pp = buffer16;
1485
1486 if (!utf && !data)
1487 {
1488 while (len-- > 0) *pp++ = *p++;
1489 }
1490
1491 else
1492 {
1493 pcre_uint32 c = 0;
1494 while (len > 0)
1495 {
1496 int chlen = utf82ord(p, &c);
1497 if (chlen <= 0) return -1;
1498 if (c > 0x10ffff) return -2;
1499 p += chlen;
1500 len -= chlen;
1501 if (c < 0x10000) *pp++ = c; else
1502 {
1503 if (!utf) return -3;
1504 c -= 0x10000;
1505 *pp++ = 0xD800 | (c >> 10);
1506 *pp++ = 0xDC00 | (c & 0x3ff);
1507 }
1508 }
1509 }
1510
1511 *pp = 0;
1512 return pp - buffer16;
1513 }
1514 #endif
1515
1516 #ifdef SUPPORT_PCRE32
1517 /*************************************************
1518 * Convert a string to 32-bit *
1519 *************************************************/
1520
1521 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1522 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1523 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1524 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1525 result is always left in buffer32.
1526
1527 Note that this function does not object to surrogate values. This is
1528 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1529 for the purpose of testing that they are correctly faulted.
1530
1531 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1532 in UTF-8 so that values greater than 255 can be handled.
1533
1534 Arguments:
1535 data TRUE if converting a data line; FALSE for a regex
1536 p points to a byte string
1537 utf true if UTF-8 (to be converted to UTF-32)
1538 len number of bytes in the string (excluding trailing zero)
1539
1540 Returns: number of 32-bit data items used (excluding trailing zero)
1541 OR -1 if a UTF-8 string is malformed
1542 OR -2 if a value > 0x10ffff is encountered
1543 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1544 */
1545
1546 static int
1547 to32(int data, pcre_uint8 *p, int utf, int len)
1548 {
1549 pcre_uint32 *pp;
1550
1551 if (buffer32_size < 4*len + 4)
1552 {
1553 if (buffer32 != NULL) free(buffer32);
1554 buffer32_size = 4*len + 4;
1555 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1556 if (buffer32 == NULL)
1557 {
1558 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1559 exit(1);
1560 }
1561 }
1562
1563 pp = buffer32;
1564
1565 if (!utf && !data)
1566 {
1567 while (len-- > 0) *pp++ = *p++;
1568 }
1569
1570 else
1571 {
1572 pcre_uint32 c = 0;
1573 while (len > 0)
1574 {
1575 int chlen = utf82ord(p, &c);
1576 if (chlen <= 0) return -1;
1577 if (utf)
1578 {
1579 if (c > 0x10ffff) return -2;
1580 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1581 }
1582
1583 p += chlen;
1584 len -= chlen;
1585 *pp++ = c;
1586 }
1587 }
1588
1589 *pp = 0;
1590 return pp - buffer32;
1591 }
1592
/* Check that a 32-bit character string is valid UTF-32. A string is rejected
if any element is above 0x10ffff, is a surrogate (0xd800-0xdfff), or is a
non-character (0xfdd0-0xfdef, or any value whose low 16 bits are 0xfffe or
0xffff).

Arguments:
  string       points to the string
  length       length of string, or -1 if the string is zero-terminated

Returns:       TRUE if the string is a valid UTF-32 string
               FALSE otherwise
*/

#ifdef SUPPORT_UTF
static BOOL
valid_utf32(pcre_uint32 *string, int length)
{
register pcre_uint32 *p;
register pcre_uint32 c;

/* A negative length means "zero-terminated": measure the string first, so
that it is really checked instead of being accepted unseen. */

if (length < 0)
  {
  length = 0;
  while (string[length] != 0) length++;
  }

for (p = string; length-- > 0; p++)
  {
  c = *p;

  if (c > 0x10ffffu)
    return FALSE;

  /* A surrogate */
  if ((c & 0xfffff800u) == 0xd800u)
    return FALSE;

  /* Non-character */
  if ((c & 0xfffeu) == 0xfffeu ||
      (c >= 0xfdd0u && c <= 0xfdefu))
    return FALSE;
  }

return TRUE;
}
#endif /* SUPPORT_UTF */
1630
1631 #endif
1632
1633 /*************************************************
1634 * Read or extend an input line *
1635 *************************************************/
1636
1637 /* Input lines are read into buffer, but both patterns and data lines can be
1638 continued over multiple input lines. In addition, if the buffer fills up, we
1639 want to automatically expand it so as to be able to handle extremely large
1640 lines that are needed for certain stress tests. When the input buffer is
1641 expanded, the other two buffers must also be expanded likewise, and the
1642 contents of pbuffer, which are a copy of the input for callouts, must be
1643 preserved (for when expansion happens for a data line). This is not the most
1644 optimal way of handling this, but hey, this is just a test program!
1645
1646 Arguments:
1647 f the file to read
1648 start where in buffer to start (this *must* be within buffer)
1649 prompt for stdin or readline()
1650
1651 Returns: pointer to the start of new data
1652 could be a copy of start, or could be moved
1653 NULL if no data read and EOF reached
1654 */
1655
1656 static pcre_uint8 *
1657 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1658 {
1659 pcre_uint8 *here = start;
1660
1661 for (;;)
1662 {
1663 size_t rlen = (size_t)(buffer_size - (here - buffer));
1664
1665 if (rlen > 1000)
1666 {
1667 int dlen;
1668
1669 /* If libreadline or libedit support is required, use readline() to read a
1670 line if the input is a terminal. Note that readline() removes the trailing
1671 newline, so we must put it back again, to be compatible with fgets(). */
1672
1673 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1674 if (isatty(fileno(f)))
1675 {
1676 size_t len;
1677 char *s = readline(prompt);
1678 if (s == NULL) return (here == start)? NULL : start;
1679 len = strlen(s);
1680 if (len > 0) add_history(s);
1681 if (len > rlen - 1) len = rlen - 1;
1682 memcpy(here, s, len);
1683 here[len] = '\n';
1684 here[len+1] = 0;
1685 free(s);
1686 }
1687 else
1688 #endif
1689
1690 /* Read the next line by normal means, prompting if the file is stdin. */
1691
1692 {
1693 if (f == stdin) printf("%s", prompt);
1694 if (fgets((char *)here, rlen, f) == NULL)
1695 return (here == start)? NULL : start;
1696 }
1697
1698 dlen = (int)strlen((char *)here);
1699 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1700 here += dlen;
1701 }
1702
1703 else
1704 {
1705 int new_buffer_size = 2*buffer_size;
1706 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1707 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1708
1709 if (new_buffer == NULL || new_pbuffer == NULL)
1710 {
1711 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1712 exit(1);
1713 }
1714
1715 memcpy(new_buffer, buffer, buffer_size);
1716 memcpy(new_pbuffer, pbuffer, buffer_size);
1717
1718 buffer_size = new_buffer_size;
1719
1720 start = new_buffer + (start - buffer);
1721 here = new_buffer + (here - buffer);
1722
1723 free(buffer);
1724 free(pbuffer);
1725
1726 buffer = new_buffer;
1727 pbuffer = new_pbuffer;
1728 }
1729 }
1730
1731 return NULL; /* Control never gets here */
1732 }
1733
1734
1735
1736 /*************************************************
1737 * Read number from string *
1738 *************************************************/
1739
1740 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1741 around with conditional compilation, just do the job by hand. It is only used
1742 for unpicking arguments, so just keep it simple.
1743
1744 Arguments:
1745 str string to be converted
1746 endptr where to put the end pointer
1747
1748 Returns: the unsigned long
1749 */
1750
1751 static int
1752 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1753 {
1754 int result = 0;
1755 while(*str != 0 && isspace(*str)) str++;
1756 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1757 *endptr = str;
1758 return(result);
1759 }
1760
1761
1762
1763 /*************************************************
1764 * Print one character *
1765 *************************************************/
1766
1767 /* Print a single character either literally, or as a hex escape. */
1768
1769 static int pchar(pcre_uint32 c, FILE *f)
1770 {
1771 int n;
1772 if (PRINTOK(c))
1773 {
1774 if (f != NULL) fprintf(f, "%c", c);
1775 return 1;
1776 }
1777
1778 if (c < 0x100)
1779 {
1780 if (use_utf)
1781 {
1782 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1783 return 6;
1784 }
1785 else
1786 {
1787 if (f != NULL) fprintf(f, "\\x%02x", c);
1788 return 4;
1789 }
1790 }
1791
1792 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1793 return n >= 0 ? n : 0;
1794 }
1795
1796
1797
1798 #ifdef SUPPORT_PCRE8
1799 /*************************************************
1800 * Print 8-bit character string *
1801 *************************************************/
1802
1803 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1804 If handed a NULL file, just counts chars without printing. */
1805
1806 static int pchars(pcre_uint8 *p, int length, FILE *f)
1807 {
1808 pcre_uint32 c = 0;
1809 int yield = 0;
1810
1811 if (length < 0)
1812 length = strlen((char *)p);
1813
1814 while (length-- > 0)
1815 {
1816 #if !defined NOUTF
1817 if (use_utf)
1818 {
1819 int rc = utf82ord(p, &c);
1820 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1821 {
1822 length -= rc - 1;
1823 p += rc;
1824 yield += pchar(c, f);
1825 continue;
1826 }
1827 }
1828 #endif
1829 c = *p++;
1830 yield += pchar(c, f);
1831 }
1832
1833 return yield;
1834 }
1835 #endif
1836
1837
1838
1839 #ifdef SUPPORT_PCRE16
1840 /*************************************************
1841 * Find length of 0-terminated 16-bit string *
1842 *************************************************/
1843
1844 static int strlen16(PCRE_SPTR16 p)
1845 {
1846 int len = 0;
1847 while (*p++ != 0) len++;
1848 return len;
1849 }
1850 #endif /* SUPPORT_PCRE16 */
1851
1852
1853
1854 #ifdef SUPPORT_PCRE32
1855 /*************************************************
1856 * Find length of 0-terminated 32-bit string *
1857 *************************************************/
1858
1859 static int strlen32(PCRE_SPTR32 p)
1860 {
1861 int len = 0;
1862 while (*p++ != 0) len++;
1863 return len;
1864 }
1865 #endif /* SUPPORT_PCRE32 */
1866
1867
1868
1869 #ifdef SUPPORT_PCRE16
1870 /*************************************************
1871 * Print 16-bit character string *
1872 *************************************************/
1873
1874 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1875 If handed a NULL file, just counts chars without printing. */
1876
1877 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1878 {
1879 int yield = 0;
1880
1881 if (length < 0)
1882 length = strlen16(p);
1883
1884 while (length-- > 0)
1885 {
1886 pcre_uint32 c = *p++ & 0xffff;
1887 #if !defined NOUTF
1888 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1889 {
1890 int d = *p & 0xffff;
1891 if (d >= 0xDC00 && d < 0xDFFF)
1892 {
1893 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1894 length--;
1895 p++;
1896 }
1897 }
1898 #endif
1899 yield += pchar(c, f);
1900 }
1901
1902 return yield;
1903 }
1904 #endif /* SUPPORT_PCRE16 */
1905
1906
1907
1908 #ifdef SUPPORT_PCRE32
1909 /*************************************************
1910 * Print 32-bit character string *
1911 *************************************************/
1912
1913 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
1914 If handed a NULL file, just counts chars without printing. */
1915
1916 #define UTF32_MASK (0x1fffffu)
1917
1918 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
1919 {
1920 int yield = 0;
1921
1922 if (length < 0)
1923 length = strlen32(p);
1924
1925 while (length-- > 0)
1926 {
1927 pcre_uint32 c = *p++;
1928 if (utf) c &= UTF32_MASK;
1929 yield += pchar(c, f);
1930 }
1931
1932 return yield;
1933 }
1934 #endif /* SUPPORT_PCRE32 */
1935
1936
1937
1938 #ifdef SUPPORT_PCRE8
1939 /*************************************************
1940 * Read a capture name (8-bit) and check it *
1941 *************************************************/
1942
1943 static pcre_uint8 *
1944 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1945 {
1946 pcre_uint8 *npp = *pp;
1947 while (isalnum(*p)) *npp++ = *p++;
1948 *npp++ = 0;
1949 *npp = 0;
1950 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1951 {
1952 fprintf(outfile, "no parentheses with name \"");
1953 PCHARSV(*pp, 0, -1, outfile);
1954 fprintf(outfile, "\"\n");
1955 }
1956
1957 *pp = npp;
1958 return p;
1959 }
1960 #endif /* SUPPORT_PCRE8 */
1961
1962
1963
1964 #ifdef SUPPORT_PCRE16
1965 /*************************************************
1966 * Read a capture name (16-bit) and check it *
1967 *************************************************/
1968
1969 /* Note that the text being read is 8-bit. */
1970
1971 static pcre_uint8 *
1972 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1973 {
1974 pcre_uint16 *npp = *pp;
1975 while (isalnum(*p)) *npp++ = *p++;
1976 *npp++ = 0;
1977 *npp = 0;
1978 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1979 {
1980 fprintf(outfile, "no parentheses with name \"");
1981 PCHARSV(*pp, 0, -1, outfile);
1982 fprintf(outfile, "\"\n");
1983 }
1984 *pp = npp;
1985 return p;
1986 }
1987 #endif /* SUPPORT_PCRE16 */
1988
1989
1990
1991 #ifdef SUPPORT_PCRE32
1992 /*************************************************
1993 * Read a capture name (32-bit) and check it *
1994 *************************************************/
1995
1996 /* Note that the text being read is 8-bit. */
1997
1998 static pcre_uint8 *
1999 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2000 {
2001 pcre_uint32 *npp = *pp;
2002 while (isalnum(*p)) *npp++ = *p++;
2003 *npp++ = 0;
2004 *npp = 0;
2005 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2006 {
2007 fprintf(outfile, "no parentheses with name \"");
2008 PCHARSV(*pp, 0, -1, outfile);
2009 fprintf(outfile, "\"\n");
2010 }
2011 *pp = npp;
2012 return p;
2013 }
2014 #endif /* SUPPORT_PCRE32 */
2015
2016
2017
2018 /*************************************************
2019 * Callout function *
2020 *************************************************/
2021
2022 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2023 the match. Yield zero unless more callouts than the fail count, or the callout
2024 data is not zero. */
2025
2026 static int callout(pcre_callout_block *cb)
2027 {
2028 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2029 int i, pre_start, post_start, subject_length;
2030
2031 if (callout_extra)
2032 {
2033 fprintf(f, "Callout %d: last capture = %d\n",
2034 cb->callout_number, cb->capture_last);
2035
2036 for (i = 0; i < cb->capture_top * 2; i += 2)
2037 {
2038 if (cb->offset_vector[i] < 0)
2039 fprintf(f, "%2d: <unset>\n", i/2);
2040 else
2041 {
2042 fprintf(f, "%2d: ", i/2);
2043 PCHARSV(cb->subject, cb->offset_vector[i],
2044 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2045 fprintf(f, "\n");
2046 }
2047 }
2048 }
2049
2050 /* Re-print the subject in canonical form, the first time or if giving full
2051 datails. On subsequent calls in the same match, we use pchars just to find the
2052 printed lengths of the substrings. */
2053
2054 if (f != NULL) fprintf(f, "--->");
2055
2056 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2057 PCHARS(post_start, cb->subject, cb->start_match,
2058 cb->current_position - cb->start_match, f);
2059
2060 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2061
2062 PCHARSV(cb->subject, cb->current_position,
2063 cb->subject_length - cb->current_position, f);
2064
2065 if (f != NULL) fprintf(f, "\n");
2066
2067 /* Always print appropriate indicators, with callout number if not already
2068 shown. For automatic callouts, show the pattern offset. */
2069
2070 if (cb->callout_number == 255)
2071 {
2072 fprintf(outfile, "%+3d ", cb->pattern_position);
2073 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2074 }
2075 else
2076 {
2077 if (callout_extra) fprintf(outfile, " ");
2078 else fprintf(outfile, "%3d ", cb->callout_number);
2079 }
2080
2081 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2082 fprintf(outfile, "^");
2083
2084 if (post_start > 0)
2085 {
2086 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2087 fprintf(outfile, "^");
2088 }
2089
2090 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2091 fprintf(outfile, " ");
2092
2093 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2094 pbuffer + cb->pattern_position);
2095
2096 fprintf(outfile, "\n");
2097 first_callout = 0;
2098
2099 if (cb->mark != last_callout_mark)
2100 {
2101 if (cb->mark == NULL)
2102 fprintf(outfile, "Latest Mark: <unset>\n");
2103 else
2104 {
2105 fprintf(outfile, "Latest Mark: ");
2106 PCHARSV(cb->mark, 0, -1, outfile);
2107 putc('\n', outfile);
2108 }
2109 last_callout_mark = cb->mark;
2110 }
2111
2112 if (cb->callout_data != NULL)
2113 {
2114 int callout_data = *((int *)(cb->callout_data));
2115 if (callout_data != 0)
2116 {
2117 fprintf(outfile, "Callout data = %d\n", callout_data);
2118 return callout_data;
2119 }
2120 }
2121
2122 return (cb->callout_number != callout_fail_id)? 0 :
2123 (++callout_count >= callout_fail_count)? 1 : 0;
2124 }
2125
2126
2127 /*************************************************
2128 * Local malloc functions *
2129 *************************************************/
2130
2131 /* Alternative malloc function, to test functionality and save the size of a
2132 compiled re, which is the first store request that pcre_compile() makes. The
2133 show_malloc variable is set only during matching. */
2134
2135 static void *new_malloc(size_t size)
2136 {
2137 void *block = malloc(size);
2138 gotten_store = size;
2139 if (first_gotten_store == 0) first_gotten_store = size;
2140 if (show_malloc)
2141 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2142 return block;
2143 }
2144
2145 static void new_free(void *block)
2146 {
2147 if (show_malloc)
2148 fprintf(outfile, "free %p\n", block);
2149 free(block);
2150 }
2151
2152 /* For recursion malloc/free, to test stacking calls */
2153
2154 static void *stack_malloc(size_t size)
2155 {
2156 void *block = malloc(size);
2157 if (show_malloc)
2158 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2159 return block;
2160 }
2161
2162 static void stack_free(void *block)
2163 {
2164 if (show_malloc)
2165 fprintf(outfile, "stack_free %p\n", block);
2166 free(block);
2167 }
2168
2169
2170 /*************************************************
2171 * Call pcre_fullinfo() *
2172 *************************************************/
2173
2174 /* Get one piece of information from the pcre_fullinfo() function. When only
2175 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2176 value, but the code is defensive.
2177
2178 Arguments:
2179 re compiled regex
2180 study study data
2181 option PCRE_INFO_xxx option
2182 ptr where to put the data
2183
2184 Returns: 0 when OK, < 0 on error
2185 */
2186
2187 static int
2188 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2189 {
2190 int rc;
2191
2192 if (pcre_mode == PCRE32_MODE)
2193 #ifdef SUPPORT_PCRE32
2194 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2195 #else
2196 rc = PCRE_ERROR_BADMODE;
2197 #endif
2198 else if (pcre_mode == PCRE16_MODE)
2199 #ifdef SUPPORT_PCRE16
2200 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2201 #else
2202 rc = PCRE_ERROR_BADMODE;
2203 #endif
2204 else
2205 #ifdef SUPPORT_PCRE8
2206 rc = pcre_fullinfo(re, study, option, ptr);
2207 #else
2208 rc = PCRE_ERROR_BADMODE;
2209 #endif
2210
2211 if (rc < 0)
2212 {
2213 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2214 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2215 if (rc == PCRE_ERROR_BADMODE)
2216 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2217 "%d-bit mode\n", 8 * CHAR_SIZE,
2218 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2219 }
2220
2221 return rc;
2222 }
2223
2224
2225
2226 /*************************************************
2227 * Swap byte functions *
2228 *************************************************/
2229
2230 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2231 value, respectively.
2232
2233 Arguments:
2234 value any number
2235
2236 Returns: the byte swapped value
2237 */
2238
2239 static pcre_uint32
2240 swap_uint32(pcre_uint32 value)
2241 {
2242 return ((value & 0x000000ff) << 24) |
2243 ((value & 0x0000ff00) << 8) |
2244 ((value & 0x00ff0000) >> 8) |
2245 (value >> 24);
2246 }
2247
2248 static pcre_uint16
2249 swap_uint16(pcre_uint16 value)
2250 {
2251 return (value >> 8) | (value << 8);
2252 }
2253
2254
2255
2256 /*************************************************
2257 * Flip bytes in a compiled pattern *
2258 *************************************************/
2259
2260 /* This function is called if the 'F' option was present on a pattern that is
2261 to be written to a file. We flip the bytes of all the integer fields in the
2262 regex data block and the study block. In 16-bit mode this also flips relevant
2263 bytes in the pattern itself. This is to make it possible to test PCRE's
2264 ability to reload byte-flipped patterns, e.g. those compiled on a different
2265 architecture. */
2266
#if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
/* Byte-flip a pattern that was compiled by the 8-bit or the 16-bit library.
The integer fields of the regex data block and, when extra is not NULL, of the
study block are always flipped. When 16-bit support is compiled in and
pcre_mode is PCRE16_MODE, the 16-bit units of the name table and of the
compiled code that follows it are flipped as well.

Arguments:
  ere        the compiled pattern
  extra      the extra block holding the study data, or NULL
*/

static void
regexflip8_or_16(pcre *ere, pcre_extra *extra)
{
  real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
#ifdef SUPPORT_PCRE16
  int op;
  /* These are taken before the header fields they depend on are flipped. */
  pcre_uint16 *code = (pcre_uint16 *)re + re->name_table_offset;
  int pending = re->name_count * re->name_entry_size;
#ifdef SUPPORT_UTF
  BOOL utf = (re->options & PCRE_UTF16) != 0;
  BOOL utf16_char = FALSE;
#endif /* SUPPORT_UTF */
#endif /* SUPPORT_PCRE16 */

  /* Always flip the bytes in the main data block and study blocks. */

  re->magic_number = REVERSED_MAGIC_NUMBER;
  re->size = swap_uint32(re->size);
  re->options = swap_uint32(re->options);
  re->flags = swap_uint16(re->flags);
  re->top_bracket = swap_uint16(re->top_bracket);
  re->top_backref = swap_uint16(re->top_backref);
  re->first_char = swap_uint16(re->first_char);
  re->req_char = swap_uint16(re->req_char);
  re->name_table_offset = swap_uint16(re->name_table_offset);
  re->name_entry_size = swap_uint16(re->name_entry_size);
  re->name_count = swap_uint16(re->name_count);

  if (extra != NULL)
    {
    pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
    rsd->size = swap_uint32(rsd->size);
    rsd->flags = swap_uint32(rsd->flags);
    rsd->minlength = swap_uint32(rsd->minlength);
    }

  /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap
  bytes in the name table, if present, and then in the pattern itself. */

#ifdef SUPPORT_PCRE16
  if (pcre_mode != PCRE16_MODE) return;

  for (;;)
    {
    /* Swap the units still pending from the previous item. The first time
    round these are the units of the name table. */

    for (; pending > 0; pending--)
      {
      *code = swap_uint16(*code);
      code++;
      }

#ifdef SUPPORT_UTF
    /* If the previous item ended with a character whose last flipped unit is
    a high surrogate, there is exactly one more unit belonging to it. */

    if (utf16_char && (code[-1] & 0xfc00) == 0xd800)
      {
      *code = swap_uint16(*code);
      code++;
      }
    utf16_char = FALSE;
#endif /* SUPPORT_UTF */

    /* Get next opcode, keeping the unflipped value for the switch. */

    op = *code;
    *code = swap_uint16(op);
    code++;
    pending = 0;

    switch (op)
      {
      case OP_END:
      return;

      case OP_CLASS:
      case OP_NCLASS:
      /* Skip the character bit map. */
      code += 32/sizeof(pcre_uint16);
      pending = 0;
      break;

      case OP_XCLASS:
      /* LINK_SIZE can be 1 or 2 in 16 bit mode. Read the size of the XCLASS
      instance before reversing the unit(s) that hold it. */
      if (LINK_SIZE > 1)
        {
        pending = (int)((((unsigned int)(code[0]) << 16) |
          (unsigned int)(code[1])) - (1 + LINK_SIZE + 1));
        code[0] = swap_uint16(code[0]);
        code[1] = swap_uint16(code[1]);
        code += 2;
        }
      else
        {
        pending = (int)((unsigned int)(code[0]) - (1 + LINK_SIZE + 1));
        code[0] = swap_uint16(code[0]);
        code++;
        }

      /* The next unit holds the XCLASS flags. */
      op = *code;
      *code = swap_uint16(op);
      code++;
      if ((op & XCL_MAP) != 0)
        {
        /* Skip the character bit map. */
        code += 32/sizeof(pcre_uint16);
        pending -= 32/sizeof(pcre_uint16);
        }
      break;

#ifdef SUPPORT_UTF
      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      case OP_STAR:
      case OP_MINSTAR:
      case OP_PLUS:
      case OP_MINPLUS:
      case OP_QUERY:
      case OP_MINQUERY:
      case OP_UPTO:
      case OP_MINUPTO:
      case OP_EXACT:
      case OP_POSSTAR:
      case OP_POSPLUS:
      case OP_POSQUERY:
      case OP_POSUPTO:
      case OP_STARI:
      case OP_MINSTARI:
      case OP_PLUSI:
      case OP_MINPLUSI:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSSTARI:
      case OP_POSPLUSI:
      case OP_POSQUERYI:
      case OP_POSUPTOI:
      case OP_NOTSTAR:
      case OP_NOTMINSTAR:
      case OP_NOTPLUS:
      case OP_NOTMINPLUS:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:
      case OP_NOTPOSQUERY:
      case OP_NOTPOSUPTO:
      case OP_NOTSTARI:
      case OP_NOTMINSTARI:
      case OP_NOTPLUSI:
      case OP_NOTMINPLUSI:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      case OP_NOTPOSQUERYI:
      case OP_NOTPOSUPTOI:
      if (utf) utf16_char = TRUE;
#endif
      /* Fall through. */

      default:
      pending = OP_lengths16[op] - 1;
      break;
      }
    }
  /* Control should never reach here in 16 bit mode. */
#endif /* SUPPORT_PCRE16 */
}
#endif /* SUPPORT_PCRE[8|16] */
2447
2448
2449
#if defined SUPPORT_PCRE32
/* Byte-flip a pattern that was compiled by the 32-bit library. The integer
fields of the regex data block and, when extra is not NULL, of the study block
are flipped, followed by every 32-bit unit of the name table and of the
compiled code that follows it. Character bit maps are skipped, not flipped.

Arguments:
  ere        the compiled pattern
  extra      the extra block holding the study data, or NULL
*/

static void
regexflip_32(pcre *ere, pcre_extra *extra)
{
  real_pcre32 *re = (real_pcre32 *)ere;
  int op;
  /* These are taken before the header fields they depend on are flipped. */
  pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
  int length = re->name_count * re->name_entry_size;

  /* Always flip the bytes in the main data block and study blocks. */

  re->magic_number = REVERSED_MAGIC_NUMBER;
  re->size = swap_uint32(re->size);
  re->options = swap_uint32(re->options);
  re->flags = swap_uint16(re->flags);
  re->top_bracket = swap_uint16(re->top_bracket);
  re->top_backref = swap_uint16(re->top_backref);
  re->first_char = swap_uint32(re->first_char);
  re->req_char = swap_uint32(re->req_char);
  re->name_table_offset = swap_uint16(re->name_table_offset);
  re->name_entry_size = swap_uint16(re->name_entry_size);
  re->name_count = swap_uint16(re->name_count);

  if (extra != NULL)
    {
    pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
    rsd->size = swap_uint32(rsd->size);
    rsd->flags = swap_uint32(rsd->flags);
    rsd->minlength = swap_uint32(rsd->minlength);
    }

  /* In 32-bit mode we must swap bytes in the name table, if present, and then
  in the pattern itself. */

  while(TRUE)
    {
    /* Swap the units still pending from the previous item. The first time
    round these are the units of the name table. */
    while (length-- > 0)
      {
      *ptr = swap_uint32(*ptr);
      ptr++;
      }

    /* Get next opcode, keeping the unflipped value for the switch. */

    length = 0;
    op = *ptr;
    *ptr++ = swap_uint32(op);

    switch (op)
      {
      case OP_END:
      return;

      default:
      length = OP_lengths32[op] - 1;
      break;

      case OP_CLASS:
      case OP_NCLASS:
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint32);
      length = 0;
      break;

      case OP_XCLASS:
      /* LINK_SIZE can only be 1 in 32-bit mode. Read the size of the XCLASS
      instance before it is reversed. */
      length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

      /* Reverse the size of the XCLASS instance. */
      *ptr = swap_uint32(*ptr);
      ptr++;

      /* The next unit holds the XCLASS flags. */
      op = *ptr;
      *ptr = swap_uint32(op);
      ptr++;
      if ((op & XCL_MAP) != 0)
        {
        /* Skip the character bit map. */
        ptr += 32/sizeof(pcre_uint32);
        length -= 32/sizeof(pcre_uint32);
        }
      break;
      }
    }
  /* Control should never reach here in 32 bit mode. */
}

#endif /* SUPPORT_PCRE32 */
2542
2543
2544
2545 static void
2546 regexflip(pcre *ere, pcre_extra *extra)
2547 {
2548 #if defined SUPPORT_PCRE32
2549 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2550 regexflip_32(ere, extra);
2551 #endif
2552 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2553 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2554 regexflip8_or_16(ere, extra);
2555 #endif
2556 }
2557
2558
2559
2560 /*************************************************
2561 * Check match or recursion limit *
2562 *************************************************/
2563
2564 static int
2565 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2566 int start_offset, int options, int *use_offsets, int use_size_offsets,
2567 int flag, unsigned long int *limit, int errnumber, const char *msg)
2568 {
2569 int count;
2570 int min = 0;
2571 int mid = 64;
2572 int max = -1;
2573
2574 extra->flags |= flag;
2575
2576 for (;;)
2577 {
2578 *limit = mid;
2579
2580 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2581 use_offsets, use_size_offsets);
2582
2583 if (count == errnumber)
2584 {
2585 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2586 min = mid;
2587 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2588 }
2589
2590 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2591 count == PCRE_ERROR_PARTIAL)
2592 {
2593 if (mid == min + 1)
2594 {
2595 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2596 break;
2597 }
2598 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2599 max = mid;
2600 mid = (min + mid)/2;
2601 }
2602 else break; /* Some other error */
2603 }
2604
2605 extra->flags &= ~flag;
2606 return count;
2607 }
2608
2609
2610
2611 /*************************************************
2612 * Case-independent strncmp() function *
2613 *************************************************/
2614
2615 /*
2616 Arguments:
2617 s first string
2618 t second string
2619 n number of characters to compare
2620
2621 Returns: < 0, = 0, or > 0, according to the comparison
2622 */
2623
2624 static int
2625 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2626 {
2627 while (n--)
2628 {
2629 int c = tolower(*s++) - tolower(*t++);
2630 if (c) return c;
2631 }
2632 return 0;
2633 }
2634
2635
2636
2637 /*************************************************
2638 * Check newline indicator *
2639 *************************************************/
2640
2641 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2642 a message and return 0 if there is no match.
2643
2644 Arguments:
2645 p points after the leading '<'
2646 f file for error message
2647
2648 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2649 */
2650
2651 static int
2652 check_newline(pcre_uint8 *p, FILE *f)
2653 {
2654 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2655 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2656 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2657 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2658 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2659 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2660 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2661 fprintf(f, "Unknown newline type at: <%s\n", p);
2662 return 0;
2663 }
2664
2665
2666
2667 /*************************************************
2668 * Usage function *
2669 *************************************************/
2670
/* Write the command-line help text to stdout. Lines for the 16-bit and 32-bit
libraries, DFA matching and the POSIX interface appear only when those features
are compiled in, and the note about readline() depends on whether the program
was built with readline or libedit support. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
  fputs("Input and output default to stdin and stdout.\n", stdout);
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
  fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
  fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE32
  fputs(" -32 use the 32-bit library\n", stdout);
#endif
  fputs(" -b show compiled code\n", stdout);
  fputs(" -C show PCRE compile-time options and exit\n", stdout);
  fputs(" -C arg show a specific compile-time option\n", stdout);
  fputs(" and exit with its value. The arg can be:\n", stdout);
  fputs(" linksize internal link size [2, 3, 4]\n", stdout);
  fputs(" pcre8 8 bit library support enabled [0, 1]\n", stdout);
  fputs(" pcre16 16 bit library support enabled [0, 1]\n", stdout);
  fputs(" pcre32 32 bit library support enabled [0, 1]\n", stdout);
  fputs(" utf Unicode Transformation Format supported [0, 1]\n", stdout);
  fputs(" ucp Unicode Properties supported [0, 1]\n", stdout);
  fputs(" jit Just-in-time compiler supported [0, 1]\n", stdout);
  fputs(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);
  fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n", stdout);
  fputs(" -i show information about compiled patterns\n", stdout);
  fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
  fputs(" -m output memory used information\n", stdout);
  fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
  fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
  fputs(" -s force each pattern to be studied at basic level\n", stdout);
  fputs(" -s+ force each pattern to be studied, using JIT if available\n", stdout);
  fputs(" -s++ ditto, verifying when JIT was actually used\n", stdout);
  fputs(" -s+n force each pattern to be studied, using JIT if available,\n", stdout);
  fputs(" where 1 <= n <= 7 selects JIT options\n", stdout);
  fputs(" -s++n ditto, verifying when JIT was actually used\n", stdout);
  fputs(" -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
  fputs(" -tm time execution (matching) only\n", stdout);
  fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2725
2726
2727
2728 /*************************************************
2729 * Main Program *
2730 *************************************************/
2731
2732 /* Read lines from named file or stdin and write to named file or stdout; lines
2733 consist of a regular expression, in delimiters and optionally followed by
2734 options, followed by a set of test data, terminated by an empty line. */
2735
2736 int main(int argc, char **argv)
2737 {
2738 FILE *infile = stdin;
2739 const char *version;
2740 int options = 0;
2741 int study_options = 0;
2742 int default_find_match_limit = FALSE;
2743 int op = 1;
2744 int timeit = 0;
2745 int timeitm = 0;
2746 int showinfo = 0;
2747 int showstore = 0;
2748 int force_study = -1;
2749 int force_study_options = 0;
2750 int quiet = 0;
2751 int size_offsets = 45;
2752 int size_offsets_max;
2753 int *offsets = NULL;
2754 int debug = 0;
2755 int done = 0;
2756 int all_use_dfa = 0;
2757 int verify_jit = 0;
2758 int yield = 0;
2759 int mask_utf32 = 0;
2760 int stack_size;
2761 pcre_uint8 *dbuffer = NULL;
2762 size_t dbuffer_size = 1u << 14;
2763
2764 #if !defined NOPOSIX
2765 int posix = 0;
2766 #endif
2767 #if !defined NODFA
2768 int *dfa_workspace = NULL;
2769 #endif
2770
2771 pcre_jit_stack *jit_stack = NULL;
2772
2773 /* These vectors store, end-to-end, a list of zero-terminated captured
2774 substring names, each list itself being terminated by an empty name. Assume
2775 that 1024 is plenty long enough for the few names we'll be testing. It is
2776 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2777 for the actual memory, to ensure alignment. */
2778
2779 pcre_uint32 copynames[1024];
2780 pcre_uint32 getnames[1024];
2781
2782 #ifdef SUPPORT_PCRE32
2783 pcre_uint32 *cn32ptr;
2784 pcre_uint32 *gn32ptr;
2785 #endif
2786
2787 #ifdef SUPPORT_PCRE16
2788 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2789 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2790 pcre_uint16 *cn16ptr;
2791 pcre_uint16 *gn16ptr;
2792 #endif
2793
2794 #ifdef SUPPORT_PCRE8
2795 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2796 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2797 pcre_uint8 *cn8ptr;
2798 pcre_uint8 *gn8ptr;
2799 #endif
2800
2801 /* Get buffers from malloc() so that valgrind will check their misuse when
2802 debugging. They grow automatically when very long lines are read. The 16-
2803 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2804
2805 buffer = (pcre_uint8 *)malloc(buffer_size);
2806 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2807
2808 /* The outfile variable is static so that new_malloc can use it. */
2809
2810 outfile = stdout;
2811
2812 /* The following _setmode() stuff is some Windows magic that tells its runtime
2813 library to translate CRLF into a single LF character. At least, that's what
2814 I've been told: never having used Windows I take this all on trust. Originally
2815 it set 0x8000, but then I was advised that _O_BINARY was better. */
2816
2817 #if defined(_WIN32) || defined(WIN32)
2818 _setmode( _fileno( stdout ), _O_BINARY );
2819 #endif
2820
2821 /* Get the version number: both pcre_version() and pcre16_version() give the
2822 same answer. We just need to ensure that we call one that is available. */
2823
2824 #if defined SUPPORT_PCRE8
2825 version = pcre_version();
2826 #elif defined SUPPORT_PCRE16
2827 version = pcre16_version();
2828 #elif defined SUPPORT_PCRE32
2829 version = pcre32_version();
2830 #endif
2831
2832 /* Scan options */
2833
2834 while (argc > 1 && argv[op][0] == '-')
2835 {
2836 pcre_uint8 *endptr;
2837 char *arg = argv[op];
2838
2839 if (strcmp(arg, "-m") == 0) showstore = 1;
2840 else if (strcmp(arg, "-s") == 0) force_study = 0;
2841
2842 else if (strncmp(arg, "-s+", 3) == 0)
2843 {
2844 arg += 3;
2845 if (*arg == '+') { arg++; verify_jit = TRUE; }
2846 force_study = 1;
2847 if (*arg == 0)
2848 force_study_options = jit_study_bits[6];
2849 else if (*arg >= '1' && *arg <= '7')
2850 force_study_options = jit_study_bits[*arg - '1'];
2851 else goto BAD_ARG;
2852 }
2853 else if (strcmp(arg, "-8") == 0)
2854 {
2855 #ifdef SUPPORT_PCRE8
2856 pcre_mode = PCRE8_MODE;
2857 #else
2858 printf("** This version of PCRE was built without 8-bit support\n");
2859 exit(1);
2860 #endif
2861 }
2862 else if (strcmp(arg, "-16") == 0)
2863 {
2864 #ifdef SUPPORT_PCRE16
2865 pcre_mode = PCRE16_MODE;
2866 #else
2867 printf("** This version of PCRE was built without 16-bit support\n");
2868 exit(1);
2869 #endif
2870 }
2871 else if (strcmp(arg, "-32") == 0 || strcmp(arg, "-32+") == 0)
2872 {
2873 #ifdef SUPPORT_PCRE32
2874 pcre_mode = PCRE32_MODE;
2875 mask_utf32 = (strcmp(arg, "-32+") == 0);
2876 #else
2877 printf("** This version of PCRE was built without 32-bit support\n");
2878 exit(1);
2879 #endif
2880 }
2881 else if (strcmp(arg, "-q") == 0) quiet = 1;
2882 else if (strcmp(arg, "-b") == 0) debug = 1;
2883 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2884 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2885 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2886 #if !defined NODFA
2887 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2888 #endif
2889 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2890 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2891 *endptr == 0))
2892 {
2893 op++;
2894 argc--;
2895 }
2896 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2897 {
2898 int both = arg[2] == 0;
2899 int temp;
2900 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2901 *endptr == 0))
2902 {
2903 timeitm = temp;
2904 op++;
2905 argc--;
2906 }
2907 else timeitm = LOOPREPEAT;
2908 if (both) timeit = timeitm;
2909 }
2910 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2911 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2912 *endptr == 0))
2913 {
2914 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2915 printf("PCRE: -S not supported on this OS\n");
2916 exit(1);
2917 #else
2918 int rc;
2919 struct rlimit rlim;
2920 getrlimit(RLIMIT_STACK, &rlim);
2921 rlim.rlim_cur = stack_size * 1024 * 1024;
2922 rc = setrlimit(RLIMIT_STACK, &rlim);
2923 if (rc != 0)
2924 {
2925 printf("PCRE: setrlimit() failed with error %d\n", rc);
2926 exit(1);
2927 }
2928 op++;
2929 argc--;
2930 #endif
2931 }
2932 #if !defined NOPOSIX
2933 else if (strcmp(arg, "-p") == 0) posix = 1;
2934 #endif
2935 else if (strcmp(arg, "-C") == 0)
2936 {
2937 int rc;
2938 unsigned long int lrc;
2939
2940 if (argc > 2)
2941 {
2942 if (strcmp(argv[op + 1], "linksize") == 0)
2943 {
2944 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2945 printf("%d\n", rc);
2946 yield = rc;
2947 }
2948 else if (strcmp(argv[op + 1], "pcre8") == 0)
2949 {
2950 #ifdef SUPPORT_PCRE8
2951 printf("1\n");
2952 yield = 1;
2953 #else
2954 printf("0\n");
2955 yield = 0;
2956 #endif
2957 }
2958 else if (strcmp(argv[op + 1], "pcre16") == 0)
2959 {
2960 #ifdef SUPPORT_PCRE16
2961 printf("1\n");
2962 yield = 1;
2963 #else
2964 printf("0\n");
2965 yield = 0;
2966 #endif
2967 }
2968 else if (strcmp(argv[op + 1], "pcre32") == 0)
2969 {
2970 #ifdef SUPPORT_PCRE32
2971 printf("1\n");
2972 yield = 1;
2973 #else
2974 printf("0\n");
2975 yield = 0;
2976 #endif
2977 goto EXIT;
2978 }
2979 if (strcmp(argv[op + 1], "utf") == 0)
2980 {
2981 #ifdef SUPPORT_PCRE8
2982 if (pcre_mode == PCRE8_MODE)
2983 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2984 #endif
2985 #ifdef SUPPORT_PCRE16
2986 if (pcre_mode == PCRE16_MODE)
2987 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2988 #endif
2989 #ifdef SUPPORT_PCRE32
2990 if (pcre_mode == PCRE32_MODE)
2991 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2992 #endif
2993 printf("%d\n", rc);
2994 yield = rc;
2995 goto EXIT;
2996 }
2997 else if (strcmp(argv[op + 1], "ucp") == 0)
2998 {
2999 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3000 printf("%d\n", rc);
3001 yield = rc;
3002 }
3003 else if (strcmp(argv[op + 1], "jit") == 0)
3004 {
3005 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3006 printf("%d\n", rc);
3007 yield = rc;
3008 }
3009 else if (strcmp(argv[op + 1], "newline") == 0)
3010 {
3011 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3012 print_newline_config(rc, TRUE);
3013 }
3014 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3015 {
3016 #ifdef EBCDIC
3017 printf("1\n");
3018 yield = 1;
3019 #else
3020 printf("0\n");
3021 #endif
3022 }
3023 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3024 {
3025 #ifdef EBCDIC
3026 printf("0x%02x\n", CHAR_LF);
3027 #else
3028 printf("0\n");
3029 #endif
3030 }
3031 else
3032 {
3033 printf("Unknown -C option: %s\n", argv[op + 1]);
3034 }
3035 goto EXIT;
3036 }
3037
3038 /* No argument for -C: output all configuration information. */
3039
3040 printf("PCRE version %s\n", version);
3041 printf("Compiled with\n");
3042
3043 #ifdef EBCDIC
3044 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3045 #endif
3046
3047 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3048 are set, either both UTFs are supported or both are not supported. */
3049
3050 #ifdef SUPPORT_PCRE8
3051 printf(" 8-bit support\n");
3052 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3053 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3054 #endif
3055 #ifdef SUPPORT_PCRE16
3056 printf(" 16-bit support\n");
3057 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3058 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3059 #endif
3060 #ifdef SUPPORT_PCRE32
3061 printf(" 32-bit support\n");
3062 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3063 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3064 #endif
3065
3066 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3067 printf(" %sUnicode properties support\n", rc? "" : "No ");
3068 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3069 if (rc)
3070 {
3071 const char *arch;
3072 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3073 printf(" Just-in-time compiler support: %s\n", arch);
3074 }
3075 else
3076 printf(" No just-in-time compiler support\n");
3077 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3078 print_newline_config(rc, FALSE);
3079 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3080 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3081 "all Unicode newlines");
3082 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3083 printf(" Internal link size = %d\n", rc);
3084 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3085 printf(" POSIX malloc threshold = %d\n", rc);
3086 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3087 printf(" Default match limit = %ld\n", lrc);
3088 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3089 printf(" Default recursion depth limit = %ld\n", lrc);
3090 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3091 printf(" Match recursion uses %s", rc? "stack" : "heap");
3092 if (showstore)
3093 {
3094 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3095 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3096 }
3097 printf("\n");
3098 goto EXIT;
3099 }
3100 else if (strcmp(arg, "-help") == 0 ||
3101 strcmp(arg, "--help") == 0)
3102 {
3103 usage();
3104 goto EXIT;
3105 }
3106 else
3107 {
3108 BAD_ARG:
3109 printf("** Unknown or malformed option %s\n", arg);
3110 usage();
3111 yield = 1;
3112 goto EXIT;
3113 }
3114 op++;
3115 argc--;
3116 }
3117
3118 /* Get the store for the offsets vector, and remember what it was */
3119
3120 size_offsets_max = size_offsets;
3121 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3122 if (offsets == NULL)
3123 {
3124 printf("** Failed to get %d bytes of memory for offsets vector\n",
3125 (int)(size_offsets_max * sizeof(int)));
3126 yield = 1;
3127 goto EXIT;
3128 }
3129
3130 /* Sort out the input and output files */
3131
3132 if (argc > 1)
3133 {
3134 infile = fopen(argv[op], INPUT_MODE);
3135 if (infile == NULL)
3136 {
3137 printf("** Failed to open %s\n", argv[op]);
3138 yield = 1;
3139 goto EXIT;
3140 }
3141 }
3142
3143 if (argc > 2)
3144 {
3145 outfile = fopen(argv[op+1], OUTPUT_MODE);
3146 if (outfile == NULL)
3147 {
3148 printf("** Failed to open %s\n", argv[op+1]);
3149 yield = 1;
3150 goto EXIT;
3151 }
3152 }
3153
3154 /* Set alternative malloc function */
3155
3156 #ifdef SUPPORT_PCRE8
3157 pcre_malloc = new_malloc;
3158 pcre_free = new_free;
3159 pcre_stack_malloc = stack_malloc;
3160 pcre_stack_free = stack_free;
3161 #endif
3162
3163 #ifdef SUPPORT_PCRE16
3164 pcre16_malloc = new_malloc;
3165 pcre16_free = new_free;
3166 pcre16_stack_malloc = stack_malloc;
3167 pcre16_stack_free = stack_free;
3168 #endif
3169
3170 #ifdef SUPPORT_PCRE32
3171 pcre32_malloc = new_malloc;
3172 pcre32_free = new_free;
3173 pcre32_stack_malloc = stack_malloc;
3174 pcre32_stack_free = stack_free;
3175 #endif
3176
3177 /* Heading line unless quiet, then prompt for first regex if stdin */
3178
3179 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3180
3181 /* Main loop */
3182
3183 while (!done)
3184 {
3185 pcre *re = NULL;
3186 pcre_extra *extra = NULL;
3187
3188 #if !defined NOPOSIX /* There are still compilers that require no indent */
3189 regex_t preg;
3190 int do_posix = 0;
3191 #endif
3192
3193 const char *error;
3194 pcre_uint8 *markptr;
3195 pcre_uint8 *p, *pp, *ppp;
3196 pcre_uint8 *to_file = NULL;
3197 const pcre_uint8 *tables = NULL;
3198 unsigned long int get_options;
3199 unsigned long int true_size, true_study_size = 0;
3200 size_t size, regex_gotten_store;
3201 int do_allcaps = 0;
3202 int do_mark = 0;
3203 int do_study = 0;
3204 int no_force_study = 0;
3205 int do_debug = debug;
3206 int do_G = 0;
3207 int do_g = 0;
3208 int do_showinfo = showinfo;
3209 int do_showrest = 0;
3210 int do_showcaprest = 0;
3211 int do_flip = 0;
3212 int erroroffset, len, delimiter, poffset;
3213
3214 #if !defined NODFA
3215 int dfa_matched = 0;
3216 #endif
3217
3218 use_utf = 0;
3219 debug_lengths = 1;
3220
3221 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3222 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3223 fflush(outfile);
3224
3225 p = buffer;
3226 while (isspace(*p)) p++;
3227 if (*p == 0) continue;
3228
3229 /* See if the pattern is to be loaded pre-compiled from a file. */
3230
3231 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3232 {
3233 pcre_uint32 magic;
3234 pcre_uint8 sbuf[8];
3235 FILE *f;
3236
3237 p++;
3238 if (*p == '!')
3239 {
3240 do_debug = TRUE;
3241 do_showinfo = TRUE;
3242 p++;
3243 }
3244
3245 pp = p + (int)strlen((char *)p);
3246 while (isspace(pp[-1])) pp--;
3247 *pp = 0;
3248
3249 f = fopen((char *)p, "rb");
3250 if (f == NULL)
3251 {
3252 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3253 continue;
3254 }
3255
3256 first_gotten_store = 0;
3257 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3258
3259 true_size =
3260 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3261 true_study_size =
3262 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3263
3264 re = (pcre *)new_malloc(true_size);
3265 if (re == NULL)
3266 {
3267 printf("** Failed to get %d bytes of memory for pcre object\n",
3268 (int)true_size);
3269 yield = 1;
3270 goto EXIT;
3271 }
3272 regex_gotten_store = first_gotten_store;
3273
3274 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3275
3276 magic = REAL_PCRE_MAGIC(re);
3277 if (magic != MAGIC_NUMBER)
3278 {
3279 if (swap_uint32(magic) == MAGIC_NUMBER)
3280 {
3281 do_flip = 1;
3282 }
3283 else
3284 {
3285 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3286 new_free(re);
3287 fclose(f);
3288 continue;
3289 }
3290 }
3291
3292 /* We hide the byte-invert info for little and big endian tests: it is shown only when the file name directly follows '<' (no '!' given). */
3293 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3294 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3295
3296 /* Now see if there is any following study data. */
3297
3298 if (true_study_size != 0)
3299 {
3300 pcre_study_data *psd;
3301
3302 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3303 extra->flags = PCRE_EXTRA_STUDY_DATA;
3304
3305 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3306 extra->study_data = psd;
3307
3308 if (fread(psd, 1, true_study_size, f) != true_study_size)
3309 {
3310 FAIL_READ:
3311 fprintf(outfile, "Failed to read data from %s\n", p);
3312 if (extra != NULL)
3313 {
3314 PCRE_FREE_STUDY(extra);
3315 }
3316 new_free(re);
3317 fclose(f);
3318 continue;
3319 }
3320 fprintf(outfile, "Study data loaded from %s\n", p);
3321 do_study = 1; /* To get the data output if requested */
3322 }
3323 else fprintf(outfile, "No study data\n");
3324
3325 /* Flip the necessary bytes. */
3326 if (do_flip)
3327 {
3328 int rc;
3329 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3330 if (rc == PCRE_ERROR_BADMODE)
3331 {
3332 /* Simulate the result of the function call below. */
3333 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3334 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3335 PCRE_INFO_OPTIONS);
3336 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3337 "%d-bit mode\n", 8 * CHAR_SIZE,
3338 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3339 new_free(re);
3340 fclose(f);
3341 continue;
3342 }
3343 }
3344
3345 /* Need to know if UTF-8 for printing data strings. */
3346
3347 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3348 {
3349 new_free(re);
3350 fclose(f);
3351 continue;
3352 }
3353 use_utf = (get_options & PCRE_UTF8) != 0;
3354
3355 fclose(f);
3356 goto SHOW_INFO;
3357 }
3358
3359 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3360 the pattern; if it isn't complete, read more. */
3361
3362 delimiter = *p++;
3363
3364 if (isalnum(delimiter) || delimiter == '\\')
3365 {
3366 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3367 goto SKIP_DATA;
3368 }
3369
3370 pp = p;
3371 poffset = (int)(p - buffer);
3372
3373 for(;;)
3374 {
3375 while (*pp != 0)
3376 {
3377 if (*pp == '\\' && pp[1] != 0) pp++;
3378 else if (*pp == delimiter) break;
3379 pp++;
3380 }
3381 if (*pp != 0) break;
3382 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3383 {
3384 fprintf(outfile, "** Unexpected EOF\n");
3385 done = 1;
3386 goto CONTINUE;
3387 }
3388 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3389 }
3390
3391 /* The buffer may have moved while being extended; reset the start of data
3392 pointer to the correct relative point in the buffer. */
3393
3394 p = buffer + poffset;
3395
3396 /* If the first character after the delimiter is backslash, make
3397 the pattern end with backslash. This is purely to provide a way
3398 of testing for the error message when a pattern ends with backslash. */
3399
3400 if (pp[1] == '\\') *pp++ = '\\';
3401
3402 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3403 for callouts. */
3404
3405 *pp++ = 0;
3406 strcpy((char *)pbuffer, (char *)p);
3407
3408 /* Look for options after final delimiter */
3409
3410 options = 0;
3411 study_options = force_study_options;
3412 log_store = showstore; /* default from command line */
3413
3414 while (*pp != 0)
3415 {
3416 switch (*pp++)
3417 {
3418 case 'f': options |= PCRE_FIRSTLINE; break;
3419 case 'g': do_g = 1; break;
3420 case 'i': options |= PCRE_CASELESS; break;
3421 case 'm': options |= PCRE_MULTILINE; break;
3422 case 's': options |= PCRE_DOTALL; break;
3423 case 'x': options |= PCRE_EXTENDED; break;
3424
3425 case '+':
3426 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3427 break;
3428
3429 case '=': do_allcaps = 1; break;
3430 case 'A': options |= PCRE_ANCHORED; break;
3431 case 'B': do_debug = 1; break;
3432 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3433 case 'D': do_debug = do_showinfo = 1; break;
3434 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3435 case 'F': do_flip = 1; break;
3436 case 'G': do_G = 1; break;
3437 case 'I': do_showinfo = 1; break;
3438 case 'J': options |= PCRE_DUPNAMES; break;
3439 case 'K': do_mark = 1; break;
3440 case 'M': log_store = 1; break;
3441 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3442
3443 #if !defined NOPOSIX
3444 case 'P': do_posix = 1; break;
3445 #endif
3446
3447 case 'S':
3448 do_study = 1;
3449 for (;;)
3450 {
3451 switch (*pp++)
3452 {
3453 case 'S':
3454 do_study = 0;
3455 no_force_study = 1;
3456 break;
3457
3458 case '!':
3459 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3460 break;
3461
3462 case '+':
3463 if (*pp == '+')
3464 {
3465 verify_jit = TRUE;
3466 pp++;
3467 }
3468 if (*pp >= '1' && *pp <= '7')
3469 study_options |= jit_study_bits[*pp++ - '1'];
3470 else
3471 study_options |= jit_study_bits[6];
3472 break;
3473
3474 case '-':
3475 study_options &= ~PCRE_STUDY_ALLJIT;
3476 break;
3477
3478 default:
3479 pp--;
3480 goto ENDLOOP;
3481 }
3482 }
3483 ENDLOOP:
3484 break;
3485
3486 case 'U': options |= PCRE_UNGREEDY; break;
3487 case 'W': options |= PCRE_UCP; break;
3488 case 'X': options |= PCRE_EXTRA; break;
3489 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3490 case 'Z': debug_lengths = 0; break;
3491 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3492 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3493
3494 case 'T':
3495 switch (*pp++)
3496 {
3497 case '0': tables = tables0; break;
3498 case '1': tables = tables1; break;
3499
3500 case '\r':
3501 case '\n':
3502 case ' ':
3503 case 0:
3504 fprintf(outfile, "** Missing table number after /T\n");
3505 goto SKIP_DATA;
3506
3507 default:
3508 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3509 goto SKIP_DATA;
3510 }
3511 break;
3512
3513 case 'L':
3514 ppp = pp;
3515 /* The '\r' test here is so that it works on Windows. */
3516 /* The 0 (NUL terminator) test is just in case this is an unterminated line. */
3517 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3518 *ppp = 0;
3519 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3520 {
3521 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3522 goto SKIP_DATA;
3523 }
3524 locale_set = 1;
3525 tables = PCRE_MAKETABLES;
3526 pp = ppp;
3527 break;
3528
3529 case '>':
3530 to_file = pp;
3531 while (*pp != 0) pp++;
3532 while (isspace(pp[-1])) pp--;
3533 *pp = 0;
3534 break;
3535
3536 case '<':
3537 {
3538 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3539 {
3540 options |= PCRE_JAVASCRIPT_COMPAT;
3541 pp += 3;
3542 }
3543 else
3544 {
3545 int x = check_newline(pp, outfile);
3546 if (x == 0) goto SKIP_DATA;
3547 options |= x;
3548 while (*pp++ != '>');
3549 }
3550 }
3551 break;
3552
3553 case '\r': /* So that it works in Windows */
3554 case '\n':
3555 case ' ':
3556 break;
3557
3558 default:
3559 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3560 goto SKIP_DATA;
3561 }
3562 }
3563
3564 /* Handle compiling via the POSIX interface, which doesn't support the
3565 timing, showing, or debugging options, nor the ability to pass over
3566 local character tables. Neither does it have 16-bit support. */
3567
3568 #if !defined NOPOSIX
3569 if (posix || do_posix)
3570 {
3571 int rc;
3572 int cflags = 0;
3573
3574 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3575 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3576 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3577 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3578 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3579 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3580 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3581
3582 first_gotten_store = 0;
3583 rc = regcomp(&preg, (char *)p, cflags);
3584
3585 /* Compilation failed; go back for another re, skipping to blank line
3586 if non-interactive. */
3587
3588 if (rc != 0)
3589 {
3590 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3591 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3592 goto SKIP_DATA;
3593 }
3594 }
3595
3596 /* Handle compiling via the native interface */
3597
3598 else
3599 #endif /* !defined NOPOSIX */
3600
3601 {
3602 /* In 16- or 32-bit mode, convert the input. */
3603
3604 #ifdef SUPPORT_PCRE16
3605 if (pcre_mode == PCRE16_MODE)
3606 {
3607 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3608 {
3609 case -1:
3610 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3611 "converted to UTF-16\n");
3612 goto SKIP_DATA;
3613
3614 case -2:
3615 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3616 "cannot be converted to UTF-16\n");
3617 goto SKIP_DATA;
3618
3619 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3620 fprintf(outfile, "**Failed: character value greater than 0xffff "
3621 "cannot be converted to 16-bit in non-UTF mode\n");
3622 goto SKIP_DATA;
3623
3624 default:
3625 break;
3626 }
3627 p = (pcre_uint8 *)buffer16;
3628 }
3629 #endif
3630
3631 #ifdef SUPPORT_PCRE32
3632 if (pcre_mode == PCRE32_MODE)
3633 {
3634 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3635 {
3636 case -1:
3637 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3638 "converted to UTF-32\n");
3639 goto SKIP_DATA;
3640
3641 case -2:
3642 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3643 "cannot be converted to UTF-32\n");
3644 goto SKIP_DATA;
3645
3646 case -3:
3647 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3648 goto SKIP_DATA;
3649
3650 default:
3651 break;
3652 }
3653 p = (pcre_uint8 *)buffer32;
3654 }
3655 #endif
3656
3657 /* Compile many times when timing */
3658
3659 if (timeit > 0)
3660 {
3661 register int i;
3662 clock_t time_taken;
3663 clock_t start_time = clock();
3664 for (i = 0; i < timeit; i++)
3665 {
3666 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3667 if (re != NULL) free(re);
3668 }
3669 time_taken = clock() - start_time;
3670 fprintf(outfile, "Compile time %.4f milliseconds\n",
3671 (((double)time_taken * 1000.0) / (double)timeit) /
3672 (double)CLOCKS_PER_SEC);
3673 }
3674
3675 first_gotten_store = 0;
3676 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3677
3678 /* Compilation failed; go back for another re, skipping to blank line
3679 if non-interactive. */
3680
3681 if (re == NULL)
3682 {
3683 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3684 SKIP_DATA:
3685 if (infile != stdin)
3686 {
3687 for (;;)
3688 {
3689 if (extend_inputline(infile, buffer, NULL) == NULL)
3690 {
3691 done = 1;
3692 goto CONTINUE;
3693 }
3694 len = (int)strlen((char *)buffer);
3695 while (len > 0 && isspace(buffer[len-1])) len--;
3696 if (len == 0) break;
3697 }
3698 fprintf(outfile, "\n");
3699 }
3700 goto CONTINUE;
3701 }
3702
3703 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3704 within the regex; check for this so that we know how to process the data
3705 lines. */
3706
3707 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3708 goto SKIP_DATA;
3709 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3710
3711 /* Extract the size for possible writing before possibly flipping it,
3712 and remember the store that was got. */
3713
3714 true_size = REAL_PCRE_SIZE(re);
3715 regex_gotten_store = first_gotten_store;
3716
3717 /* Output code size information if requested */
3718
3719 if (log_store)
3720 {
3721 int name_count, name_entry_size, real_pcre_size;
3722
3723 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3724 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3725 #ifdef SUPPORT_PCRE8
3726 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3727 real_pcre_size = sizeof(real_pcre);
3728 #endif
3729 #ifdef SUPPORT_PCRE16
3730 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3731 real_pcre_size = sizeof(real_pcre16);
3732 #endif
3733 #ifdef SUPPORT_PCRE32
3734 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3735 real_pcre_size = sizeof(real_pcre32);
3736 #endif
3737 fprintf(outfile, "Memory allocation (code space): %d\n",
3738 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3739 }
3740
3741 /* If -s or /S was present, study the regex to generate additional info to
3742 help with the matching, unless the pattern has the SS option, which
3743 suppresses the effect of /S (used for a few test patterns where studying is
3744 never sensible). */
3745
3746 if (do_study || (force_study >= 0 && !no_force_study))
3747 {
3748 if (timeit > 0)
3749 {
3750 register int i;
3751 clock_t time_taken;
3752 clock_t start_time = clock();
3753 for (i = 0; i < timeit; i++)
3754 {
3755 PCRE_STUDY(extra, re, study_options, &error);
3756 }
3757 time_taken = clock() - start_time;
3758 if (extra != NULL)
3759 {
3760 PCRE_FREE_STUDY(extra);
3761 }
3762 fprintf(outfile, " Study time %.4f milliseconds\n",
3763 (((double)time_taken * 1000.0) / (double)timeit) /
3764 (double)CLOCKS_PER_SEC);
3765 }
3766 PCRE_STUDY(extra, re, study_options, &error);
3767 if (error != NULL)
3768 fprintf(outfile, "Failed to study: %s\n", error);
3769 else if (extra != NULL)
3770 {
3771 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3772 if (log_store)
3773 {
3774 size_t jitsize;
3775 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3776 jitsize != 0)
3777 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3778 }
3779 }
3780 }
3781
3782 /* If /K was present, we set up for handling MARK data. */
3783
3784 if (do_mark)
3785 {
3786 if (extra == NULL)
3787 {
3788 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3789 extra->flags = 0;
3790 }
3791 extra->mark = &markptr;
3792 extra->flags |= PCRE_EXTRA_MARK;
3793 }
3794
3795 /* Extract and display information from the compiled data if required. */
3796
3797 SHOW_INFO:
3798
3799 if (do_debug)
3800 {
3801 fprintf(outfile, "------------------------------------------------------------------\n");
3802 PCRE_PRINTINT(re, outfile, debug_lengths);
3803 }
3804
3805 /* We already have the options in get_options (see above) */
3806
3807 if (do_showinfo)
3808 {
3809 unsigned long int all_options;
3810 pcre_uint32 first_char, need_char;
3811 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
3812 hascrorlf, maxlookbehind;
3813 int nameentrysize, namecount;
3814 const pcre_uint8 *nametable;
3815
3816 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3817 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3818 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3819 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
3820 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
3821 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
3822 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
3823 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3824 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3825 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3826 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3827 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3828 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3829 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3830 != 0)
3831 goto SKIP_DATA;
3832
3833 if (size != regex_gotten_store) fprintf(outfile,
3834 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3835 (int)size, (int)regex_gotten_store);
3836
3837 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3838 if (backrefmax > 0)
3839 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3840
3841 if (namecount > 0)
3842 {
3843 fprintf(outfile, "Named capturing subpatterns:\n");
3844 while (namecount-- > 0)
3845 {
3846 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
3847 int length = (int)STRLEN(nametable + imm2_size);
3848 fprintf(outfile, " ");
3849 PCHARSV(nametable, imm2_size, length, outfile);
3850 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3851 #ifdef SUPPORT_PCRE32
3852 if (pcre_mode == PCRE32_MODE)
3853 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
3854 #endif
3855 #ifdef SUPPORT_PCRE16
3856 if (pcre_mode == PCRE16_MODE)
3857 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
3858 #endif
3859 #ifdef SUPPORT_PCRE8
3860 if (pcre_mode == PCRE8_MODE)
3861 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
3862 #endif
3863 nametable += nameentrysize * CHAR_SIZE;
3864 }
3865 }
3866
3867 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3868 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3869
3870 all_options = REAL_PCRE_OPTIONS(re);
3871 if (do_flip) all_options = swap_uint32(all_options);
3872
3873 if (get_options == 0) fprintf(outfile, "No options\n");
3874 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3875 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3876 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3877 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3878 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3879 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3880 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3881 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3882 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3883 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3884 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3885 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3886 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3887 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3888 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3889 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3890 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3891 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3892
3893 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3894
3895 switch (get_options & PCRE_NEWLINE_BITS)
3896 {
3897 case PCRE_NEWLINE_CR:
3898 fprintf(outfile, "Forced newline sequence: CR\n");
3899 break;
3900
3901 case PCRE_NEWLINE_LF:
3902 fprintf(outfile, "Forced newline sequence: LF\n");
3903 break;
3904
3905 case PCRE_NEWLINE_CRLF:
3906 fprintf(outfile, "Forced newline sequence: CRLF\n");
3907 break;
3908
3909 case PCRE_NEWLINE_ANYCRLF:
3910 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3911 break;
3912
3913 case PCRE_NEWLINE_ANY:
3914 fprintf(outfile, "Forced newline sequence: ANY\n");
3915 break;
3916
3917 default:
3918 break;
3919 }
3920
3921 if (first_char_set == 2)
3922 {
3923 fprintf(outfile, "First char at start or follows newline\n");
3924 }
3925 else if (first_char_set == 1)
3926 {
3927 const char *caseless =
3928 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
3929 "" : " (caseless)";
3930
3931 if (PRINTOK(first_char))
3932 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3933 else
3934 {
3935 fprintf(outfile, "First char = ");
3936 pchar(first_char, outfile);
3937 fprintf(outfile, "%s\n", caseless);
3938 }
3939 }
3940 else
3941 {
3942 fprintf(outfile, "No first char\n");
3943 }
3944
3945 if (need_char_set == 0)
3946 {
3947 fprintf(outfile, "No need char\n");
3948 }
3949 else
3950 {
3951 const char *caseless =
3952 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
3953 "" : " (caseless)";
3954
3955 if (PRINTOK(need_char))
3956 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3957 else
3958 {
3959 fprintf(outfile, "Need char = ");
3960 pchar(need_char, outfile);
3961 fprintf(outfile, "%s\n", caseless);
3962 }
3963 }
3964
3965 if (maxlookbehind > 0)
3966 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3967
3968 /* Don't output study size; at present it is in any case a fixed
3969 value, but it varies, depending on the computer architecture, and
3970 so messes up the test suite. (And with the /F option, it might be
3971 flipped.) If study was forced by an external -s, don't show this
3972 information unless -i or -d was also present. This means that, except
3973 when auto-callouts are involved, the output from runs with and without
3974 -s should be identical. */
3975
3976 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3977 {
3978 if (extra == NULL)
3979 fprintf(outfile, "Study returned NULL\n");
3980 else
3981 {
3982 pcre_uint8 *start_bits = NULL;
3983 int minlength;
3984
3985 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3986 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3987
3988 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3989 {
3990 if (start_bits == NULL)
3991 fprintf(outfile, "No set of starting bytes\n");
3992 else
3993 {
3994 int i;
3995 int c = 24;
3996 fprintf(outfile, "Starting byte set: ");
3997 for (i = 0; i < 256; i++)
3998 {
3999 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4000 {
4001 if (c > 75)
4002 {
4003 fprintf(outfile, "\n ");
4004 c = 2;
4005 }
4006 if (PRINTOK(i) && i != ' ')
4007 {
4008 fprintf(outfile, "%c ", i);
4009 c += 2;
4010 }
4011 else
4012 {
4013 fprintf(outfile, "\\x%02x ", i);
4014 c += 5;
4015 }
4016 }
4017 }
4018 fprintf(outfile, "\n");
4019 }
4020 }
4021 }
4022
4023 /* Show this only if the JIT was set by /S, not by -s. */
4024
4025 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4026 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4027 {
4028 int jit;
4029 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4030 {
4031 if (jit)
4032 fprintf(outfile, "JIT study was successful\n");
4033 else
4034 #ifdef SUPPORT_JIT
4035 fprintf(outfile, "JIT study was not successful\n");
4036 #else
4037 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4038 #endif
4039 }
4040 }
4041 }
4042 }
4043
4044 /* If the '>' option was present, we write out the regex to a file, and
4045 that is all. The first 8 bytes of the file are the regex length and then
4046 the study length, in big-endian order. */
4047
4048 if (to_file != NULL)
4049 {
4050 FILE *f = fopen((char *)to_file, "wb");
4051 if (f == NULL)
4052 {
4053 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4054 }
4055 else
4056 {
4057 pcre_uint8 sbuf[8];
4058
4059 if (do_flip) regexflip(re, extra);
4060 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4061 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4062 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4063 sbuf[3] = (pcre_uint8)((true_size) & 255);
4064 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4065 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4066 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4067 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4068
4069 if (fwrite(sbuf, 1, 8, f) < 8 ||
4070 fwrite(re, 1, true_size, f) < true_size)
4071 {
4072 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4073 }
4074 else
4075 {
4076 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4077
4078 /* If there is study data, write it. */
4079
4080 if (extra != NULL)
4081 {
4082 if (fwrite(extra->study_data, 1, true_study_size, f) <
4083 true_study_size)
4084 {
4085 fprintf(outfile, "Write error on %s: %s\n", to_file,
4086 strerror(errno));
4087 }
4088 else fprintf(outfile, "Study data written to %s\n", to_file);
4089 }
4090 }
4091 fclose(f);
4092 }
4093
4094 new_free(re);
4095 if (extra != NULL)
4096 {
4097 PCRE_FREE_STUDY(extra);
4098 }
4099 if (locale_set)
4100 {
4101 new_free((void *)tables);
4102 setlocale(LC_CTYPE, "C");
4103 locale_set = 0;
4104 }
4105 continue; /* With next regex */
4106 }
4107 } /* End of non-POSIX compile */
4108
4109 /* Read data lines and test them */
4110
4111 for (;;)
4112 {
4113 #ifdef SUPPORT_PCRE8
4114 pcre_uint8 *q8;
4115 #endif
4116 #ifdef SUPPORT_PCRE16
4117 pcre_uint16 *q16;
4118 #endif
4119 #ifdef SUPPORT_PCRE32
4120 pcre_uint32 *q32;
4121 #endif
4122 pcre_uint8 *bptr;
4123 int *use_offsets = offsets;
4124 int use_size_offsets = size_offsets;
4125 int callout_data = 0;
4126 int callout_data_set = 0;
4127 int count;
4128 pcre_uint32 c;
4129 int copystrings = 0;
4130 int find_match_limit = default_find_match_limit;
4131 int getstrings = 0;
4132 int getlist = 0;
4133 int gmatched = 0;
4134 int start_offset = 0;
4135 int start_offset_sign = 1;
4136 int g_notempty = 0;
4137 int use_dfa = 0;
4138
4139 *copynames = 0;
4140 *getnames = 0;
4141
4142 #ifdef SUPPORT_PCRE32
4143 cn32ptr = copynames;
4144 gn32ptr = getnames;
4145 #endif
4146 #ifdef SUPPORT_PCRE16
4147 cn16ptr = copynames16;
4148 gn16ptr = getnames16;
4149 #endif
4150 #ifdef SUPPORT_PCRE8
4151 cn8ptr = copynames8;
4152 gn8ptr = getnames8;
4153 #endif
4154
4155 SET_PCRE_CALLOUT(callout);
4156 first_callout = 1;
4157 last_callout_mark = NULL;
4158 callout_extra = 0;
4159 callout_count = 0;
4160 callout_fail_count = 999999;
4161 callout_fail_id = -1;
4162 show_malloc = 0;
4163 options = 0;
4164
4165 if (extra != NULL) extra->flags &=
4166 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4167
4168 len = 0;
4169 for (;;)
4170 {
4171 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4172 {
4173 if (len > 0) /* Reached EOF without hitting a newline */
4174 {
4175 fprintf(outfile, "\n");
4176 break;
4177 }
4178 done = 1;
4179 goto CONTINUE;
4180 }
4181 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4182 len = (int)strlen((char *)buffer);
4183 if (buffer[len-1] == '\n') break;
4184 }
4185
4186 while (len > 0 && isspace(buffer[len-1])) len--;
4187 buffer[len] = 0;
4188 if (len == 0) break;
4189
4190 p = buffer;
4191 while (isspace(*p)) p++;
4192
4193 #ifndef NOUTF
4194 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4195 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4196 if (use_utf)
4197 {
4198 char *q;
4199 pcre_uint32 c;
4200 int n = 1;
4201
4202 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &c);
4203 if (n <= 0)
4204 {
4205 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4206 goto NEXT_DATA;
4207 }
4208 }
4209 #endif
4210
4211 /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4212 the number of pcre_uchar units that will be needed. */
4213 if (dbuffer == NULL || len >= dbuffer_size)
4214 {
4215 dbuffer_size *= 2;
4216 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4217 if (dbuffer == NULL)
4218 {
4219 fprintf(stderr, "pcretest: malloc(%d) failed\n", dbuffer_size);
4220 exit(1);
4221 }
4222 }
4223
4224 #ifdef SUPPORT_PCRE8
4225 q8 = (pcre_uint8 *) dbuffer;
4226 #endif
4227 #ifdef SUPPORT_PCRE16
4228 q16 = (pcre_uint16 *) dbuffer;
4229 #endif
4230 #ifdef SUPPORT_PCRE32
4231 q32 = (pcre_uint32 *) dbuffer;
4232 #endif
4233
4234 while ((c = *p++) != 0)
4235 {
4236 int i = 0;
4237 int n = 0;
4238
4239 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4240 In non-UTF mode, allow the value of the byte to fall through to later,
4241 where values greater than 127 are turned into UTF-8 when running in
4242 16-bit or 32-bit mode. */
4243
4244 if (c != '\\')
4245 {
4246 #ifndef NOUTF
4247 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4248 #endif
4249 }
4250
4251 /* Handle backslash escapes */
4252
4253 else switch ((c = *p++))
4254 {
4255 case 'a': c = 7; break;
4256 case 'b': c = '\b'; break;
4257 case 'e': c = 27; break;
4258 case 'f': c = '\f'; break;
4259 case 'n': c = '\n'; break;
4260 case 'r': c = '\r'; break;
4261 case 't': c = '\t'; break;
4262 case 'v': c = '\v'; break;
4263
4264 case '0': case '1': case '2': case '3':
4265 case '4': case '5': case '6': case '7':
4266 c -= '0';
4267 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4268 c = c * 8 + *p++ - '0';
4269 break;
4270
4271 case 'x':
4272 if (*p == '{')
4273 {
4274 pcre_uint8 *pt = p;
4275 c = 0;
4276
4277 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4278 when isxdigit() is a macro that refers to its argument more than
4279 once. This is banned by the C Standard, but apparently happens in at
4280 least one MacOS environment. */
4281
4282 for (pt++; isxdigit(*pt); pt++)
4283 {
4284 if (++i == 9)
4285 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4286 "using only the first eight.\n");
4287 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4288 }
4289 if (*pt == '}')
4290 {
4291 p = pt + 1;
4292 break;
4293 }
4294 /* Not correct form for \x{...}; fall through */
4295 }
4296
4297 /* \x without {} always defines just one byte in 8-bit mode. This
4298 allows UTF-8 characters to be constructed byte by byte, and also allows
4299 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4300 Otherwise, pass it down to later code so that it can be turned into
4301 UTF-8 when running in 16/32-bit mode. */
4302
4303 c = 0;
4304 while (i++ < 2 && isxdigit(*p))
4305 {
4306 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4307 p++;
4308 }
4309 #if !defined NOUTF && defined SUPPORT_PCRE8
4310 if (use_utf && (pcre_mode == PCRE8_MODE))
4311 {
4312 *q8++ = c;
4313 continue;
4314 }
4315 #endif
4316 break;
4317
4318 case 0: /* \ followed by EOF allows for an empty line */
4319 p--;
4320 continue;
4321
4322 case '>':
4323 if (*p == '-')
4324 {
4325 start_offset_sign = -1;
4326 p++;
4327 }
4328 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4329 start_offset *= start_offset_sign;
4330 continue;
4331
4332 case 'A': /* Option setting */
4333 options |= PCRE_ANCHORED;
4334 continue;
4335
4336 case 'B':
4337 options |= PCRE_NOTBOL;
4338 continue;
4339
4340 case 'C':
4341 if (isdigit(*p)) /* Set copy string */
4342 {
4343 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4344 copystrings |= 1 << n;
4345 }
4346 else if (isalnum(*p))
4347 {
4348 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4349 }
4350 else if (*p == '+')
4351 {
4352 callout_extra = 1;
4353 p++;
4354 }
4355 else if (*p == '-')
4356 {
4357 SET_PCRE_CALLOUT(NULL);
4358 p++;
4359 }
4360 else if (*p == '!')
4361 {
4362 callout_fail_id = 0;
4363 p++;
4364 while(isdigit(*p))
4365 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4366 callout_fail_count = 0;
4367 if (*p == '!')
4368 {
4369 p++;
4370 while(isdigit(*p))
4371 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4372 }
4373 }
4374 else if (*p == '*')
4375 {
4376 int sign = 1;
4377 callout_data = 0;
4378 if (*(++p) == '-') { sign = -1; p++; }
4379 while(isdigit(*p))
4380 callout_data = callout_data * 10 + *p++ - '0';
4381 callout_data *= sign;
4382 callout_data_set = 1;
4383 }
4384 continue;
4385
4386 #if !defined NODFA
4387 case 'D':
4388 #if !defined NOPOSIX
4389 if (posix || do_posix)
4390 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4391 else
4392 #endif
4393 use_dfa = 1;
4394 continue;
4395 #endif
4396
4397 #if !defined NODFA
4398 case 'F':
4399 options |= PCRE_DFA_SHORTEST;
4400 continue;
4401 #endif
4402
4403 case 'G':
4404 if (isdigit(*p))
4405 {
4406 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4407 getstrings |= 1 << n;
4408 }
4409 else if (isalnum(*p))
4410 {
4411 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4412 }
4413 continue;
4414
4415 case 'J':
4416 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4417 if (extra != NULL
4418 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4419 && extra->executable_jit != NULL)
4420 {
4421 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4422 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4423 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4424 }
4425 continue;
4426
4427 case 'L':
4428 getlist = 1;
4429 continue;
4430
4431 case 'M':
4432 find_match_limit = 1;
4433 continue;
4434
4435 case 'N':
4436 if ((options & PCRE_NOTEMPTY) != 0)
4437 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4438 else
4439 options |= PCRE_NOTEMPTY;
4440 continue;
4441
4442 case 'O':
4443 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4444 if (n > size_offsets_max)
4445 {
4446 size_offsets_max = n;
4447 free(offsets);
4448 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4449 if (offsets == NULL)
4450 {
4451 printf("** Failed to get %d bytes of memory for offsets vector\n",
4452 (int)(size_offsets_max * sizeof(int)));
4453 yield = 1;
4454 goto EXIT;
4455 }
4456 }
4457 use_size_offsets = n;
4458 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4459 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4460 continue;
4461
4462 case 'P':
4463 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4464 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4465 continue;
4466
4467 case 'Q':
4468 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4469 if (extra == NULL)
4470 {
4471 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4472 extra->flags = 0;
4473 }
4474 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4475 extra->match_limit_recursion = n;
4476 continue;
4477
4478 case 'q':
4479 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4480 if (extra == NULL)
4481 {
4482 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4483 extra->flags = 0;
4484 }
4485 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4486 extra->match_limit = n;
4487 continue;
4488
4489 #if !defined NODFA
4490 case 'R':
4491 options |= PCRE_DFA_RESTART;
4492 continue;
4493 #endif
4494
4495 case 'S':
4496 show_malloc = 1;
4497 continue;
4498
4499 case 'Y':
4500 options |= PCRE_NO_START_OPTIMIZE;
4501 continue;
4502
4503 case 'Z':
4504 options |= PCRE_NOTEOL;
4505 continue;
4506
4507 case '?':
4508 options |= PCRE_NO_UTF8_CHECK;
4509 continue;
4510
4511 case '<':
4512 {
4513 int x = check_newline(p, outfile);
4514 if (x == 0) goto NEXT_DATA;
4515 options |= x;
4516 while (*p++ != '>');
4517 }
4518 continue;
4519 }
4520
4521 /* We now have a character value in c that may be greater than 255. In
4522 16-bit or 32-bit mode, we always convert characters to UTF-8 so that
4523 values greater than 255 can be passed to non-UTF 16- or 32-bit strings.
4524 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4525 than 127 in UTF mode must have come from \x{...} or octal constructs
4526 because values from \x.. get this far only in non-UTF mode. */
4527
4528 #ifdef SUPPORT_PCRE8
4529 if (pcre_mode == PCRE8_MODE)
4530 {
4531 #ifndef NOUTF
4532 if (use_utf)
4533 {
4534 q8 += ord2utf8(c, q8);
4535 }
4536 else
4537 #endif
4538 {
4539 if (c > 0xffu)
4540 {
4541 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4542 "and UTF-8 mode is not enabled.\n", c);
4543 fprintf(outfile, "** Truncation will probably give the wrong "
4544 "result.\n");
4545 }
4546
4547 *q8++ = c;
4548 }
4549 }
4550 #endif
4551 #ifdef SUPPORT_PCRE16
4552 if (pcre_mode == PCRE16_MODE)
4553 {
4554 #ifndef NOUTF
4555 if (use_utf)
4556 {
4557 if (c > 0x10ffffu)
4558 {
4559 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4560 "cannot be converted to UTF-16\n");
4561 goto NEXT_DATA;
4562 }
4563 else if (c >= 0x10000u)
4564 {
4565 c-= 0x10000u;
4566 *q16++ = 0xD800 | (c >> 10);
4567 *q16++ = 0xDC00 | (c & 0x3ff);
4568 }
4569 else
4570 *q16++ = c;
4571 }
4572 else
4573 #endif
4574 {
4575 if (c > 0xffffu)
4576 {
4577 fprintf(outfile, "** Character value is greater than 0xffff "
4578 "and UTF-16 mode is not enabled.\n", c);
4579 fprintf(outfile, "** Truncation will probably give the wrong "
4580 "result.\n");
4581 }
4582
4583 *q16++ = c;
4584 }
4585 }
4586 #endif
4587 #ifdef SUPPORT_PCRE32
4588 if (pcre_mode == PCRE32_MODE)
4589 {
4590 *q32++ = c;
4591 }
4592 #endif
4593
4594 }
4595
4596 /* Reached end of subject string */
4597
4598 #ifdef SUPPORT_PCRE8
4599 if (pcre_mode == PCRE8_MODE)
4600 {
4601 *q8 = 0;
4602 len = (int)(q8 - (pcre_uint8 *)dbuffer);
4603 }
4604 #endif
4605 #ifdef SUPPORT_PCRE16
4606 if (pcre_mode == PCRE16_MODE)
4607 {
4608 *q16 = 0;
4609 len = (int)(q16 - (pcre_uint16 *)dbuffer);
4610 }
4611 #endif
4612 #ifdef SUPPORT_PCRE32
4613 if (pcre_mode == PCRE32_MODE)
4614 {
4615 *q32 = 0;
4616 len = (int)(q32 - (pcre_uint32 *)dbuffer);
4617 }
4618 #endif
4619
4620 #if defined SUPPORT_UTF && defined SUPPORT_PCRE32
4621 /* If we're requested to test UTF-32 masking of high bits, change the data
4622 string to have high bits set, unless the string is invalid UTF-32.
4623 Since the JIT doesn't support this yet, only do it when not JITing. */
4624 if (use_utf && mask_utf32 && (study_options & PCRE_STUDY_ALLJIT) == 0 &&
4625 valid_utf32((pcre_uint32 *)dbuffer, len))
4626 {
4627 for (q32 = (pcre_uint32 *)dbuffer; *q32; q32++)
4628 *q32 |= ~(pcre_uint32)UTF32_MASK;
4629
4630 options |= PCRE_NO_UTF32_CHECK;
4631 }
4632 #endif
4633
4634 /* Move the data to the end of the buffer so that a read over the end of
4635 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
4636 we are using the POSIX interface, we must include the terminating zero. */
4637
4638 bptr = dbuffer;
4639
4640 #if !defined NOPOSIX
4641 if (posix || do_posix)
4642 {
4643 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4644 bptr += dbuffer_size - len - 1;
4645 }
4646 else
4647 #endif
4648 {
4649 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4650 }
4651
4652 if ((all_use_dfa || use_dfa) && find_match_limit)
4653 {
4654 printf("**Match limit not relevant for DFA matching: ignored\n");
4655 find_match_limit = 0;
4656 }
4657
4658 /* Handle matching via the POSIX interface, which does not
4659 support timing or playing with the match limit or callout data. */
4660
4661 #if !defined NOPOSIX
4662 if (posix || do_posix)
4663 {
4664 int rc;
4665 int eflags = 0;
4666 regmatch_t *pmatch = NULL;
4667 if (use_size_offsets > 0)
4668 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4669 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4670 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4671 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4672
4673 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4674
4675 if (rc != 0)
4676 {
4677 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4678 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4679 }
4680 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4681 {
4682 fprintf(outfile, "Matched with REG_NOSUB\n");
4683 }
4684 else
4685 {
4686 size_t i;
4687 for (i = 0; i < (size_t)use_size_offsets; i++)
4688 {
4689 if (pmatch[i].rm_so >= 0)
4690 {
4691 fprintf(outfile, "%2d: ", (int)i);
4692 PCHARSV(dbuffer, pmatch[i].rm_so,
4693 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4694 fprintf(outfile, "\n");
4695 if (do_showcaprest || (i == 0 && do_showrest))
4696 {
4697 fprintf(outfile, "%2d+ ", (int)i);
4698 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4699 outfile);
4700 fprintf(outfile, "\n");
4701 }
4702 }
4703 }
4704 }
4705 free(pmatch);
4706 goto NEXT_DATA;
4707 }
4708
4709 #endif /* !defined NOPOSIX */
4710
4711 /* Handle matching via the native interface - repeats for /g and /G */
4712
4713 /* Ensure that there is a JIT callback if we want to verify that JIT was
4714 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4715
4716 if (verify_jit && jit_stack == NULL && extra != NULL)
4717 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4718
4719 for (;; gmatched++) /* Loop for /g or /G */
4720 {
4721 markptr = NULL;
4722 jit_was_used = FALSE;
4723
4724 if (timeitm > 0)
4725 {
4726 register int i;
4727 clock_t time_taken;
4728 clock_t start_time = clock();
4729
4730 #if !defined NODFA
4731 if (all_use_dfa || use_dfa)
4732 {
4733 if ((options & PCRE_DFA_RESTART) != 0)
4734 {
4735 fprintf(outfile, "Timing DFA restarts is not supported\n");
4736 break;
4737 }
4738 if (dfa_workspace == NULL)
4739 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4740 for (i = 0; i < timeitm; i++)
4741 {
4742 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4743 (options | g_notempty), use_offsets, use_size_offsets,
4744 dfa_workspace, DFA_WS_DIMENSION);
4745 }
4746 }
4747 else
4748 #endif
4749
4750 for (i = 0; i < timeitm; i++)
4751 {
4752 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4753 (options | g_notempty), use_offsets, use_size_offsets);
4754 }
4755 time_taken = clock() - start_time;
4756 fprintf(outfile, "Execute time %.4f milliseconds\n",
4757 (((double)time_taken * 1000.0) / (double)timeitm) /
4758 (double)CLOCKS_PER_SEC);
4759 }
4760
4761 /* If find_match_limit is set, we want to do repeated matches with
4762 varying limits in order to find the minimum value for the match limit and
4763 for the recursion limit. The match limits are relevant only to the normal
4764 running of pcre_exec(), so disable the JIT optimization. This makes it
4765 possible to run the same set of tests with and without JIT externally
4766 requested. */
4767
4768 if (find_match_limit)
4769 {
4770 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4771 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4772 extra->flags = 0;
4773
4774 (void)check_match_limit(re, extra, bptr, len, start_offset,
4775 options|g_notempty, use_offsets, use_size_offsets,
4776 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4777 PCRE_ERROR_MATCHLIMIT, "match()");
4778
4779 count = check_match_limit(re, extra, bptr, len, start_offset,
4780 options|g_notempty, use_offsets, use_size_offsets,
4781 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4782 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4783 }
4784
4785 /* If callout_data is set, use the interface with additional data */
4786
4787 else if (callout_data_set)
4788 {
4789 if (extra == NULL)
4790 {
4791 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4792 extra->flags = 0;
4793 }
4794 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4795 extra->callout_data = &callout_data;
4796 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4797 options | g_notempty, use_offsets, use_size_offsets);
4798 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4799 }
4800
4801 /* The normal case is just to do the match once, with the default
4802 value of match_limit. */
4803
4804 #if !defined NODFA
4805 else if (all_use_dfa || use_dfa)
4806 {
4807 if (dfa_workspace == NULL)
4808 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4809 if (dfa_matched++ == 0)
4810 dfa_workspace[0] = -1; /* To catch bad restart */
4811 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4812 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4813 DFA_WS_DIMENSION);
4814 if (count == 0)
4815 {
4816 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4817 count = use_size_offsets/2;
4818 }
4819 }
4820 #endif
4821
4822 else
4823 {
4824 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4825 options | g_notempty, use_offsets, use_size_offsets);
4826 if (count == 0)
4827 {
4828 fprintf(outfile, "Matched, but too many substrings\n");
4829 count = use_size_offsets/3;
4830 }
4831 }
4832
4833 /* Matched */
4834
4835 if (count >= 0)
4836 {
4837 int i, maxcount;
4838 void *cnptr, *gnptr;
4839
4840 #if !defined NODFA
4841 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4842 #endif
4843 maxcount = use_size_offsets/3;
4844
4845 /* This is a check against a lunatic return value. */
4846
4847 if (count > maxcount)
4848 {
4849 fprintf(outfile,
4850 "** PCRE error: returned count %d is too big for offset size %d\n",
4851 count, use_size_offsets);
4852 count = use_size_offsets/3;
4853 if (do_g || do_G)
4854 {
4855 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4856 do_g = do_G = FALSE; /* Break g/G loop */
4857 }
4858 }
4859
4860 /* do_allcaps requests showing of all captures in the pattern, to check
4861 unset ones at the end. */
4862
4863 if (do_allcaps)
4864 {
4865 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4866 goto SKIP_DATA;
4867 count++; /* Allow for full match */
4868 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4869 }
4870
4871 /* Output the captured substrings */
4872
4873 for (i = 0; i < count * 2; i += 2)
4874 {
4875 if (use_offsets[i] < 0)
4876 {
4877 if (use_offsets[i] != -1)
4878 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4879 use_offsets[i], i);
4880 if (use_offsets[i+1] != -1)
4881 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4882 use_offsets[i+1], i+1);
4883 fprintf(outfile, "%2d: <unset>\n", i/2);
4884 }
4885 else
4886 {
4887 fprintf(outfile, "%2d: ", i/2);
4888 PCHARSV(bptr, use_offsets[i],
4889 use_offsets[i+1] - use_offsets[i], outfile);
4890 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4891 fprintf(outfile, "\n");
4892 if (do_showcaprest || (i == 0 && do_showrest))
4893 {
4894 fprintf(outfile, "%2d+ ", i/2);
4895 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4896 outfile);
4897 fprintf(outfile, "\n");
4898 }
4899 }
4900 }
4901
4902 if (markptr != NULL)
4903 {
4904 fprintf(outfile, "MK: ");
4905 PCHARSV(markptr, 0, -1, outfile);
4906 fprintf(outfile, "\n");
4907 }
4908
4909 for (i = 0; i < 32; i++)
4910 {
4911 if ((copystrings & (1 << i)) != 0)
4912 {
4913 int rc;
4914 char copybuffer[256];
4915 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4916 copybuffer, sizeof(copybuffer));
4917 if (rc < 0)
4918 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4919 else
4920 {
4921 fprintf(outfile, "%2dC ", i);
4922 PCHARSV(copybuffer, 0, rc, outfile);
4923 fprintf(outfile, " (%d)\n", rc);
4924 }
4925 }
4926 }
4927
4928 cnptr = copynames;
4929 for (;;)
4930 {
4931 int rc;
4932 char copybuffer[256];
4933
4934 #ifdef SUPPORT_PCRE32
4935 if (pcre_mode == PCRE32_MODE)
4936 {
4937 if (*(pcre_uint32 *)cnptr == 0) break;
4938 }
4939 #endif
4940 #ifdef SUPPORT_PCRE16
4941 if (pcre_mode == PCRE16_MODE)
4942 {
4943 if (*(pcre_uint16 *)cnptr == 0) break;
4944 }
4945 #endif
4946 #ifdef SUPPORT_PCRE8
4947 if (pcre_mode == PCRE8_MODE)
4948 {
4949 if (*(pcre_uint8 *)cnptr == 0) break;
4950 }
4951 #endif
4952
4953 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4954 cnptr, copybuffer, sizeof(copybuffer));
4955
4956 if (rc < 0)
4957 {
4958 fprintf(outfile, "copy substring ");
4959 PCHARSV(cnptr, 0, -1, outfile);
4960 fprintf(outfile, " failed %d\n", rc);
4961 }
4962 else
4963 {
4964 fprintf(outfile, " C ");
4965 PCHARSV(copybuffer, 0, rc, outfile);
4966 fprintf(outfile, " (%d) ", rc);
4967 PCHARSV(cnptr, 0, -1, outfile);
4968 putc('\n', outfile);
4969 }
4970
4971 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4972 }
4973
4974 for (i = 0; i < 32; i++)
4975 {
4976 if ((getstrings & (1 << i)) != 0)
4977 {
4978 int rc;
4979 const char *substring;
4980 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4981 if (rc < 0)
4982 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4983 else
4984 {
4985 fprintf(outfile, "%2dG ", i);
4986 PCHARSV(substring, 0, rc, outfile);
4987 fprintf(outfile, " (%d)\n", rc);
4988 PCRE_FREE_SUBSTRING(substring);
4989 }
4990 }
4991 }
4992
4993 gnptr = getnames;
4994 for (;;)
4995 {
4996 int rc;
4997 const char *substring;
4998
4999 #ifdef SUPPORT_PCRE32
5000 if (pcre_mode == PCRE32_MODE)
5001 {
5002 if (*(pcre_uint32 *)gnptr == 0) break;
5003 }
5004 #endif
5005 #ifdef SUPPORT_PCRE16
5006 if (pcre_mode == PCRE16_MODE)
5007 {
5008 if (*(pcre_uint16 *)gnptr == 0) break;
5009 }
5010 #endif
5011 #ifdef SUPPORT_PCRE8
5012 if (pcre_mode == PCRE8_MODE)
5013 {
5014 if (*(pcre_uint8 *)gnptr == 0) break;
5015 }
5016 #endif
5017
5018 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5019 gnptr, &substring);
5020 if (rc < 0)
5021 {
5022 fprintf(outfile, "get substring ");
5023 PCHARSV(gnptr, 0, -1, outfile);
5024 fprintf(outfile, " failed %d\n", rc);
5025 }
5026 else
5027 {
5028 fprintf(outfile, " G ");
5029 PCHARSV(substring, 0, rc, outfile);
5030 fprintf(outfile, " (%d) ", rc);
5031 PCHARSV(gnptr, 0, -1, outfile);
5032 PCRE_FREE_SUBSTRING(substring);
5033 putc('\n', outfile);
5034 }
5035
5036 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5037 }
5038
5039 if (getlist)
5040 {
5041 int rc;
5042 const char **stringlist;
5043 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5044 if (rc < 0)
5045 fprintf(outfile, "get substring list failed %d\n", rc);
5046 else
5047 {
5048 for (i = 0; i < count; i++)
5049 {
5050 fprintf(outfile, "%2dL ", i);
5051 PCHARSV(stringlist[i], 0, -1, outfile);
5052 putc('\n', outfile);
5053 }
5054 if (stringlist[i] != NULL)
5055 fprintf(outfile, "string list not terminated by NULL\n");
5056 PCRE_FREE_SUBSTRING_LIST(stringlist);
5057 }
5058 }
5059 }
5060
5061 /* There was a partial match */
5062
5063 else if (count == PCRE_ERROR_PARTIAL)
5064 {
5065 if (markptr == NULL) fprintf(outfile, "Partial match");
5066 else
5067 {
5068 fprintf(outfile, "Partial match, mark=");
5069 PCHARSV(markptr, 0, -1, outfile);
5070 }
5071 if (use_size_offsets > 1)
5072 {
5073 fprintf(outfile, ": ");
5074 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5075 outfile);
5076 }
5077 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5078 fprintf(outfile, "\n");
5079 break; /* Out of the /g loop */
5080 }
5081
5082 /* Failed to match. If this is a /g or /G loop and we previously set
5083 g_notempty after a null match, this is not necessarily the end. We want
5084 to advance the start offset, and continue. We won't be at the end of the
5085 string - that was checked before setting g_notempty.
5086
5087 Complication arises in the case when the newline convention is "any",
5088 "crlf", or "anycrlf". If the previous match was at the end of a line
5089 terminated by CRLF, an advance of one character just passes the \r,
5090 whereas we should prefer the longer newline sequence, as does the code in
5091 pcre_exec(). Fudge the offset value to achieve this. We check for a
5092 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5093 find the default.
5094
5095 Otherwise, in the case of UTF-8 matching, the advance must be one
5096 character, not one byte. */
5097
5098 else
5099 {
5100 if (g_notempty != 0)
5101 {
5102 int onechar = 1;
5103 unsigned int obits = REAL_PCRE_OPTIONS(re);
5104 use_offsets[0] = start_offset;
5105 if ((obits & PCRE_NEWLINE_BITS) == 0)
5106 {
5107 int d;
5108 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5109 /* Note that these values are always the ASCII ones, even in
5110 EBCDIC environments. CR = 13, NL = 10. */
5111 obits = (d == 13)? PCRE_NEWLINE_CR :
5112 (d == 10)? PCRE_NEWLINE_LF :
5113 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5114 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5115 (d == -1)? PCRE_NEWLINE_ANY : 0;
5116 }
5117 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5118 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5119 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5120 &&
5121 start_offset < len - 1 && (
5122 #ifdef SUPPORT_PCRE8
5123 (pcre_mode == PCRE8_MODE &&
5124 bptr[start_offset] == '\r' &&
5125 bptr[start_offset + 1] == '\n') ||
5126 #endif
5127 #ifdef SUPPORT_PCRE16
5128 (pcre_mode == PCRE16_MODE &&
5129 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5130 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5131 #endif
5132 #ifdef SUPPORT_PCRE32
5133 (pcre_mode == PCRE32_MODE &&
5134 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5135 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5136 #endif
5137 0))
5138 onechar++;
5139 else if (use_utf)
5140 {
5141 while (start_offset + onechar < len)
5142 {
5143 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5144 onechar++;
5145 }
5146 }
5147 use_offsets[1] = start_offset + onechar;
5148 }
5149 else
5150 {
5151 switch(count)
5152 {
5153 case PCRE_ERROR_NOMATCH:
5154 if (gmatched == 0)
5155 {
5156 if (markptr == NULL)
5157 {
5158 fprintf(outfile, "No match");
5159 }
5160 else
5161 {
5162 fprintf(outfile, "No match, mark = ");
5163 PCHARSV(markptr, 0, -1, outfile);
5164 }
5165 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5166 putc('\n', outfile);
5167 }
5168 break;
5169
5170 case PCRE_ERROR_BADUTF8:
5171 case PCRE_ERROR_SHORTUTF8:
5172 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5173 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5174 8 * CHAR_SIZE);
5175 if (use_size_offsets >= 2)
5176 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5177 use_offsets[1]);
5178 fprintf(outfile, "\n");
5179 break;
5180
5181 case PCRE_ERROR_BADUTF8_OFFSET:
5182 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5183 8 * CHAR_SIZE);
5184 break;
5185
5186 default:
5187 if (count < 0 &&
5188 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5189 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5190 else
5191 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5192 break;
5193 }
5194
5195 break; /* Out of the /g loop */
5196 }
5197 }
5198
5199 /* If not /g or /G we are done */
5200
5201 if (!do_g && !do_G) break;
5202
5203 /* If we have matched an empty string, first check to see if we are at
5204 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5205 Perl's /g option does. This turns out to be rather cunning. First we set
5206 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5207 same point. If this fails (picked up above) we advance to the next
5208 character. */
5209
5210 g_notempty = 0;
5211
5212 if (use_offsets[0] == use_offsets[1])
5213 {
5214 if (use_offsets[0] == len) break;
5215 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5216 }
5217
5218 /* For /g, update the start offset, leaving the rest alone */
5219
5220 if (do_g) start_offset = use_offsets[1];
5221
5222 /* For /G, update the pointer and length */
5223
5224 else
5225 {
5226 bptr += use_offsets[1] * CHAR_SIZE;
5227 len -= use_offsets[1];
5228 }
5229 } /* End of loop for /g and /G */
5230
5231 NEXT_DATA: continue;
5232 } /* End of loop for data lines */
5233
5234 CONTINUE:
5235
5236 #if !defined NOPOSIX
5237 if (posix || do_posix) regfree(&preg);
5238 #endif
5239
5240 if (re != NULL) new_free(re);
5241 if (extra != NULL)
5242 {
5243 PCRE_FREE_STUDY(extra);
5244 }
5245 if (locale_set)
5246 {
5247 new_free((void *)tables);
5248 setlocale(LC_CTYPE, "C");
5249 locale_set = 0;
5250 }
5251 if (jit_stack != NULL)
5252 {
5253 PCRE_JIT_STACK_FREE(jit_stack);
5254 jit_stack = NULL;
5255 }
5256 }
5257
5258 if (infile == stdin) fprintf(outfile, "\n");
5259
5260 EXIT:
5261
5262 if (infile != NULL && infile != stdin) fclose(infile);
5263 if (outfile != NULL && outfile != stdout) fclose(outfile);
5264
5265 free(buffer);
5266 free(dbuffer);
5267 free(pbuffer);
5268 free(offsets);
5269
5270 #ifdef SUPPORT_PCRE16
5271 if (buffer16 != NULL) free(buffer16);
5272 #endif
5273 #ifdef SUPPORT_PCRE32
5274 if (buffer32 != NULL) free(buffer32);
5275 #endif
5276
5277 #if !defined NODFA
5278 if (dfa_workspace != NULL)
5279 free(dfa_workspace);
5280 #endif
5281
5282 return yield;
5283 }
5284
5285 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5