/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1097 - (show annotations)
Tue Oct 16 15:56:13 2012 UTC (7 years ago) by chpe
File MIME type: text/plain
File size: 155146 byte(s)
pcre32: pcretest: Add explicit -8 option

Even though 8-bit mode is the default, it's still useful to have an -8
option for symmetry.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of the 8-bit, 16-bit and 32-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If two modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with, say, COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8, COMPILE_PCRE16 or COMPILE_PCRE32.
46 It does, however, make use of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32
47 to ensure that it calls only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136
/* When exactly one library width is supported, define the matching */
/* COMPILE_PCREnn macro so the internal macros are set up for that width. */
137 #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138 /* Configure internal macros to 32 bit mode. */
139 #define COMPILE_PCRE32
140 #endif
141 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142 /* Configure internal macros to 16 bit mode. */
143 #define COMPILE_PCRE16
144 #endif
145 #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146 /* Configure internal macros to 8 bit mode. */
147 #define COMPILE_PCRE8
148 #endif
149
150 #include "pcre_internal.h"
151
152 /* The pcre_printint() function, which prints the internal form of a compiled
153 regex, is held in a separate file so that (a) it can be compiled in either
154 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155 when that is compiled in debug mode. */
156
157 #ifdef SUPPORT_PCRE8
158 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159 #endif
160 #ifdef SUPPORT_PCRE16
161 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162 #endif
163 #ifdef SUPPORT_PCRE32
164 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165 #endif
166
167 /* We need access to some of the data tables that PCRE uses. So as not to have
168 to keep two copies, we include the source files here, changing the names of the
169 external symbols to prevent clashes. */
170
171 #define PCRE_INCLUDED
172
173 #include "pcre_tables.c"
174 #include "pcre_ucd.c"
175
176 /* The definition of the macro PRINTABLE, which determines whether to print an
177 output character as-is or as a hex value when showing compiled patterns, is
178 the same as in the printint.src file. We use it here in cases when the locale
179 has not been explicitly changed, so as to get consistent output from systems
180 that differ in their output from isprint() even in the "C" locale. */
181
182 #ifdef EBCDIC
183 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
184 #else
185 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
186 #endif
187
188 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189
190 /* Posix support is disabled in 16 or 32 bit only mode. */
191 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192 #define NOPOSIX
193 #endif
194
195 /* It is possible to compile this test program without including support for
196 testing the POSIX interface, though this is not available via the standard
197 Makefile. */
198
199 #if !defined NOPOSIX
200 #include "pcreposix.h"
201 #endif
202
203 /* It is also possible, originally for the benefit of a version that was
204 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206 automatically cut out the UTF support if PCRE is built without it. */
207
208 #ifndef SUPPORT_UTF
209 #ifndef NOUTF
210 #define NOUTF
211 #endif
212 #endif
213
214 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
215 for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
216 only from one place and is handled differently). I couldn't dream up any way of
217 using a single macro to do this in a generic way, because of the many different
218 argument requirements. We know that at least one of SUPPORT_PCRE8,
219 SUPPORT_PCRE16 and SUPPORT_PCRE32 must be set. First define macros for each
220 individual mode; then use these in the definitions of generic macros.
221
222 **** Special note about the PCHARSxxx macros: the address of the string to be
223 printed is always given as two arguments: a base address followed by an offset.
224 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
225 offset is in units of this size. If the string were given as base+offset in one
226 argument, the casting might be incorrectly applied. */
227
228 #ifdef SUPPORT_PCRE8
229
230 #define PCHARS8(lv, p, offset, len, f) \
231 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
232
233 #define PCHARSV8(p, offset, len, f) \
234 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235
236 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237 p = read_capture_name8(p, cn8, re)
238
239 #define STRLEN8(p) ((int)strlen((char *)p))
240
241 #define SET_PCRE_CALLOUT8(callout) \
242 pcre_callout = callout
243
244 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245 pcre_assign_jit_stack(extra, callback, userdata)
246
247 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248 re = pcre_compile((char *)pat, options, error, erroffset, tables)
249
250 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251 namesptr, cbuffer, size) \
252 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253 (char *)namesptr, cbuffer, size)
254
255 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257
258 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259 offsets, size_offsets, workspace, size_workspace) \
260 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261 offsets, size_offsets, workspace, size_workspace)
262
263 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264 offsets, size_offsets) \
265 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266 offsets, size_offsets)
267
268 #define PCRE_FREE_STUDY8(extra) \
269 pcre_free_study(extra)
270
271 #define PCRE_FREE_SUBSTRING8(substring) \
272 pcre_free_substring(substring)
273
274 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275 pcre_free_substring_list(listptr)
276
277 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278 getnamesptr, subsptr) \
279 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280 (char *)getnamesptr, subsptr)
281
/* Assign to n the result of pcre_get_stringnumber() for the name at ptr. */
/* NOTE(review): the expansion references `re`, which is not a parameter of */
/* this macro, and never uses the `rc` parameter, so it relies on a variable */
/* named `re` being in scope at the call site. Confirm against callers. */
282 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
283 n = pcre_get_stringnumber(re, (char *)ptr)
284
285 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287
288 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290
291 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293
294 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295 pcre_printint(re, outfile, debug_lengths)
296
297 #define PCRE_STUDY8(extra, re, options, error) \
298 extra = pcre_study(re, options, error)
299
300 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301 pcre_jit_stack_alloc(startsize, maxsize)
302
303 #define PCRE_JIT_STACK_FREE8(stack) \
304 pcre_jit_stack_free(stack)
305
306 #endif /* SUPPORT_PCRE8 */
307
308 /* -----------------------------------------------------------*/
309
310 #ifdef SUPPORT_PCRE16
311
312 #define PCHARS16(lv, p, offset, len, f) \
313 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
314
315 #define PCHARSV16(p, offset, len, f) \
316 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
317
318 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319 p = read_capture_name16(p, cn16, re)
320
321 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322
323 #define SET_PCRE_CALLOUT16(callout) \
324 pcre16_callout = (int (*)(pcre16_callout_block *))callout
325
326 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327 pcre16_assign_jit_stack((pcre16_extra *)extra, \
328 (pcre16_jit_callback)callback, userdata)
329
330 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332 tables)
333
334 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335 namesptr, cbuffer, size) \
336 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338
339 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341 (PCRE_UCHAR16 *)cbuffer, size/2)
342
343 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344 offsets, size_offsets, workspace, size_workspace) \
345 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347 workspace, size_workspace)
348
349 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350 offsets, size_offsets) \
351 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352 len, start_offset, options, offsets, size_offsets)
353
354 #define PCRE_FREE_STUDY16(extra) \
355 pcre16_free_study((pcre16_extra *)extra)
356
357 #define PCRE_FREE_SUBSTRING16(substring) \
358 pcre16_free_substring((PCRE_SPTR16)substring)
359
360 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362
363 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364 getnamesptr, subsptr) \
365 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367
/* Assign to n the result of pcre16_get_stringnumber() for the name at ptr. */
/* NOTE(review): the expansion references `re`, which is not a parameter of */
/* this macro, and never uses the `rc` parameter, so it relies on a variable */
/* named `re` being in scope at the call site. Confirm against callers. */
368 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
369 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
370
371 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373 (PCRE_SPTR16 *)(void*)subsptr)
374
375 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377 (PCRE_SPTR16 **)(void*)listptr)
378
379 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381 tables)
382
383 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384 pcre16_printint(re, outfile, debug_lengths)
385
386 #define PCRE_STUDY16(extra, re, options, error) \
387 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388
389 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391
392 #define PCRE_JIT_STACK_FREE16(stack) \
393 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394
395 #endif /* SUPPORT_PCRE16 */
396
397 /* -----------------------------------------------------------*/
398
399 #ifdef SUPPORT_PCRE32
400
401 #define PCHARS32(lv, p, offset, len, f) \
402 lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)
403
404 #define PCHARSV32(p, offset, len, f) \
405 (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)
406
407 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408 p = read_capture_name32(p, cn32, re)
409
410 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411
412 #define SET_PCRE_CALLOUT32(callout) \
413 pcre32_callout = (int (*)(pcre32_callout_block *))callout
414
415 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416 pcre32_assign_jit_stack((pcre32_extra *)extra, \
417 (pcre32_jit_callback)callback, userdata)
418
419 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421 tables)
422
/* Copy a named captured substring into cbuffer using the 32-bit library. */
/* `size` is the capacity of cbuffer in bytes (the 8-bit variant passes it */
/* through unchanged); the 32-bit function is given a PCRE_UCHAR32 buffer, */
/* so the byte count is divided by 4 to get a count of 32-bit units. */
/* Dividing by 2, as the 16-bit variant does, would overstate the capacity. */
423 #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
424 namesptr, cbuffer, size) \
425 rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
426 count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
427
/* Copy captured substring number i into cbuffer using the 32-bit library. */
/* `size` is the capacity of cbuffer in bytes (the 8-bit variant passes it */
/* through unchanged); the 32-bit function is given a PCRE_UCHAR32 buffer, */
/* so the byte count is divided by 4 to get a count of 32-bit units. */
/* Dividing by 2, as the 16-bit variant does, would overstate the capacity. */
428 #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
429 rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
430 (PCRE_UCHAR32 *)cbuffer, (size)/4)
431
432 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433 offsets, size_offsets, workspace, size_workspace) \
434 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436 workspace, size_workspace)
437
438 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439 offsets, size_offsets) \
440 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441 len, start_offset, options, offsets, size_offsets)
442
443 #define PCRE_FREE_STUDY32(extra) \
444 pcre32_free_study((pcre32_extra *)extra)
445
446 #define PCRE_FREE_SUBSTRING32(substring) \
447 pcre32_free_substring((PCRE_SPTR32)substring)
448
449 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451
452 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453 getnamesptr, subsptr) \
454 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456
/* Assign to n the result of pcre32_get_stringnumber() for the name at ptr. */
/* NOTE(review): the expansion references `re`, which is not a parameter of */
/* this macro, and never uses the `rc` parameter, so it relies on a variable */
/* named `re` being in scope at the call site. Confirm against callers. */
457 #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
458 n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
459
460 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462 (PCRE_SPTR32 *)(void*)subsptr)
463
464 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466 (PCRE_SPTR32 **)(void*)listptr)
467
468 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470 tables)
471
472 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473 pcre32_printint(re, outfile, debug_lengths)
474
475 #define PCRE_STUDY32(extra, re, options, error) \
476 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477
478 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480
481 #define PCRE_JIT_STACK_FREE32(stack) \
482 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483
484 #endif /* SUPPORT_PCRE32 */
485
486
487 /* ----- Two or more modes are supported; a runtime test is needed, except for
488 pcre_config(), and the JIT stack functions, when it doesn't matter which
489 version is called. ----- */
490
/* Identifiers for the library width that pcre_mode is compared against. */
/* The numeric values matter: the multi-mode CHAR_SIZE macro is defined as */
/* (1 << pcre_mode), so 0, 1 and 2 yield sizes of 1, 2 and 4 respectively. */
/* Do not reorder or insert enumerators without updating CHAR_SIZE. */
491 enum {
492 PCRE8_MODE, /* 0 */
493 PCRE16_MODE, /* 1 */
494 PCRE32_MODE /* 2 */
495 };
496
497 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498
499 #define CHAR_SIZE (1 << pcre_mode)
500
/* Dispatch to PCHARS32, PCHARS16 or PCHARS8 according to pcre_mode, */
/* assigning the result to lv. */
/* NOTE: this expands to a bare if / else-if / else chain with no */
/* do { } while (0) wrapper, so use it only as a complete statement */
/* terminated by ';', never inside an expression. */
501 #define PCHARS(lv, p, offset, len, f) \
502 if (pcre_mode == PCRE32_MODE) \
503 PCHARS32(lv, p, offset, len, f); \
504 else if (pcre_mode == PCRE16_MODE) \
505 PCHARS16(lv, p, offset, len, f); \
506 else \
507 PCHARS8(lv, p, offset, len, f)
508
509 #define PCHARSV(p, offset, len, f) \
510 if (pcre_mode == PCRE32_MODE) \
511 PCHARSV32(p, offset, len, f); \
512 else if (pcre_mode == PCRE16_MODE) \
513 PCHARSV16(p, offset, len, f); \
514 else \
515 PCHARSV8(p, offset, len, f)
516
517 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518 if (pcre_mode == PCRE32_MODE) \
519 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520 else if (pcre_mode == PCRE16_MODE) \
521 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522 else \
523 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524
525 #define SET_PCRE_CALLOUT(callout) \
526 if (pcre_mode == PCRE32_MODE) \
527 SET_PCRE_CALLOUT32(callout); \
528 else if (pcre_mode == PCRE16_MODE) \
529 SET_PCRE_CALLOUT16(callout); \
530 else \
531 SET_PCRE_CALLOUT8(callout)
532
533 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534
535 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536 if (pcre_mode == PCRE32_MODE) \
537 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538 else if (pcre_mode == PCRE16_MODE) \
539 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540 else \
541 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542
543 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544 if (pcre_mode == PCRE32_MODE) \
545 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546 else if (pcre_mode == PCRE16_MODE) \
547 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548 else \
549 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
550
551 #define PCRE_CONFIG pcre_config
552
553 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554 namesptr, cbuffer, size) \
555 if (pcre_mode == PCRE32_MODE) \
556 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557 namesptr, cbuffer, size); \
558 else if (pcre_mode == PCRE16_MODE) \
559 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560 namesptr, cbuffer, size); \
561 else \
562 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
563 namesptr, cbuffer, size)
564
565 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566 if (pcre_mode == PCRE32_MODE) \
567 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568 else if (pcre_mode == PCRE16_MODE) \
569 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570 else \
571 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572
573 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574 offsets, size_offsets, workspace, size_workspace) \
575 if (pcre_mode == PCRE32_MODE) \
576 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577 offsets, size_offsets, workspace, size_workspace); \
578 else if (pcre_mode == PCRE16_MODE) \
579 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580 offsets, size_offsets, workspace, size_workspace); \
581 else \
582 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets, workspace, size_workspace)
584
585 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets) \
587 if (pcre_mode == PCRE32_MODE) \
588 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets); \
590 else if (pcre_mode == PCRE16_MODE) \
591 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets); \
593 else \
594 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
595 offsets, size_offsets)
596
597 #define PCRE_FREE_STUDY(extra) \
598 if (pcre_mode == PCRE32_MODE) \
599 PCRE_FREE_STUDY32(extra); \
600 else if (pcre_mode == PCRE16_MODE) \
601 PCRE_FREE_STUDY16(extra); \
602 else \
603 PCRE_FREE_STUDY8(extra)
604
605 #define PCRE_FREE_SUBSTRING(substring) \
606 if (pcre_mode == PCRE32_MODE) \
607 PCRE_FREE_SUBSTRING32(substring); \
608 else if (pcre_mode == PCRE16_MODE) \
609 PCRE_FREE_SUBSTRING16(substring); \
610 else \
611 PCRE_FREE_SUBSTRING8(substring)
612
613 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614 if (pcre_mode == PCRE32_MODE) \
615 PCRE_FREE_SUBSTRING_LIST32(listptr); \
616 else if (pcre_mode == PCRE16_MODE) \
617 PCRE_FREE_SUBSTRING_LIST16(listptr); \
618 else \
619 PCRE_FREE_SUBSTRING_LIST8(listptr)
620
621 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622 getnamesptr, subsptr) \
623 if (pcre_mode == PCRE32_MODE) \
624 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625 getnamesptr, subsptr); \
626 else if (pcre_mode == PCRE16_MODE) \
627 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628 getnamesptr, subsptr); \
629 else \
630 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
631 getnamesptr, subsptr)
632
633 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634 if (pcre_mode == PCRE32_MODE) \
635 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636 else if (pcre_mode == PCRE16_MODE) \
637 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638 else \
639 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640
641 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642 if (pcre_mode == PCRE32_MODE) \
643 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644 else if (pcre_mode == PCRE16_MODE) \
645 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646 else \
647 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648
649 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650 if (pcre_mode == PCRE32_MODE) \
651 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652 else if (pcre_mode == PCRE16_MODE) \
653 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654 else \
655 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656
657 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658 (pcre_mode == PCRE32_MODE ? \
659 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660 : pcre_mode == PCRE16_MODE ? \
661 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663
664 #define PCRE_JIT_STACK_FREE(stack) \
665 if (pcre_mode == PCRE32_MODE) \
666 PCRE_JIT_STACK_FREE32(stack); \
667 else if (pcre_mode == PCRE16_MODE) \
668 PCRE_JIT_STACK_FREE16(stack); \
669 else \
670 PCRE_JIT_STACK_FREE8(stack)
671
672 #define PCRE_MAKETABLES \
673 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674
675 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676 if (pcre_mode == PCRE32_MODE) \
677 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678 else if (pcre_mode == PCRE16_MODE) \
679 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680 else \
681 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682
683 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684 if (pcre_mode == PCRE32_MODE) \
685 PCRE_PRINTINT32(re, outfile, debug_lengths); \
686 else if (pcre_mode == PCRE16_MODE) \
687 PCRE_PRINTINT16(re, outfile, debug_lengths); \
688 else \
689 PCRE_PRINTINT8(re, outfile, debug_lengths)
690
691 #define PCRE_STUDY(extra, re, options, error) \
692 if (pcre_mode == PCRE32_MODE) \
693 PCRE_STUDY32(extra, re, options, error); \
694 else if (pcre_mode == PCRE16_MODE) \
695 PCRE_STUDY16(extra, re, options, error); \
696 else \
697 PCRE_STUDY8(extra, re, options, error)
698
699 /* ----- Only 8-bit mode is supported ----- */
700
701 #elif defined SUPPORT_PCRE8
702 #define CHAR_SIZE 1
703 #define PCHARS PCHARS8
704 #define PCHARSV PCHARSV8
705 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
706 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
707 #define STRLEN STRLEN8
708 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
709 #define PCRE_COMPILE PCRE_COMPILE8
710 #define PCRE_CONFIG pcre_config
711 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
712 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
713 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
714 #define PCRE_EXEC PCRE_EXEC8
715 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
716 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
717 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
718 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
719 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
720 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
721 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
722 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
723 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
724 #define PCRE_MAKETABLES pcre_maketables()
725 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
726 #define PCRE_PRINTINT PCRE_PRINTINT8
727 #define PCRE_STUDY PCRE_STUDY8
728
729 /* ----- Only 16-bit mode is supported ----- */
730
731 #elif defined SUPPORT_PCRE16
732 #define CHAR_SIZE 2
733 #define PCHARS PCHARS16
734 #define PCHARSV PCHARSV16
735 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
736 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
737 #define STRLEN STRLEN16
738 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
739 #define PCRE_COMPILE PCRE_COMPILE16
740 #define PCRE_CONFIG pcre16_config
741 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
742 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
743 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
744 #define PCRE_EXEC PCRE_EXEC16
745 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
746 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
747 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
748 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
749 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
750 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
751 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
752 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
753 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
754 #define PCRE_MAKETABLES pcre16_maketables()
755 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756 #define PCRE_PRINTINT PCRE_PRINTINT16
757 #define PCRE_STUDY PCRE_STUDY16
758
759 /* ----- Only 32-bit mode is supported ----- */
760
761 #elif defined SUPPORT_PCRE32
762 #define CHAR_SIZE 4
763 #define PCHARS PCHARS32
764 #define PCHARSV PCHARSV32
765 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
766 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
767 #define STRLEN STRLEN32
768 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
769 #define PCRE_COMPILE PCRE_COMPILE32
770 #define PCRE_CONFIG pcre32_config
771 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
773 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
774 #define PCRE_EXEC PCRE_EXEC32
775 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
776 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
777 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
778 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
779 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
780 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
781 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
782 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
783 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
784 #define PCRE_MAKETABLES pcre32_maketables()
785 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786 #define PCRE_PRINTINT PCRE_PRINTINT32
787 #define PCRE_STUDY PCRE_STUDY32
788
789 #endif
790
791 /* ----- End of mode-specific function call macros ----- */
792
793
794 /* Other parameters */
795
/* Make sure CLOCKS_PER_SEC exists: if the headers did not define it, use
CLK_TCK when that is available, and otherwise fall back to 100. */
796 #ifndef CLOCKS_PER_SEC
797 #ifdef CLK_TCK
798 #define CLOCKS_PER_SEC CLK_TCK
799 #else
800 #define CLOCKS_PER_SEC 100
801 #endif
802 #endif
803
/* DFA_WS_DIMENSION is defined only when DFA matching has not been disabled by
NODFA. NOTE(review): presumably the number of elements in the workspace vector
passed to the DFA matcher - confirm at the point of use. */
804 #if !defined NODFA
805 #define DFA_WS_DIMENSION 1000
806 #endif
807
808 /* This is the default loop count for timing. */
809
810 #define LOOPREPEAT 500000
811
812 /* Static variables */
813
/* File-scope state shared by the functions of this program. Only the variables
whose use is visible in the nearby helper functions are annotated; the callout,
malloc and locale related ones are set and read further on in the file. */
814 static FILE *outfile;
815 static int log_store = 0;
816 static int callout_count;
817 static int callout_extra;
818 static int callout_fail_count;
819 static int callout_fail_id;
820 static int debug_lengths;
821 static int first_callout;
822 static int jit_was_used; /* set to TRUE by jit_callback() */
823 static int locale_set = 0;
824 static int show_malloc;
825 static int use_utf; /* non-zero makes pchar(), pchars() and pchars16() UTF-aware */
826 static size_t gotten_store;
827 static size_t first_gotten_store = 0;
828 static const unsigned char *last_callout_mark = NULL;
829
830 /* The buffers grow automatically if very long input lines are encountered.
buffer_size is the size in bytes of both buffer and pbuffer; extend_inputline()
doubles it and reallocates both when less than about 1000 bytes remain. Input
lines are read into buffer; pbuffer holds a copy of the input for callouts. */
831
832 static int buffer_size = 50000;
833 static pcre_uint8 *buffer = NULL;
834 static pcre_uint8 *pbuffer = NULL;
835
836 /* Another buffer is needed for translation to 16/32-bit character strings. It
837 will be obtained and extended as required. */
838
839 #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
840
841 /* We need the table of operator lengths that is used for 16/32-bit compiling, in
842 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
843 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
844 appropriately for the 16/32-bit world. Just as a safety check, make sure that
845 COMPILE_PCRE[16|32] is *not* set. */
846
847 #ifdef COMPILE_PCRE16
848 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
849 #endif
850
851 #ifdef COMPILE_PCRE32
852 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
853 #endif
854
/* Re-map LINK_SIZE: 2 becomes 1, and 3 or 4 becomes 2. Any other incoming value
stops the compilation. NOTE(review): the new value is presumably a count of
16-bit units rather than bytes - confirm against the library's own setting. */
855 #if LINK_SIZE == 2
856 #undef LINK_SIZE
857 #define LINK_SIZE 1
858 #elif LINK_SIZE == 3 || LINK_SIZE == 4
859 #undef LINK_SIZE
860 #define LINK_SIZE 2
861 #else
862 #error LINK_SIZE must be either 2, 3, or 4
863 #endif
864
/* IMM2_SIZE is unconditionally forced to 1 for the same reason. */
865 #undef IMM2_SIZE
866 #define IMM2_SIZE 1
867
868 #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
869
/* Conversion buffers and operator-length tables for the wide-character modes.
buffer16/buffer32 receive the output of to16()/to32(), which allocate them on
first use and replace them with a larger block when needed; the matching _size
variable is the allocated size in bytes. The OP_lengths tables are built from
the OP_LENGTHS macro after the LINK_SIZE/IMM2_SIZE adjustments made above. */
870 #ifdef SUPPORT_PCRE16
871 static int buffer16_size = 0;
872 static pcre_uint16 *buffer16 = NULL;
873 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
874 #endif /* SUPPORT_PCRE16 */
875
876 #ifdef SUPPORT_PCRE32
877 static int buffer32_size = 0;
878 static pcre_uint32 *buffer32 = NULL;
879 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
880 #endif /* SUPPORT_PCRE32 */
881
882 /* Initial operating mode. If we have 8-bit support, default to it; if there is
883 also 16- or 32-bit support, the mode can be changed by an option. If there is no
884 8-bit support, default to 16-bit mode if it is available, and otherwise to
32-bit mode. */
885
886 #if defined SUPPORT_PCRE8
887 static int pcre_mode = PCRE8_MODE;
888 #elif defined SUPPORT_PCRE16
889 static int pcre_mode = PCRE16_MODE;
890 #elif defined SUPPORT_PCRE32
891 static int pcre_mode = PCRE32_MODE;
892 #endif
893
894 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
895
/* Entry [n-1] holds the study options for digit n. Reading the entries in
order shows that n acts as a bit mask: bit 0 selects PCRE_STUDY_JIT_COMPILE,
bit 1 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE and bit 2
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE. */
896 static int jit_study_bits[] =
897 {
898 PCRE_STUDY_JIT_COMPILE, /* n = 1 */
899 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, /* n = 2 */
900 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, /* n = 3 */
901 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* n = 4 */
902 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* n = 5 */
903 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* n = 6 */
904 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
905 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE /* n = 7 */
906 };
907
/* All three JIT study option bits combined. */
908 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
909 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
910
911 /* Textual explanations for runtime error codes */
912
/* NOTE(review): the table appears to be indexed by the negated error code
returned from the matching functions (entry 0 is "no error", entry 1 is
NOMATCH) - confirm at the lookup site. A NULL entry marks a code that is either
never returned or is reported by dedicated code instead of by a fixed text. */
913 static const char *errtexts[] = {
914 NULL, /* 0 is no error */
915 NULL, /* NOMATCH is handled specially */
916 "NULL argument passed",
917 "bad option value",
918 "magic number missing",
919 "unknown opcode - pattern overwritten?",
920 "no more memory",
921 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
922 "match limit exceeded",
923 "callout error code",
924 NULL, /* BADUTF8/16 is handled specially */
925 NULL, /* BADUTF8/16 offset is handled specially */
926 NULL, /* PARTIAL is handled specially */
927 "not used - internal error",
928 "internal error - pattern overwritten?",
929 "bad count value",
930 "item unsupported for DFA matching",
931 "backreference condition or recursion test not supported for DFA matching",
932 "match limit not supported for DFA matching",
933 "workspace size exceeded in DFA matching",
934 "too much recursion for DFA matching",
935 "recursion limit exceeded",
936 "not used - internal error",
937 "invalid combination of newline options",
938 "bad offset value",
939 NULL, /* SHORTUTF8/16 is handled specially */
940 "nested recursion at the same subject position",
941 "JIT stack limit reached",
942 "pattern compiled in wrong mode: 8-bit/16-bit error",
943 "pattern compiled with other endianness",
944 "invalid data in workspace for DFA restart"
945 };
946
947
948 /*************************************************
949 * Alternate character tables *
950 *************************************************/
951
952 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
953 using the default tables of the library. However, the T option can be used to
954 select alternate sets of tables, for different kinds of testing. Note also that
955 the L (locale) option also adjusts the tables. */
956
957 /* This is the set of tables distributed as default with PCRE. It recognizes
958 only ASCII characters. */
959
/* Layout: 256 bytes of lower casing, 256 bytes of case flipping, ten 32-byte
class bit maps (320 bytes), then 256 bytes of character type flags. */
960 static const pcre_uint8 tables0[] = {
961
962 /* This table is a lower casing table. */
963
964 0, 1, 2, 3, 4, 5, 6, 7,
965 8, 9, 10, 11, 12, 13, 14, 15,
966 16, 17, 18, 19, 20, 21, 22, 23,
967 24, 25, 26, 27, 28, 29, 30, 31,
968 32, 33, 34, 35, 36, 37, 38, 39,
969 40, 41, 42, 43, 44, 45, 46, 47,
970 48, 49, 50, 51, 52, 53, 54, 55,
971 56, 57, 58, 59, 60, 61, 62, 63,
972 64, 97, 98, 99,100,101,102,103,
973 104,105,106,107,108,109,110,111,
974 112,113,114,115,116,117,118,119,
975 120,121,122, 91, 92, 93, 94, 95,
976 96, 97, 98, 99,100,101,102,103,
977 104,105,106,107,108,109,110,111,
978 112,113,114,115,116,117,118,119,
979 120,121,122,123,124,125,126,127,
980 128,129,130,131,132,133,134,135,
981 136,137,138,139,140,141,142,143,
982 144,145,146,147,148,149,150,151,
983 152,153,154,155,156,157,158,159,
984 160,161,162,163,164,165,166,167,
985 168,169,170,171,172,173,174,175,
986 176,177,178,179,180,181,182,183,
987 184,185,186,187,188,189,190,191,
988 192,193,194,195,196,197,198,199,
989 200,201,202,203,204,205,206,207,
990 208,209,210,211,212,213,214,215,
991 216,217,218,219,220,221,222,223,
992 224,225,226,227,228,229,230,231,
993 232,233,234,235,236,237,238,239,
994 240,241,242,243,244,245,246,247,
995 248,249,250,251,252,253,254,255,
996
997 /* This table is a case flipping table. */
998
999 0, 1, 2, 3, 4, 5, 6, 7,
1000 8, 9, 10, 11, 12, 13, 14, 15,
1001 16, 17, 18, 19, 20, 21, 22, 23,
1002 24, 25, 26, 27, 28, 29, 30, 31,
1003 32, 33, 34, 35, 36, 37, 38, 39,
1004 40, 41, 42, 43, 44, 45, 46, 47,
1005 48, 49, 50, 51, 52, 53, 54, 55,
1006 56, 57, 58, 59, 60, 61, 62, 63,
1007 64, 97, 98, 99,100,101,102,103,
1008 104,105,106,107,108,109,110,111,
1009 112,113,114,115,116,117,118,119,
1010 120,121,122, 91, 92, 93, 94, 95,
1011 96, 65, 66, 67, 68, 69, 70, 71,
1012 72, 73, 74, 75, 76, 77, 78, 79,
1013 80, 81, 82, 83, 84, 85, 86, 87,
1014 88, 89, 90,123,124,125,126,127,
1015 128,129,130,131,132,133,134,135,
1016 136,137,138,139,140,141,142,143,
1017 144,145,146,147,148,149,150,151,
1018 152,153,154,155,156,157,158,159,
1019 160,161,162,163,164,165,166,167,
1020 168,169,170,171,172,173,174,175,
1021 176,177,178,179,180,181,182,183,
1022 184,185,186,187,188,189,190,191,
1023 192,193,194,195,196,197,198,199,
1024 200,201,202,203,204,205,206,207,
1025 208,209,210,211,212,213,214,215,
1026 216,217,218,219,220,221,222,223,
1027 224,225,226,227,228,229,230,231,
1028 232,233,234,235,236,237,238,239,
1029 240,241,242,243,244,245,246,247,
1030 248,249,250,251,252,253,254,255,
1031
1032 /* This table contains bit maps for various character classes. Each map is 32
1033 bytes long and the bits run from the least significant end of each byte. The
1034 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1035 graph, print, punct, and cntrl. Other classes are built from combinations. */
1036
/* space: characters 9-13 and 32 */
1037 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1038 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1039 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1040 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1041
/* xdigit: 0-9, A-F, a-f */
1042 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1043 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1044 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1045 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1046
/* digit: 0-9 */
1047 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1048 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1049 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1050 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1051
/* upper: A-Z */
1052 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1053 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1054 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1055 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1056
/* lower: a-z */
1057 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1058 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1059 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1060 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1061
/* word: 0-9, A-Z, '_' and a-z */
1062 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1063 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1064 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1065 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1066
/* graph: characters 33-126 */
1067 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1068 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1069 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1070 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1071
/* print: characters 32-126 */
1072 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1073 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1074 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1075 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1076
/* punct: the graph characters that are neither letters nor digits */
1077 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1078 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1079 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1080 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1081
/* cntrl: characters 0-31 and 127 */
1082 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1083 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1084 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1085 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1086
1087 /* This table identifies various classes of character by individual bits:
1088 0x01 white space character
1089 0x02 letter
1090 0x04 decimal digit
1091 0x08 hexadecimal digit
1092 0x10 alphanumeric or '_'
1093 0x80 regular expression metacharacter or binary zero
1094 */
1095
1096 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1097 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1098 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1099 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1100 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1101 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1102 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1103 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1104 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1105 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1106 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1107 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1108 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1109 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1110 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1111 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1112 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1113 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1114 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1115 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1116 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1117 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1118 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1119 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1120 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1121 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1122 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1123 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1124 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1125 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1126 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1127 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1128
1129 /* This is a set of tables that came originally from a Windows user. It seems to
1130 be at least an approximation of ISO 8859. In particular, there are characters
1131 greater than 128 that are marked as spaces, letters, etc. */
1132
/* The element counts (256 + 256 + 320 + 256) match those of tables0, so the
four parts are labelled on the assumption that they are the same four tables in
the same order - TODO confirm against the table generator. */
1133 static const pcre_uint8 tables1[] = {
/* Part 1 (256 bytes): lower casing. Besides A-Z, characters 192-222 other than
215 are mapped to the character 32 higher. */
1134 0,1,2,3,4,5,6,7,
1135 8,9,10,11,12,13,14,15,
1136 16,17,18,19,20,21,22,23,
1137 24,25,26,27,28,29,30,31,
1138 32,33,34,35,36,37,38,39,
1139 40,41,42,43,44,45,46,47,
1140 48,49,50,51,52,53,54,55,
1141 56,57,58,59,60,61,62,63,
1142 64,97,98,99,100,101,102,103,
1143 104,105,106,107,108,109,110,111,
1144 112,113,114,115,116,117,118,119,
1145 120,121,122,91,92,93,94,95,
1146 96,97,98,99,100,101,102,103,
1147 104,105,106,107,108,109,110,111,
1148 112,113,114,115,116,117,118,119,
1149 120,121,122,123,124,125,126,127,
1150 128,129,130,131,132,133,134,135,
1151 136,137,138,139,140,141,142,143,
1152 144,145,146,147,148,149,150,151,
1153 152,153,154,155,156,157,158,159,
1154 160,161,162,163,164,165,166,167,
1155 168,169,170,171,172,173,174,175,
1156 176,177,178,179,180,181,182,183,
1157 184,185,186,187,188,189,190,191,
1158 224,225,226,227,228,229,230,231,
1159 232,233,234,235,236,237,238,239,
1160 240,241,242,243,244,245,246,215,
1161 248,249,250,251,252,253,254,223,
1162 224,225,226,227,228,229,230,231,
1163 232,233,234,235,236,237,238,239,
1164 240,241,242,243,244,245,246,247,
1165 248,249,250,251,252,253,254,255,
/* Part 2 (256 bytes): case flipping. The same high characters are swapped with
their partners in both directions; 215, 223, 247 and 255 map to themselves. */
1166 0,1,2,3,4,5,6,7,
1167 8,9,10,11,12,13,14,15,
1168 16,17,18,19,20,21,22,23,
1169 24,25,26,27,28,29,30,31,
1170 32,33,34,35,36,37,38,39,
1171 40,41,42,43,44,45,46,47,
1172 48,49,50,51,52,53,54,55,
1173 56,57,58,59,60,61,62,63,
1174 64,97,98,99,100,101,102,103,
1175 104,105,106,107,108,109,110,111,
1176 112,113,114,115,116,117,118,119,
1177 120,121,122,91,92,93,94,95,
1178 96,65,66,67,68,69,70,71,
1179 72,73,74,75,76,77,78,79,
1180 80,81,82,83,84,85,86,87,
1181 88,89,90,123,124,125,126,127,
1182 128,129,130,131,132,133,134,135,
1183 136,137,138,139,140,141,142,143,
1184 144,145,146,147,148,149,150,151,
1185 152,153,154,155,156,157,158,159,
1186 160,161,162,163,164,165,166,167,
1187 168,169,170,171,172,173,174,175,
1188 176,177,178,179,180,181,182,183,
1189 184,185,186,187,188,189,190,191,
1190 224,225,226,227,228,229,230,231,
1191 232,233,234,235,236,237,238,239,
1192 240,241,242,243,244,245,246,215,
1193 248,249,250,251,252,253,254,223,
1194 192,193,194,195,196,197,198,199,
1195 200,201,202,203,204,205,206,207,
1196 208,209,210,211,212,213,214,247,
1197 216,217,218,219,220,221,222,255,
/* Part 3 (320 bytes): ten 32-byte class bit maps, four lines per map.
NOTE(review): presumably in the tables0 order (space, xdigit, digit, upper,
lower, word, graph, print, punct, cntrl) - confirm. */
1198 0,62,0,0,1,0,0,0,
1199 0,0,0,0,0,0,0,0,
1200 32,0,0,0,1,0,0,0,
1201 0,0,0,0,0,0,0,0,
1202 0,0,0,0,0,0,255,3,
1203 126,0,0,0,126,0,0,0,
1204 0,0,0,0,0,0,0,0,
1205 0,0,0,0,0,0,0,0,
1206 0,0,0,0,0,0,255,3,
1207 0,0,0,0,0,0,0,0,
1208 0,0,0,0,0,0,12,2,
1209 0,0,0,0,0,0,0,0,
1210 0,0,0,0,0,0,0,0,
1211 254,255,255,7,0,0,0,0,
1212 0,0,0,0,0,0,0,0,
1213 255,255,127,127,0,0,0,0,
1214 0,0,0,0,0,0,0,0,
1215 0,0,0,0,254,255,255,7,
1216 0,0,0,0,0,4,32,4,
1217 0,0,0,128,255,255,127,255,
1218 0,0,0,0,0,0,255,3,
1219 254,255,255,135,254,255,255,7,
1220 0,0,0,0,0,4,44,6,
1221 255,255,127,255,255,255,127,255,
1222 0,0,0,0,254,255,255,255,
1223 255,255,255,255,255,255,255,127,
1224 0,0,0,0,254,255,255,255,
1225 255,255,255,255,255,255,255,255,
1226 0,2,0,0,255,255,255,255,
1227 255,255,255,255,255,255,255,127,
1228 0,0,0,0,255,255,255,255,
1229 255,255,255,255,255,255,255,255,
1230 0,0,0,0,254,255,0,252,
1231 1,0,0,248,1,0,0,120,
1232 0,0,0,0,254,255,255,255,
1233 0,0,128,0,0,0,128,0,
1234 255,255,255,255,0,0,0,0,
1235 0,0,0,0,0,0,0,128,
1236 255,255,255,255,0,0,0,0,
1237 0,0,0,0,0,0,0,0,
/* Part 4 (256 bytes): one byte of character type flags per character, eight
characters per line. NOTE(review): the values for characters 0-127 equal those
in tables0, so the flag bits presumably have the same meanings - confirm. */
1238 128,0,0,0,0,0,0,0,
1239 0,1,1,0,1,1,0,0,
1240 0,0,0,0,0,0,0,0,
1241 0,0,0,0,0,0,0,0,
1242 1,0,0,0,128,0,0,0,
1243 128,128,128,128,0,0,128,0,
1244 28,28,28,28,28,28,28,28,
1245 28,28,0,0,0,0,0,128,
1246 0,26,26,26,26,26,26,18,
1247 18,18,18,18,18,18,18,18,
1248 18,18,18,18,18,18,18,18,
1249 18,18,18,128,128,0,128,16,
1250 0,26,26,26,26,26,26,18,
1251 18,18,18,18,18,18,18,18,
1252 18,18,18,18,18,18,18,18,
1253 18,18,18,128,128,0,0,0,
1254 0,0,0,0,0,1,0,0,
1255 0,0,0,0,0,0,0,0,
1256 0,0,0,0,0,0,0,0,
1257 0,0,0,0,0,0,0,0,
1258 1,0,0,0,0,0,0,0,
1259 0,0,18,0,0,0,0,0,
1260 0,0,20,20,0,18,0,0,
1261 0,20,18,0,0,0,0,0,
1262 18,18,18,18,18,18,18,18,
1263 18,18,18,18,18,18,18,18,
1264 18,18,18,18,18,18,18,0,
1265 18,18,18,18,18,18,18,18,
1266 18,18,18,18,18,18,18,18,
1267 18,18,18,18,18,18,18,18,
1268 18,18,18,18,18,18,18,0,
1269 18,18,18,18,18,18,18,18
1270 };
1271
1272
1273
1274
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for systems whose C library has no strerror() (SunOS4 is one
example). The text is taken straight from the library's sys_errlist[] table,
which has sys_nerr entries.

Argument:   n   the error number
Returns:    the table entry for n, or a fixed text when n lies outside the table
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1294
1295
1296
1297 /*************************************************
1298 * Print newline configuration *
1299 *************************************************/
1300
1301 /*
1302 Arguments:
1303 rc the return code from PCRE_CONFIG_NEWLINE
1304 isc TRUE if called from "-C newline"
1305 Returns: nothing
1306 */
1307
1308 static void
1309 print_newline_config(int rc, BOOL isc)
1310 {
1311 const char *s = NULL;
1312 if (!isc) printf(" Newline sequence is ");
1313 switch(rc)
1314 {
1315 case CHAR_CR: s = "CR"; break;
1316 case CHAR_LF: s = "LF"; break;
1317 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1318 case -1: s = "ANY"; break;
1319 case -2: s = "ANYCRLF"; break;
1320
1321 default:
1322 printf("a non-standard value: 0x%04x\n", rc);
1323 return;
1324 }
1325
1326 printf("%s\n", s);
1327 }
1328
1329
1330
1331 /*************************************************
1332 * JIT memory callback *
1333 *************************************************/
1334
1335 static pcre_jit_stack* jit_callback(void *arg)
1336 {
1337 jit_was_used = TRUE;
1338 return (pcre_jit_stack *)arg;
1339 }
1340
1341
1342 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1343 /*************************************************
1344 * Convert UTF-8 string to value *
1345 *************************************************/
1346
1347 /* This function takes one or more bytes that represents a UTF-8 character,
1348 and returns the value of the character.
1349
1350 Argument:
1351 utf8bytes a pointer to the byte vector
1352 vptr a pointer to an int to receive the value
1353
1354 Returns: > 0 => the number of bytes consumed
1355 -6 to 0 => malformed UTF-8 character at offset = (-return)
1356 */
1357
1358 static int
1359 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1360 {
1361 pcre_uint32 c = *utf8bytes++;
1362 pcre_uint32 d = c;
1363 int i, j, s;
1364
1365 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1366 {
1367 if ((d & 0x80) == 0) break;
1368 d <<= 1;
1369 }
1370
1371 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1372 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1373
1374 /* i now has a value in the range 1-5 */
1375
1376 s = 6*i;
1377 d = (c & utf8_table3[i]) << s;
1378
1379 for (j = 0; j < i; j++)
1380 {
1381 c = *utf8bytes++;
1382 if ((c & 0xc0) != 0x80) return -(j+1);
1383 s -= 6;
1384 d |= (c & 0x3f) << s;
1385 }
1386
1387 /* Check that encoding was the correct unique one */
1388
1389 for (j = 0; j < utf8_table1_size; j++)
1390 if (d <= utf8_table1[j]) break;
1391 if (j != i) return -(i+1);
1392
1393 /* Valid value */
1394
1395 *vptr = d;
1396 return i+1;
1397 }
1398 #endif /* NOUTF || SUPPORT_PCRE16 */
1399
1400
1401
1402 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1403 /*************************************************
1404 * Convert character value to UTF-8 *
1405 *************************************************/
1406
1407 /* This function takes an integer value in the range 0 - 0x7fffffff
1408 and encodes it as a UTF-8 character in 0 to 6 bytes.
1409
1410 Arguments:
1411 cvalue the character value
1412 utf8bytes pointer to buffer for result - at least 6 bytes long
1413
1414 Returns: number of characters placed in the buffer
1415 */
1416
1417 static int
1418 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1419 {
1420 register int i, j;
1421 if (cvalue > 0x7fffffffu)
1422 return -1;
1423 for (i = 0; i < utf8_table1_size; i++)
1424 if (cvalue <= utf8_table1[i]) break;
1425 utf8bytes += i;
1426 for (j = i; j > 0; j--)
1427 {
1428 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1429 cvalue >>= 6;
1430 }
1431 *utf8bytes = utf8_table2[i] | cvalue;
1432 return i + 1;
1433 }
1434 #endif
1435
1436
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is (re)allocated here whenever
it is smaller than 2*len + 2 bytes, that is, one 16-bit unit for each input
byte plus a terminating zero. That is always enough: in non-UTF mode each byte
becomes one unit, and a UTF-8 character that needs two UTF-16 units (a value
above 0xffff) occupies four bytes of input.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode

The program exits if memory for the buffer cannot be obtained.
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;

if (buffer16_size < needed)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  pcre_uint32 c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;
    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which exists only in UTF mode */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: each byte becomes one 16-bit unit. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1515
#ifdef SUPPORT_PCRE32
/*************************************************
*         Convert a string to 32-bit             *
*************************************************/

/* The result is always left in buffer32, which is (re)allocated here whenever
it is smaller than 4*len + 4 bytes, that is, one 32-bit unit for each input
byte plus a terminating zero. That is always enough, because every character,
whether a plain byte or a UTF-8 sequence, occupies at least one byte of input
and produces exactly one unit of output.

Surrogate values are faulted only when a pattern is converted in UTF mode; in
data lines they are let through. Values above 0x10ffff are likewise faulted
only in UTF mode.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-32)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 32-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered in UTF mode
             OR -3 if a surrogate is encountered in a UTF mode pattern

The program exits if memory for the buffer cannot be obtained.
*/

static int
to32(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint32 *out;
int needed = 4*len + 4;

if (buffer32_size < needed)
  {
  if (buffer32 != NULL) free(buffer32);
  buffer32_size = needed;
  buffer32 = (pcre_uint32 *)malloc(buffer32_size);
  if (buffer32 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
    exit(1);
    }
  }

out = buffer32;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  pcre_uint32 c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (utf)
      {
      if (c > 0x10ffff) return -2;
      if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
      }

    p += used;
    len -= used;
    *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: each byte becomes one 32-bit unit. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer32;
}
#endif
1593
1594 /*************************************************
1595 * Read or extend an input line *
1596 *************************************************/
1597
1598 /* Input lines are read into buffer, but both patterns and data lines can be
1599 continued over multiple input lines. In addition, if the buffer fills up, we
1600 want to automatically expand it so as to be able to handle extremely large
1601 lines that are needed for certain stress tests. When the input buffer is
1602 expanded, the other two buffers must also be expanded likewise, and the
1603 contents of pbuffer, which are a copy of the input for callouts, must be
1604 preserved (for when expansion happens for a data line). This is not the most
1605 optimal way of handling this, but hey, this is just a test program!
1606
1607 Arguments:
1608 f the file to read
1609 start where in buffer to start (this *must* be within buffer)
1610 prompt for stdin or readline()
1611
1612 Returns: pointer to the start of new data
1613 could be a copy of start, or could be moved
1614 NULL if no data read and EOF reached
1615 */
1616
1617 static pcre_uint8 *
1618 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1619 {
1620 pcre_uint8 *here = start;
1621
1622 for (;;)
1623 {
1624 size_t rlen = (size_t)(buffer_size - (here - buffer));
1625
1626 if (rlen > 1000)
1627 {
1628 int dlen;
1629
1630 /* If libreadline or libedit support is required, use readline() to read a
1631 line if the input is a terminal. Note that readline() removes the trailing
1632 newline, so we must put it back again, to be compatible with fgets(). */
1633
1634 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1635 if (isatty(fileno(f)))
1636 {
1637 size_t len;
1638 char *s = readline(prompt);
1639 if (s == NULL) return (here == start)? NULL : start;
1640 len = strlen(s);
1641 if (len > 0) add_history(s);
1642 if (len > rlen - 1) len = rlen - 1;
1643 memcpy(here, s, len);
1644 here[len] = '\n';
1645 here[len+1] = 0;
1646 free(s);
1647 }
1648 else
1649 #endif
1650
1651 /* Read the next line by normal means, prompting if the file is stdin. */
1652
1653 {
1654 if (f == stdin) printf("%s", prompt);
1655 if (fgets((char *)here, rlen, f) == NULL)
1656 return (here == start)? NULL : start;
1657 }
1658
1659 dlen = (int)strlen((char *)here);
1660 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1661 here += dlen;
1662 }
1663
1664 else
1665 {
1666 int new_buffer_size = 2*buffer_size;
1667 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1668 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1669
1670 if (new_buffer == NULL || new_pbuffer == NULL)
1671 {
1672 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1673 exit(1);
1674 }
1675
1676 memcpy(new_buffer, buffer, buffer_size);
1677 memcpy(new_pbuffer, pbuffer, buffer_size);
1678
1679 buffer_size = new_buffer_size;
1680
1681 start = new_buffer + (start - buffer);
1682 here = new_buffer + (here - buffer);
1683
1684 free(buffer);
1685 free(pbuffer);
1686
1687 buffer = new_buffer;
1688 pbuffer = new_pbuffer;
1689 }
1690 }
1691
1692 return NULL; /* Control never gets here */
1693 }
1694
1695
1696
1697 /*************************************************
1698 * Read number from string *
1699 *************************************************/
1700
1701 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1702 around with conditional compilation, just do the job by hand. It is only used
1703 for unpicking arguments, so just keep it simple.
1704
1705 Arguments:
1706 str string to be converted
1707 endptr where to put the end pointer
1708
1709 Returns: the unsigned long
1710 */
1711
1712 static int
1713 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1714 {
1715 int result = 0;
1716 while(*str != 0 && isspace(*str)) str++;
1717 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1718 *endptr = str;
1719 return(result);
1720 }
1721
1722
1723
1724 /*************************************************
1725 * Print one character *
1726 *************************************************/
1727
1728 /* Print a single character either literally, or as a hex escape. */
1729
1730 static int pchar(pcre_uint32 c, FILE *f)
1731 {
1732 int n;
1733 if (PRINTOK(c))
1734 {
1735 if (f != NULL) fprintf(f, "%c", c);
1736 return 1;
1737 }
1738
1739 if (c < 0x100)
1740 {
1741 if (use_utf)
1742 {
1743 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1744 return 6;
1745 }
1746 else
1747 {
1748 if (f != NULL) fprintf(f, "\\x%02x", c);
1749 return 4;
1750 }
1751 }
1752
1753 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1754 return n >= 0 ? n : 0;
1755 }
1756
1757
1758
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Outputs a byte string by means of pchar(). When use_utf is set, each valid
UTF-8 sequence that lies wholly within the string is shown as one character;
any byte that does not start such a sequence is shown on its own.

Arguments:
  p          points to the string
  length     number of bytes, or negative for a zero-terminated string
  f          the file to write to, or NULL just to count

Returns:     the number of characters printed (or counted)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
pcre_uint32 c = 0;
int yield = 0;

if (length < 0)
  length = strlen((char *)p);

while (length > 0)
  {
  int used = 1;                  /* Bytes taken by this character */
  c = *p;

#if !defined NOUTF
  if (use_utf)
    {
    pcre_uint32 wide = 0;
    int rc = utf82ord(p, &wide);
    if (rc > 0 && rc <= length)  /* Mustn't run over the end */
      {
      used = rc;
      c = wide;
      }
    }
#endif

  p += used;
  length -= used;
  yield += pchar(c, f);
  }

return yield;
}
#endif
1797
1798
1799
1800 #ifdef SUPPORT_PCRE16
1801 /*************************************************
1802 * Find length of 0-terminated 16-bit string *
1803 *************************************************/
1804
1805 static int strlen16(PCRE_SPTR16 p)
1806 {
1807 int len = 0;
1808 while (*p++ != 0) len++;
1809 return len;
1810 }
1811 #endif /* SUPPORT_PCRE16 */
1812
1813
1814
1815 #ifdef SUPPORT_PCRE32
1816 /*************************************************
1817 * Find length of 0-terminated 32-bit string *
1818 *************************************************/
1819
1820 static int strlen32(PCRE_SPTR32 p)
1821 {
1822 int len = 0;
1823 while (*p++ != 0) len++;
1824 return len;
1825 }
1826 #endif /* SUPPORT_PCRE32 */
1827
1828
1829
1830 #ifdef SUPPORT_PCRE16
1831 /*************************************************
1832 * Print 16-bit character string *
1833 *************************************************/
1834
1835 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1836 If handed a NULL file, just counts chars without printing. */
1837
1838 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1839 {
1840 int yield = 0;
1841
1842 if (length < 0)
1843 length = strlen16(p);
1844
1845 while (length-- > 0)
1846 {
1847 pcre_uint32 c = *p++ & 0xffff;
1848 #if !defined NOUTF
1849 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1850 {
1851 int d = *p & 0xffff;
1852 if (d >= 0xDC00 && d < 0xDFFF)
1853 {
1854 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1855 length--;
1856 p++;
1857 }
1858 }
1859 #endif
1860 yield += pchar(c, f);
1861 }
1862
1863 return yield;
1864 }
1865 #endif /* SUPPORT_PCRE16 */
1866
1867
1868
1869 #ifdef SUPPORT_PCRE32
1870 /*************************************************
1871 * Print 32-bit character string *
1872 *************************************************/
1873
1874 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
1875 If handed a NULL file, just counts chars without printing. */
1876
1877 static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
1878 {
1879 int yield = 0;
1880
1881 if (length < 0)
1882 length = strlen32(p);
1883
1884 while (length-- > 0)
1885 {
1886 pcre_uint32 c = *p++;
1887 yield += pchar(c, f);
1888 }
1889
1890 return yield;
1891 }
1892 #endif /* SUPPORT_PCRE32 */
1893
1894
1895
1896 #ifdef SUPPORT_PCRE8
1897 /*************************************************
1898 * Read a capture name (8-bit) and check it *
1899 *************************************************/
1900
1901 static pcre_uint8 *
1902 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1903 {
1904 pcre_uint8 *npp = *pp;
1905 while (isalnum(*p)) *npp++ = *p++;
1906 *npp++ = 0;
1907 *npp = 0;
1908 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1909 {
1910 fprintf(outfile, "no parentheses with name \"");
1911 PCHARSV(*pp, 0, -1, outfile);
1912 fprintf(outfile, "\"\n");
1913 }
1914
1915 *pp = npp;
1916 return p;
1917 }
1918 #endif /* SUPPORT_PCRE8 */
1919
1920
1921
1922 #ifdef SUPPORT_PCRE16
1923 /*************************************************
1924 * Read a capture name (16-bit) and check it *
1925 *************************************************/
1926
1927 /* Note that the text being read is 8-bit. */
1928
1929 static pcre_uint8 *
1930 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1931 {
1932 pcre_uint16 *npp = *pp;
1933 while (isalnum(*p)) *npp++ = *p++;
1934 *npp++ = 0;
1935 *npp = 0;
1936 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1937 {
1938 fprintf(outfile, "no parentheses with name \"");
1939 PCHARSV(*pp, 0, -1, outfile);
1940 fprintf(outfile, "\"\n");
1941 }
1942 *pp = npp;
1943 return p;
1944 }
1945 #endif /* SUPPORT_PCRE16 */
1946
1947
1948
1949 #ifdef SUPPORT_PCRE32
1950 /*************************************************
1951 * Read a capture name (32-bit) and check it *
1952 *************************************************/
1953
1954 /* Note that the text being read is 8-bit. */
1955
1956 static pcre_uint8 *
1957 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
1958 {
1959 pcre_uint32 *npp = *pp;
1960 while (isalnum(*p)) *npp++ = *p++;
1961 *npp++ = 0;
1962 *npp = 0;
1963 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
1964 {
1965 fprintf(outfile, "no parentheses with name \"");
1966 PCHARSV(*pp, 0, -1, outfile);
1967 fprintf(outfile, "\"\n");
1968 }
1969 *pp = npp;
1970 return p;
1971 }
1972 #endif /* SUPPORT_PCRE32 */
1973
1974
1975
1976 /*************************************************
1977 * Callout function *
1978 *************************************************/
1979
1980 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1981 the match. Yield zero unless more callouts than the fail count, or the callout
1982 data is not zero. */
1983
1984 static int callout(pcre_callout_block *cb)
1985 {
1986 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1987 int i, pre_start, post_start, subject_length;
1988
1989 if (callout_extra)
1990 {
1991 fprintf(f, "Callout %d: last capture = %d\n",
1992 cb->callout_number, cb->capture_last);
1993
1994 for (i = 0; i < cb->capture_top * 2; i += 2)
1995 {
1996 if (cb->offset_vector[i] < 0)
1997 fprintf(f, "%2d: <unset>\n", i/2);
1998 else
1999 {
2000 fprintf(f, "%2d: ", i/2);
2001 PCHARSV(cb->subject, cb->offset_vector[i],
2002 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2003 fprintf(f, "\n");
2004 }
2005 }
2006 }
2007
2008 /* Re-print the subject in canonical form, the first time or if giving full
2009 datails. On subsequent calls in the same match, we use pchars just to find the
2010 printed lengths of the substrings. */
2011
2012 if (f != NULL) fprintf(f, "--->");
2013
2014 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2015 PCHARS(post_start, cb->subject, cb->start_match,
2016 cb->current_position - cb->start_match, f);
2017
2018 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2019
2020 PCHARSV(cb->subject, cb->current_position,
2021 cb->subject_length - cb->current_position, f);
2022
2023 if (f != NULL) fprintf(f, "\n");
2024
2025 /* Always print appropriate indicators, with callout number if not already
2026 shown. For automatic callouts, show the pattern offset. */
2027
2028 if (cb->callout_number == 255)
2029 {
2030 fprintf(outfile, "%+3d ", cb->pattern_position);
2031 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2032 }
2033 else
2034 {
2035 if (callout_extra) fprintf(outfile, " ");
2036 else fprintf(outfile, "%3d ", cb->callout_number);
2037 }
2038
2039 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2040 fprintf(outfile, "^");
2041
2042 if (post_start > 0)
2043 {
2044 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2045 fprintf(outfile, "^");
2046 }
2047
2048 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2049 fprintf(outfile, " ");
2050
2051 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2052 pbuffer + cb->pattern_position);
2053
2054 fprintf(outfile, "\n");
2055 first_callout = 0;
2056
2057 if (cb->mark != last_callout_mark)
2058 {
2059 if (cb->mark == NULL)
2060 fprintf(outfile, "Latest Mark: <unset>\n");
2061 else
2062 {
2063 fprintf(outfile, "Latest Mark: ");
2064 PCHARSV(cb->mark, 0, -1, outfile);
2065 putc('\n', outfile);
2066 }
2067 last_callout_mark = cb->mark;
2068 }
2069
2070 if (cb->callout_data != NULL)
2071 {
2072 int callout_data = *((int *)(cb->callout_data));
2073 if (callout_data != 0)
2074 {
2075 fprintf(outfile, "Callout data = %d\n", callout_data);
2076 return callout_data;
2077 }
2078 }
2079
2080 return (cb->callout_number != callout_fail_id)? 0 :
2081 (++callout_count >= callout_fail_count)? 1 : 0;
2082 }
2083
2084
2085 /*************************************************
2086 * Local malloc functions *
2087 *************************************************/
2088
2089 /* Alternative malloc function, to test functionality and save the size of a
2090 compiled re, which is the first store request that pcre_compile() makes. The
2091 show_malloc variable is set only during matching. */
2092
2093 static void *new_malloc(size_t size)
2094 {
2095 void *block = malloc(size);
2096 gotten_store = size;
2097 if (first_gotten_store == 0) first_gotten_store = size;
2098 if (show_malloc)
2099 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2100 return block;
2101 }
2102
2103 static void new_free(void *block)
2104 {
2105 if (show_malloc)
2106 fprintf(outfile, "free %p\n", block);
2107 free(block);
2108 }
2109
2110 /* For recursion malloc/free, to test stacking calls */
2111
2112 static void *stack_malloc(size_t size)
2113 {
2114 void *block = malloc(size);
2115 if (show_malloc)
2116 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2117 return block;
2118 }
2119
2120 static void stack_free(void *block)
2121 {
2122 if (show_malloc)
2123 fprintf(outfile, "stack_free %p\n", block);
2124 free(block);
2125 }
2126
2127
2128 /*************************************************
2129 * Call pcre_fullinfo() *
2130 *************************************************/
2131
2132 /* Get one piece of information from the pcre_fullinfo() function. When only
2133 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2134 value, but the code is defensive.
2135
2136 Arguments:
2137 re compiled regex
2138 study study data
2139 option PCRE_INFO_xxx option
2140 ptr where to put the data
2141
2142 Returns: 0 when OK, < 0 on error
2143 */
2144
2145 static int
2146 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2147 {
2148 int rc;
2149
2150 if (pcre_mode == PCRE32_MODE)
2151 #ifdef SUPPORT_PCRE32
2152 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2153 #else
2154 rc = PCRE_ERROR_BADMODE;
2155 #endif
2156 else if (pcre_mode == PCRE16_MODE)
2157 #ifdef SUPPORT_PCRE16
2158 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2159 #else
2160 rc = PCRE_ERROR_BADMODE;
2161 #endif
2162 else
2163 #ifdef SUPPORT_PCRE8
2164 rc = pcre_fullinfo(re, study, option, ptr);
2165 #else
2166 rc = PCRE_ERROR_BADMODE;
2167 #endif
2168
2169 if (rc < 0)
2170 {
2171 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2172 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2173 if (rc == PCRE_ERROR_BADMODE)
2174 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2175 "%d-bit mode\n", 8 * CHAR_SIZE,
2176 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2177 }
2178
2179 return rc;
2180 }
2181
2182
2183
2184 /*************************************************
2185 * Swap byte functions *
2186 *************************************************/
2187
2188 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2189 value, respectively.
2190
2191 Arguments:
2192 value any number
2193
2194 Returns: the byte swapped value
2195 */
2196
2197 static pcre_uint32
2198 swap_uint32(pcre_uint32 value)
2199 {
2200 return ((value & 0x000000ff) << 24) |
2201 ((value & 0x0000ff00) << 8) |
2202 ((value & 0x00ff0000) >> 8) |
2203 (value >> 24);
2204 }
2205
2206 static pcre_uint16
2207 swap_uint16(pcre_uint16 value)
2208 {
2209 return (value >> 8) | (value << 8);
2210 }
2211
2212
2213
2214 /*************************************************
2215 * Flip bytes in a compiled pattern *
2216 *************************************************/
2217
2218 /* This function is called if the 'F' option was present on a pattern that is
2219 to be written to a file. We flip the bytes of all the integer fields in the
2220 regex data block and the study block. In 16-bit mode this also flips relevant
2221 bytes in the pattern itself. This is to make it possible to test PCRE's
2222 ability to reload byte-flipped patterns, e.g. those compiled on a different
2223 architecture. */
2224
2225 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2226 static void
2227 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2228 {
2229 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2230 #ifdef SUPPORT_PCRE16
2231 int op;
2232 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2233 int length = re->name_count * re->name_entry_size;
2234 #ifdef SUPPORT_UTF
2235 BOOL utf = (re->options & PCRE_UTF16) != 0;
2236 BOOL utf16_char = FALSE;
2237 #endif /* SUPPORT_UTF */
2238 #endif /* SUPPORT_PCRE16 */
2239
2240 /* Always flip the bytes in the main data block and study blocks. */
2241
2242 re->magic_number = REVERSED_MAGIC_NUMBER;
2243 re->size = swap_uint32(re->size);
2244 re->options = swap_uint32(re->options);
2245 re->flags = swap_uint16(re->flags);
2246 re->top_bracket = swap_uint16(re->top_bracket);
2247 re->top_backref = swap_uint16(re->top_backref);
2248 re->first_char = swap_uint16(re->first_char);
2249 re->req_char = swap_uint16(re->req_char);
2250 re->name_table_offset = swap_uint16(re->name_table_offset);
2251 re->name_entry_size = swap_uint16(re->name_entry_size);
2252 re->name_count = swap_uint16(re->name_count);
2253
2254 if (extra != NULL)
2255 {
2256 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2257 rsd->size = swap_uint32(rsd->size);
2258 rsd->flags = swap_uint32(rsd->flags);
2259 rsd->minlength = swap_uint32(rsd->minlength);
2260 }
2261
2262 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2263 in the name table, if present, and then in the pattern itself. */
2264
2265 #ifdef SUPPORT_PCRE16
2266 if (pcre_mode != PCRE16_MODE) return;
2267
2268 while(TRUE)
2269 {
2270 /* Swap previous characters. */
2271 while (length-- > 0)
2272 {
2273 *ptr = swap_uint16(*ptr);
2274 ptr++;
2275 }
2276 #ifdef SUPPORT_UTF
2277 if (utf16_char)
2278 {
2279 if ((ptr[-1] & 0xfc00) == 0xd800)
2280 {
2281 /* We know that there is only one extra character in UTF-16. */
2282 *ptr = swap_uint16(*ptr);
2283 ptr++;
2284 }
2285 }
2286 utf16_char = FALSE;
2287 #endif /* SUPPORT_UTF */
2288
2289 /* Get next opcode. */
2290
2291 length = 0;
2292 op = *ptr;
2293 *ptr++ = swap_uint16(op);
2294
2295 switch (op)
2296 {
2297 case OP_END:
2298 return;
2299
2300 #ifdef SUPPORT_UTF
2301 case OP_CHAR:
2302 case OP_CHARI:
2303 case OP_NOT:
2304 case OP_NOTI:
2305 case OP_STAR:
2306 case OP_MINSTAR:
2307 case OP_PLUS:
2308 case OP_MINPLUS:
2309 case OP_QUERY:
2310 case OP_MINQUERY:
2311 case OP_UPTO:
2312 case OP_MINUPTO:
2313 case OP_EXACT:
2314 case OP_POSSTAR:
2315 case OP_POSPLUS:
2316 case OP_POSQUERY:
2317 case OP_POSUPTO:
2318 case OP_STARI:
2319 case OP_MINSTARI:
2320 case OP_PLUSI:
2321 case OP_MINPLUSI:
2322 case OP_QUERYI:
2323 case OP_MINQUERYI:
2324 case OP_UPTOI:
2325 case OP_MINUPTOI:
2326 case OP_EXACTI:
2327 case OP_POSSTARI:
2328 case OP_POSPLUSI:
2329 case OP_POSQUERYI:
2330 case OP_POSUPTOI:
2331 case OP_NOTSTAR:
2332 case OP_NOTMINSTAR:
2333 case OP_NOTPLUS:
2334 case OP_NOTMINPLUS:
2335 case OP_NOTQUERY:
2336 case OP_NOTMINQUERY:
2337 case OP_NOTUPTO:
2338 case OP_NOTMINUPTO:
2339 case OP_NOTEXACT:
2340 case OP_NOTPOSSTAR:
2341 case OP_NOTPOSPLUS:
2342 case OP_NOTPOSQUERY:
2343 case OP_NOTPOSUPTO:
2344 case OP_NOTSTARI:
2345 case OP_NOTMINSTARI:
2346 case OP_NOTPLUSI:
2347 case OP_NOTMINPLUSI:
2348 case OP_NOTQUERYI:
2349 case OP_NOTMINQUERYI:
2350 case OP_NOTUPTOI:
2351 case OP_NOTMINUPTOI:
2352 case OP_NOTEXACTI:
2353 case OP_NOTPOSSTARI:
2354 case OP_NOTPOSPLUSI:
2355 case OP_NOTPOSQUERYI:
2356 case OP_NOTPOSUPTOI:
2357 if (utf) utf16_char = TRUE;
2358 #endif
2359 /* Fall through. */
2360
2361 default:
2362 length = OP_lengths16[op] - 1;
2363 break;
2364
2365 case OP_CLASS:
2366 case OP_NCLASS:
2367 /* Skip the character bit map. */
2368 ptr += 32/sizeof(pcre_uint16);
2369 length = 0;
2370 break;
2371
2372 case OP_XCLASS:
2373 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2374 if (LINK_SIZE > 1)
2375 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2376 - (1 + LINK_SIZE + 1));
2377 else
2378 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2379
2380 /* Reverse the size of the XCLASS instance. */
2381 *ptr = swap_uint16(*ptr);
2382 ptr++;
2383 if (LINK_SIZE > 1)
2384 {
2385 *ptr = swap_uint16(*ptr);
2386 ptr++;
2387 }
2388
2389 op = *ptr;
2390 *ptr = swap_uint16(op);
2391 ptr++;
2392 if ((op & XCL_MAP) != 0)
2393 {
2394 /* Skip the character bit map. */
2395 ptr += 32/sizeof(pcre_uint16);
2396 length -= 32/sizeof(pcre_uint16);
2397 }
2398 break;
2399 }
2400 }
2401 /* Control should never reach here in 16 bit mode. */
2402 #endif /* SUPPORT_PCRE16 */
2403 }
2404 #endif /* SUPPORT_PCRE[8|16] */
2405
2406
2407
2408 #if defined SUPPORT_PCRE32
2409 static void
2410 regexflip_32(pcre *ere, pcre_extra *extra)
2411 {
2412 real_pcre32 *re = (real_pcre32 *)ere;
2413 int op;
2414 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2415 int length = re->name_count * re->name_entry_size;
2416 #ifdef SUPPORT_UTF
2417 BOOL utf = (re->options & PCRE_UTF32) != 0;
2418 #endif /* SUPPORT_UTF */
2419
2420 /* Always flip the bytes in the main data block and study blocks. */
2421
2422 re->magic_number = REVERSED_MAGIC_NUMBER;
2423 re->size = swap_uint32(re->size);
2424 re->options = swap_uint32(re->options);
2425 re->flags = swap_uint16(re->flags);
2426 re->top_bracket = swap_uint16(re->top_bracket);
2427 re->top_backref = swap_uint16(re->top_backref);
2428 re->first_char = swap_uint32(re->first_char);
2429 re->req_char = swap_uint32(re->req_char);
2430 re->name_table_offset = swap_uint16(re->name_table_offset);
2431 re->name_entry_size = swap_uint16(re->name_entry_size);
2432 re->name_count = swap_uint16(re->name_count);
2433
2434 if (extra != NULL)
2435 {
2436 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2437 rsd->size = swap_uint32(rsd->size);
2438 rsd->flags = swap_uint32(rsd->flags);
2439 rsd->minlength = swap_uint32(rsd->minlength);
2440 }
2441
2442 /* In 32-bit mode we must swap bytes
2443 in the name table, if present, and then in the pattern itself. */
2444
2445 while(TRUE)
2446 {
2447 /* Swap previous characters. */
2448 while (length-- > 0)
2449 {
2450 *ptr = swap_uint32(*ptr);
2451 ptr++;
2452 }
2453
2454 /* Get next opcode. */
2455
2456 length = 0;
2457 op = *ptr;
2458 *ptr++ = swap_uint32(op);
2459
2460 switch (op)
2461 {
2462 case OP_END:
2463 return;
2464
2465 default:
2466 length = OP_lengths32[op] - 1;
2467 break;
2468
2469 case OP_CLASS:
2470 case OP_NCLASS:
2471 /* Skip the character bit map. */
2472 ptr += 32/sizeof(pcre_uint32);
2473 length = 0;
2474 break;
2475
2476 case OP_XCLASS:
2477 /* LINK_SIZE can only be 1 in 32-bit mode. */
2478 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2479
2480 /* Reverse the size of the XCLASS instance. */
2481 *ptr = swap_uint32(*ptr);
2482 ptr++;
2483
2484 op = *ptr;
2485 *ptr = swap_uint32(op);
2486 ptr++;
2487 if ((op & XCL_MAP) != 0)
2488 {
2489 /* Skip the character bit map. */
2490 ptr += 32/sizeof(pcre_uint32);
2491 length -= 32/sizeof(pcre_uint32);
2492 }
2493 break;
2494 }
2495 }
2496 /* Control should never reach here in 32 bit mode. */
2497 }
2498
2499 #endif /* SUPPORT_PCRE32 */
2500
2501
2502
2503 static void
2504 regexflip(pcre *ere, pcre_extra *extra)
2505 {
2506 #if defined SUPPORT_PCRE32
2507 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2508 regexflip_32(ere, extra);
2509 #endif
2510 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2511 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2512 regexflip8_or_16(ere, extra);
2513 #endif
2514 }
2515
2516
2517
2518 /*************************************************
2519 * Check match or recursion limit *
2520 *************************************************/
2521
2522 static int
2523 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2524 int start_offset, int options, int *use_offsets, int use_size_offsets,
2525 int flag, unsigned long int *limit, int errnumber, const char *msg)
2526 {
2527 int count;
2528 int min = 0;
2529 int mid = 64;
2530 int max = -1;
2531
2532 extra->flags |= flag;
2533
2534 for (;;)
2535 {
2536 *limit = mid;
2537
2538 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2539 use_offsets, use_size_offsets);
2540
2541 if (count == errnumber)
2542 {
2543 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2544 min = mid;
2545 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2546 }
2547
2548 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2549 count == PCRE_ERROR_PARTIAL)
2550 {
2551 if (mid == min + 1)
2552 {
2553 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2554 break;
2555 }
2556 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2557 max = mid;
2558 mid = (min + mid)/2;
2559 }
2560 else break; /* Some other error */
2561 }
2562
2563 extra->flags &= ~flag;
2564 return count;
2565 }
2566
2567
2568
2569 /*************************************************
2570 * Case-independent strncmp() function *
2571 *************************************************/
2572
2573 /*
2574 Arguments:
2575 s first string
2576 t second string
2577 n number of characters to compare
2578
2579 Returns: < 0, = 0, or > 0, according to the comparison
2580 */
2581
2582 static int
2583 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2584 {
2585 while (n--)
2586 {
2587 int c = tolower(*s++) - tolower(*t++);
2588 if (c) return c;
2589 }
2590 return 0;
2591 }
2592
2593
2594
2595 /*************************************************
2596 * Check newline indicator *
2597 *************************************************/
2598
2599 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2600 a message and return 0 if there is no match.
2601
2602 Arguments:
2603 p points after the leading '<'
2604 f file for error message
2605
2606 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2607 */
2608
2609 static int
2610 check_newline(pcre_uint8 *p, FILE *f)
2611 {
2612 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2613 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2614 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2615 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2616 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2617 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2618 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2619 fprintf(f, "Unknown newline type at: <%s\n", p);
2620 return 0;
2621 }
2622
2623
2624
2625 /*************************************************
2626 * Usage function *
2627 *************************************************/
2628
/* Write the command-line help text to stdout. Options that depend on how the
program was built (-8, -16, -32, -dfa, -p) are listed only when the
corresponding support is compiled in. */

static void
usage(void)
{
  printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
  printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  printf("If input is a terminal, readline() is used to read from it.\n");
#else
  printf("This version of pcretest is not linked with readline().\n");
#endif
  printf("\nOptions:\n");
#ifdef SUPPORT_PCRE8
  printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
  printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
  printf(" -32 use the 32-bit library\n");
#endif
  printf(" -b show compiled code\n");
  printf(" -C show PCRE compile-time options and exit\n");
  printf(" -C arg show a specific compile-time option\n");
  printf(" and exit with its value. The arg can be:\n");
  printf(" linksize internal link size [2, 3, 4]\n");
  printf(" pcre8 8 bit library support enabled [0, 1]\n");
  printf(" pcre16 16 bit library support enabled [0, 1]\n");
  printf(" pcre32 32 bit library support enabled [0, 1]\n");
  printf(" utf Unicode Transformation Format supported [0, 1]\n");
  printf(" ucp Unicode Properties supported [0, 1]\n");
  printf(" jit Just-in-time compiler supported [0, 1]\n");
  printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
  printf(" ebcdic compiled for an EBCDIC environment [0, 1]\n");
  printf(" ebcdic-nl the EBCDIC newline code, or 0 if not EBCDIC\n");
  printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
  printf(" -dfa force DFA matching for all subjects\n");
#endif
  printf(" -help show usage information\n");
  printf(" -i show information about compiled patterns\n"
         " -M find MATCH_LIMIT minimum for each subject\n"
         " -m output memory used information\n"
         " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
  printf(" -p use POSIX interface\n");
#endif
  printf(" -q quiet: do not output PCRE version number at start\n");
  printf(" -S <n> set stack size to <n> megabytes\n");
  printf(" -s force each pattern to be studied at basic level\n"
         " -s+ force each pattern to be studied, using JIT if available\n"
         " -s++ ditto, verifying when JIT was actually used\n"
         " -s+n force each pattern to be studied, using JIT if available,\n"
         " where 1 <= n <= 7 selects JIT options\n"
         " -s++n ditto, verifying when JIT was actually used\n"
         " -t time compilation and execution\n");
  printf(" -t <n> time compilation and execution, repeating <n> times\n");
  printf(" -tm time execution (matching) only\n");
  printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
}
2683
2684
2685
2686 /*************************************************
2687 * Main Program *
2688 *************************************************/
2689
2690 /* Read lines from named file or stdin and write to named file or stdout; lines
2691 consist of a regular expression, in delimiters and optionally followed by
2692 options, followed by a set of test data, terminated by an empty line. */
2693
2694 int main(int argc, char **argv)
2695 {
2696 FILE *infile = stdin;
2697 const char *version;
2698 int options = 0;
2699 int study_options = 0;
2700 int default_find_match_limit = FALSE;
2701 int op = 1;
2702 int timeit = 0;
2703 int timeitm = 0;
2704 int showinfo = 0;
2705 int showstore = 0;
2706 int force_study = -1;
2707 int force_study_options = 0;
2708 int quiet = 0;
2709 int size_offsets = 45;
2710 int size_offsets_max;
2711 int *offsets = NULL;
2712 int debug = 0;
2713 int done = 0;
2714 int all_use_dfa = 0;
2715 int verify_jit = 0;
2716 int yield = 0;
2717 int stack_size;
2718 pcre_uint8 *dbuffer = NULL;
2719 size_t dbuffer_size = 1u << 14;
2720
2721 #if !defined NOPOSIX
2722 int posix = 0;
2723 #endif
2724 #if !defined NODFA
2725 int *dfa_workspace = NULL;
2726 #endif
2727
2728 pcre_jit_stack *jit_stack = NULL;
2729
2730 /* These vectors store, end-to-end, a list of zero-terminated captured
2731 substring names, each list itself being terminated by an empty name. Assume
2732 that 1024 is plenty long enough for the few names we'll be testing. It is
2733 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2734 for the actual memory, to ensure alignment. */
2735
2736 pcre_uint32 copynames[1024];
2737 pcre_uint32 getnames[1024];
2738
2739 #ifdef SUPPORT_PCRE32
2740 pcre_uint32 *cn32ptr;
2741 pcre_uint32 *gn32ptr;
2742 #endif
2743
2744 #ifdef SUPPORT_PCRE16
2745 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2746 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2747 pcre_uint16 *cn16ptr;
2748 pcre_uint16 *gn16ptr;
2749 #endif
2750
2751 #ifdef SUPPORT_PCRE8
2752 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2753 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2754 pcre_uint8 *cn8ptr;
2755 pcre_uint8 *gn8ptr;
2756 #endif
2757
2758 /* Get buffers from malloc() so that valgrind will check their misuse when
2759 debugging. They grow automatically when very long lines are read. The 16-
2760 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2761
2762 buffer = (pcre_uint8 *)malloc(buffer_size);
2763 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2764
2765 /* The outfile variable is static so that new_malloc can use it. */
2766
2767 outfile = stdout;
2768
2769 /* The following _setmode() stuff is some Windows magic that tells its runtime
2770 library to translate CRLF into a single LF character. At least, that's what
2771 I've been told: never having used Windows I take this all on trust. Originally
2772 it set 0x8000, but then I was advised that _O_BINARY was better. */
2773
2774 #if defined(_WIN32) || defined(WIN32)
2775 _setmode( _fileno( stdout ), _O_BINARY );
2776 #endif
2777
2778 /* Get the version number: both pcre_version() and pcre16_version() give the
2779 same answer. We just need to ensure that we call one that is available. */
2780
2781 #if defined SUPPORT_PCRE8
2782 version = pcre_version();
2783 #elif defined SUPPORT_PCRE16
2784 version = pcre16_version();
2785 #elif defined SUPPORT_PCRE32
2786 version = pcre32_version();
2787 #endif
2788
2789 /* Scan options */
2790
2791 while (argc > 1 && argv[op][0] == '-')
2792 {
2793 pcre_uint8 *endptr;
2794 char *arg = argv[op];
2795
2796 if (strcmp(arg, "-m") == 0) showstore = 1;
2797 else if (strcmp(arg, "-s") == 0) force_study = 0;
2798
2799 else if (strncmp(arg, "-s+", 3) == 0)
2800 {
2801 arg += 3;
2802 if (*arg == '+') { arg++; verify_jit = TRUE; }
2803 force_study = 1;
2804 if (*arg == 0)
2805 force_study_options = jit_study_bits[6];
2806 else if (*arg >= '1' && *arg <= '7')
2807 force_study_options = jit_study_bits[*arg - '1'];
2808 else goto BAD_ARG;
2809 }
2810 else if (strcmp(arg, "-8") == 0)
2811 {
2812 #ifdef SUPPORT_PCRE8
2813 pcre_mode = PCRE8_MODE;
2814 #else
2815 printf("** This version of PCRE was built without 8-bit support\n");
2816 exit(1);
2817 #endif
2818 }
2819 else if (strcmp(arg, "-16") == 0)
2820 {
2821 #ifdef SUPPORT_PCRE16
2822 pcre_mode = PCRE16_MODE;
2823 #else
2824 printf("** This version of PCRE was built without 16-bit support\n");
2825 exit(1);
2826 #endif
2827 }
2828 else if (strcmp(arg, "-32") == 0)
2829 {
2830 #ifdef SUPPORT_PCRE32
2831 pcre_mode = PCRE32_MODE;
2832 #else
2833 printf("** This version of PCRE was built without 32-bit support\n");
2834 exit(1);
2835 #endif
2836 }
2837 else if (strcmp(arg, "-q") == 0) quiet = 1;
2838 else if (strcmp(arg, "-b") == 0) debug = 1;
2839 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2840 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2841 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2842 #if !defined NODFA
2843 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2844 #endif
2845 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2846 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2847 *endptr == 0))
2848 {
2849 op++;
2850 argc--;
2851 }
2852 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2853 {
2854 int both = arg[2] == 0;
2855 int temp;
2856 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2857 *endptr == 0))
2858 {
2859 timeitm = temp;
2860 op++;
2861 argc--;
2862 }
2863 else timeitm = LOOPREPEAT;
2864 if (both) timeit = timeitm;
2865 }
2866 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2867 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2868 *endptr == 0))
2869 {
2870 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2871 printf("PCRE: -S not supported on this OS\n");
2872 exit(1);
2873 #else
2874 int rc;
2875 struct rlimit rlim;
2876 getrlimit(RLIMIT_STACK, &rlim);
2877 rlim.rlim_cur = stack_size * 1024 * 1024;
2878 rc = setrlimit(RLIMIT_STACK, &rlim);
2879 if (rc != 0)
2880 {
2881 printf("PCRE: setrlimit() failed with error %d\n", rc);
2882 exit(1);
2883 }
2884 op++;
2885 argc--;
2886 #endif
2887 }
2888 #if !defined NOPOSIX
2889 else if (strcmp(arg, "-p") == 0) posix = 1;
2890 #endif
2891 else if (strcmp(arg, "-C") == 0)
2892 {
2893 int rc;
2894 unsigned long int lrc;
2895
2896 if (argc > 2)
2897 {
2898 if (strcmp(argv[op + 1], "linksize") == 0)
2899 {
2900 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2901 printf("%d\n", rc);
2902 yield = rc;
2903 }
2904 else if (strcmp(argv[op + 1], "pcre8") == 0)
2905 {
2906 #ifdef SUPPORT_PCRE8
2907 printf("1\n");
2908 yield = 1;
2909 #else
2910 printf("0\n");
2911 yield = 0;
2912 #endif
2913 }
2914 else if (strcmp(argv[op + 1], "pcre16") == 0)
2915 {
2916 #ifdef SUPPORT_PCRE16
2917 printf("1\n");
2918 yield = 1;
2919 #else
2920 printf("0\n");
2921 yield = 0;
2922 #endif
2923 }
2924 else if (strcmp(argv[op + 1], "pcre32") == 0)
2925 {
2926 #ifdef SUPPORT_PCRE32
2927 printf("1\n");
2928 yield = 1;
2929 #else
2930 printf("0\n");
2931 yield = 0;
2932 #endif
2933 goto EXIT;
2934 }
2935 if (strcmp(argv[op + 1], "utf") == 0)
2936 {
2937 #ifdef SUPPORT_PCRE8
2938 if (pcre_mode == PCRE8_MODE)
2939 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2940 #endif
2941 #ifdef SUPPORT_PCRE16
2942 if (pcre_mode == PCRE16_MODE)
2943 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2944 #endif
2945 #ifdef SUPPORT_PCRE32
2946 if (pcre_mode == PCRE32_MODE)
2947 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2948 #endif
2949 printf("%d\n", rc);
2950 yield = rc;
2951 goto EXIT;
2952 }
2953 else if (strcmp(argv[op + 1], "ucp") == 0)
2954 {
2955 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2956 printf("%d\n", rc);
2957 yield = rc;
2958 }
2959 else if (strcmp(argv[op + 1], "jit") == 0)
2960 {
2961 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2962 printf("%d\n", rc);
2963 yield = rc;
2964 }
2965 else if (strcmp(argv[op + 1], "newline") == 0)
2966 {
2967 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2968 print_newline_config(rc, TRUE);
2969 }
2970 else if (strcmp(argv[op + 1], "ebcdic") == 0)
2971 {
2972 #ifdef EBCDIC
2973 printf("1\n");
2974 yield = 1;
2975 #else
2976 printf("0\n");
2977 #endif
2978 }
2979 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2980 {
2981 #ifdef EBCDIC
2982 printf("0x%02x\n", CHAR_LF);
2983 #else
2984 printf("0\n");
2985 #endif
2986 }
2987 else
2988 {
2989 printf("Unknown -C option: %s\n", argv[op + 1]);
2990 }
2991 goto EXIT;
2992 }
2993
2994 /* No argument for -C: output all configuration information. */
2995
2996 printf("PCRE version %s\n", version);
2997 printf("Compiled with\n");
2998
2999 #ifdef EBCDIC
3000 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3001 #endif
3002
3003 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3004 are set, either both UTFs are supported or both are not supported. */
3005
3006 #ifdef SUPPORT_PCRE8
3007 printf(" 8-bit support\n");
3008 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3009 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3010 #endif
3011 #ifdef SUPPORT_PCRE16
3012 printf(" 16-bit support\n");
3013 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3014 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3015 #endif
3016 #ifdef SUPPORT_PCRE32
3017 printf(" 32-bit support\n");
3018 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3019 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3020 #endif
3021
3022 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3023 printf(" %sUnicode properties support\n", rc? "" : "No ");
3024 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3025 if (rc)
3026 {
3027 const char *arch;
3028 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3029 printf(" Just-in-time compiler support: %s\n", arch);
3030 }
3031 else
3032 printf(" No just-in-time compiler support\n");
3033 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3034 print_newline_config(rc, FALSE);
3035 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3036 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3037 "all Unicode newlines");
3038 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3039 printf(" Internal link size = %d\n", rc);
3040 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3041 printf(" POSIX malloc threshold = %d\n", rc);
3042 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3043 printf(" Default match limit = %ld\n", lrc);
3044 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3045 printf(" Default recursion depth limit = %ld\n", lrc);
3046 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3047 printf(" Match recursion uses %s", rc? "stack" : "heap");
3048 if (showstore)
3049 {
3050 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3051 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3052 }
3053 printf("\n");
3054 goto EXIT;
3055 }
3056 else if (strcmp(arg, "-help") == 0 ||
3057 strcmp(arg, "--help") == 0)
3058 {
3059 usage();
3060 goto EXIT;
3061 }
3062 else
3063 {
3064 BAD_ARG:
3065 printf("** Unknown or malformed option %s\n", arg);
3066 usage();
3067 yield = 1;
3068 goto EXIT;
3069 }
3070 op++;
3071 argc--;
3072 }
3073
3074 /* Get the store for the offsets vector, and remember what it was */
3075
3076 size_offsets_max = size_offsets;
3077 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3078 if (offsets == NULL)
3079 {
3080 printf("** Failed to get %d bytes of memory for offsets vector\n",
3081 (int)(size_offsets_max * sizeof(int)));
3082 yield = 1;
3083 goto EXIT;
3084 }
3085
3086 /* Sort out the input and output files */
3087
3088 if (argc > 1)
3089 {
3090 infile = fopen(argv[op], INPUT_MODE);
3091 if (infile == NULL)
3092 {
3093 printf("** Failed to open %s\n", argv[op]);
3094 yield = 1;
3095 goto EXIT;
3096 }
3097 }
3098
3099 if (argc > 2)
3100 {
3101 outfile = fopen(argv[op+1], OUTPUT_MODE);
3102 if (outfile == NULL)
3103 {
3104 printf("** Failed to open %s\n", argv[op+1]);
3105 yield = 1;
3106 goto EXIT;
3107 }
3108 }
3109
3110 /* Set alternative malloc function */
3111
3112 #ifdef SUPPORT_PCRE8
3113 pcre_malloc = new_malloc;
3114 pcre_free = new_free;
3115 pcre_stack_malloc = stack_malloc;
3116 pcre_stack_free = stack_free;
3117 #endif
3118
3119 #ifdef SUPPORT_PCRE16
3120 pcre16_malloc = new_malloc;
3121 pcre16_free = new_free;
3122 pcre16_stack_malloc = stack_malloc;
3123 pcre16_stack_free = stack_free;
3124 #endif
3125
3126 #ifdef SUPPORT_PCRE32
3127 pcre32_malloc = new_malloc;
3128 pcre32_free = new_free;
3129 pcre32_stack_malloc = stack_malloc;
3130 pcre32_stack_free = stack_free;
3131 #endif
3132
3133 /* Heading line unless quiet, then prompt for first regex if stdin */
3134
3135 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3136
3137 /* Main loop */
3138
3139 while (!done)
3140 {
3141 pcre *re = NULL;
3142 pcre_extra *extra = NULL;
3143
3144 #if !defined NOPOSIX /* There are still compilers that require no indent */
3145 regex_t preg;
3146 int do_posix = 0;
3147 #endif
3148
3149 const char *error;
3150 pcre_uint8 *markptr;
3151 pcre_uint8 *p, *pp, *ppp;
3152 pcre_uint8 *to_file = NULL;
3153 const pcre_uint8 *tables = NULL;
3154 unsigned long int get_options;
3155 unsigned long int true_size, true_study_size = 0;
3156 size_t size, regex_gotten_store;
3157 int do_allcaps = 0;
3158 int do_mark = 0;
3159 int do_study = 0;
3160 int no_force_study = 0;
3161 int do_debug = debug;
3162 int do_G = 0;
3163 int do_g = 0;
3164 int do_showinfo = showinfo;
3165 int do_showrest = 0;
3166 int do_showcaprest = 0;
3167 int do_flip = 0;
3168 int erroroffset, len, delimiter, poffset;
3169
3170 #if !defined NODFA
3171 int dfa_matched = 0;
3172 #endif
3173
3174 use_utf = 0;
3175 debug_lengths = 1;
3176
3177 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3178 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3179 fflush(outfile);
3180
3181 p = buffer;
3182 while (isspace(*p)) p++;
3183 if (*p == 0) continue;
3184
3185 /* See if the pattern is to be loaded pre-compiled from a file. */
3186
3187 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3188 {
3189 pcre_uint32 magic;
3190 pcre_uint8 sbuf[8];
3191 FILE *f;
3192
3193 p++;
3194 if (*p == '!')
3195 {
3196 do_debug = TRUE;
3197 do_showinfo = TRUE;
3198 p++;
3199 }
3200
3201 pp = p + (int)strlen((char *)p);
3202 while (isspace(pp[-1])) pp--;
3203 *pp = 0;
3204
3205 f = fopen((char *)p, "rb");
3206 if (f == NULL)
3207 {
3208 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3209 continue;
3210 }
3211
3212 first_gotten_store = 0;
3213 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3214
3215 true_size =
3216 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3217 true_study_size =
3218 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3219
3220 re = (pcre *)new_malloc(true_size);
3221 if (re == NULL)
3222 {
3223 printf("** Failed to get %d bytes of memory for pcre object\n",
3224 (int)true_size);
3225 yield = 1;
3226 goto EXIT;
3227 }
3228 regex_gotten_store = first_gotten_store;
3229
3230 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3231
3232 magic = REAL_PCRE_MAGIC(re);
3233 if (magic != MAGIC_NUMBER)
3234 {
3235 if (swap_uint32(magic) == MAGIC_NUMBER)
3236 {
3237 do_flip = 1;
3238 }
3239 else
3240 {
3241 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3242 new_free(re);
3243 fclose(f);
3244 continue;
3245 }
3246 }
3247
3248 /* We hide the byte-invert info for little and big endian tests. */
3249 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3250 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3251
3252 /* Now see if there is any following study data. */
3253
3254 if (true_study_size != 0)
3255 {
3256 pcre_study_data *psd;
3257
3258 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3259 extra->flags = PCRE_EXTRA_STUDY_DATA;
3260
3261 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3262 extra->study_data = psd;
3263
3264 if (fread(psd, 1, true_study_size, f) != true_study_size)
3265 {
3266 FAIL_READ:
3267 fprintf(outfile, "Failed to read data from %s\n", p);
3268 if (extra != NULL)
3269 {
3270 PCRE_FREE_STUDY(extra);
3271 }
3272 new_free(re);
3273 fclose(f);
3274 continue;
3275 }
3276 fprintf(outfile, "Study data loaded from %s\n", p);
3277 do_study = 1; /* To get the data output if requested */
3278 }
3279 else fprintf(outfile, "No study data\n");
3280
3281 /* Flip the necessary bytes. */
3282 if (do_flip)
3283 {
3284 int rc;
3285 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3286 if (rc == PCRE_ERROR_BADMODE)
3287 {
3288 /* Simulate the result of the function call below. */
3289 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3290 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3291 PCRE_INFO_OPTIONS);
3292 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3293 "%d-bit mode\n", 8 * CHAR_SIZE,
3294 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3295 new_free(re);
3296 fclose(f);
3297 continue;
3298 }
3299 }
3300
3301 /* Need to know if UTF-8 for printing data strings. */
3302
3303 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3304 {
3305 new_free(re);
3306 fclose(f);
3307 continue;
3308 }
3309 use_utf = (get_options & PCRE_UTF8) != 0;
3310
3311 fclose(f);
3312 goto SHOW_INFO;
3313 }
3314
3315 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3316 the pattern; if it isn't complete, read more. */
3317
3318 delimiter = *p++;
3319
3320 if (isalnum(delimiter) || delimiter == '\\')
3321 {
3322 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3323 goto SKIP_DATA;
3324 }
3325
3326 pp = p;
3327 poffset = (int)(p - buffer);
3328
3329 for(;;)
3330 {
3331 while (*pp != 0)
3332 {
3333 if (*pp == '\\' && pp[1] != 0) pp++;
3334 else if (*pp == delimiter) break;
3335 pp++;
3336 }
3337 if (*pp != 0) break;
3338 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3339 {
3340 fprintf(outfile, "** Unexpected EOF\n");
3341 done = 1;
3342 goto CONTINUE;
3343 }
3344 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3345 }
3346
3347 /* The buffer may have moved while being extended; reset the start of data
3348 pointer to the correct relative point in the buffer. */
3349
3350 p = buffer + poffset;
3351
3352 /* If the first character after the delimiter is backslash, make
3353 the pattern end with backslash. This is purely to provide a way
3354 of testing for the error message when a pattern ends with backslash. */
3355
3356 if (pp[1] == '\\') *pp++ = '\\';
3357
3358 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3359 for callouts. */
3360
3361 *pp++ = 0;
3362 strcpy((char *)pbuffer, (char *)p);
3363
3364 /* Look for options after final delimiter */
3365
3366 options = 0;
3367 study_options = force_study_options;
3368 log_store = showstore; /* default from command line */
3369
3370 while (*pp != 0)
3371 {
3372 switch (*pp++)
3373 {
3374 case 'f': options |= PCRE_FIRSTLINE; break;
3375 case 'g': do_g = 1; break;
3376 case 'i': options |= PCRE_CASELESS; break;
3377 case 'm': options |= PCRE_MULTILINE; break;
3378 case 's': options |= PCRE_DOTALL; break;
3379 case 'x': options |= PCRE_EXTENDED; break;
3380
3381 case '+':
3382 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3383 break;
3384
3385 case '=': do_allcaps = 1; break;
3386 case 'A': options |= PCRE_ANCHORED; break;
3387 case 'B': do_debug = 1; break;
3388 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3389 case 'D': do_debug = do_showinfo = 1; break;
3390 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3391 case 'F': do_flip = 1; break;
3392 case 'G': do_G = 1; break;
3393 case 'I': do_showinfo = 1; break;
3394 case 'J': options |= PCRE_DUPNAMES; break;
3395 case 'K': do_mark = 1; break;
3396 case 'M': log_store = 1; break;
3397 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3398
3399 #if !defined NOPOSIX
3400 case 'P': do_posix = 1; break;
3401 #endif
3402
3403 case 'S':
3404 do_study = 1;
3405 for (;;)
3406 {
3407 switch (*pp++)
3408 {
3409 case 'S':
3410 do_study = 0;
3411 no_force_study = 1;
3412 break;
3413
3414 case '!':
3415 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3416 break;
3417
3418 case '+':
3419 if (*pp == '+')
3420 {
3421 verify_jit = TRUE;
3422 pp++;
3423 }
3424 if (*pp >= '1' && *pp <= '7')
3425 study_options |= jit_study_bits[*pp++ - '1'];
3426 else
3427 study_options |= jit_study_bits[6];
3428 break;
3429
3430 case '-':
3431 study_options &= ~PCRE_STUDY_ALLJIT;
3432 break;
3433
3434 default:
3435 pp--;
3436 goto ENDLOOP;
3437 }
3438 }
3439 ENDLOOP:
3440 break;
3441
3442 case 'U': options |= PCRE_UNGREEDY; break;
3443 case 'W': options |= PCRE_UCP; break;
3444 case 'X': options |= PCRE_EXTRA; break;
3445 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3446 case 'Z': debug_lengths = 0; break;
3447 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3448 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3449
3450 case 'T':
3451 switch (*pp++)
3452 {
3453 case '0': tables = tables0; break;
3454 case '1': tables = tables1; break;
3455
3456 case '\r':
3457 case '\n':
3458 case ' ':
3459 case 0:
3460 fprintf(outfile, "** Missing table number after /T\n");
3461 goto SKIP_DATA;
3462
3463 default:
3464 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3465 goto SKIP_DATA;
3466 }
3467 break;
3468
3469 case 'L':
3470 ppp = pp;
3471 /* The '\r' test here is so that it works on Windows. */
3472 /* The test for a zero (NUL) byte is just in case this is an unterminated line. */
3473 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3474 *ppp = 0;
3475 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3476 {
3477 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3478 goto SKIP_DATA;
3479 }
3480 locale_set = 1;
3481 tables = PCRE_MAKETABLES;
3482 pp = ppp;
3483 break;
3484
3485 case '>':
3486 to_file = pp;
3487 while (*pp != 0) pp++;
3488 while (isspace(pp[-1])) pp--;
3489 *pp = 0;
3490 break;
3491
3492 case '<':
3493 {
3494 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3495 {
3496 options |= PCRE_JAVASCRIPT_COMPAT;
3497 pp += 3;
3498 }
3499 else
3500 {
3501 int x = check_newline(pp, outfile);
3502 if (x == 0) goto SKIP_DATA;
3503 options |= x;
3504 while (*pp++ != '>');
3505 }
3506 }
3507 break;
3508
3509 case '\r': /* So that it works in Windows */
3510 case '\n':
3511 case ' ':
3512 break;
3513
3514 default:
3515 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3516 goto SKIP_DATA;
3517 }
3518 }
3519
3520 /* Handle compiling via the POSIX interface, which doesn't support the
3521 timing, showing, or debugging options, nor the ability to pass over
3522 local character tables. Neither does it have 16-bit or 32-bit support. */
3523
3524 #if !defined NOPOSIX
3525 if (posix || do_posix)
3526 {
3527 int rc;
3528 int cflags = 0;
3529
3530 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3531 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3532 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3533 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3534 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3535 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3536 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3537
3538 first_gotten_store = 0;
3539 rc = regcomp(&preg, (char *)p, cflags);
3540
3541 /* Compilation failed; go back for another re, skipping to blank line
3542 if non-interactive. */
3543
3544 if (rc != 0)
3545 {
3546 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3547 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3548 goto SKIP_DATA;
3549 }
3550 }
3551
3552 /* Handle compiling via the native interface */
3553
3554 else
3555 #endif /* !defined NOPOSIX */
3556
3557 {
3558 /* In 16- or 32-bit mode, convert the input. */
3559
3560 #ifdef SUPPORT_PCRE16
3561 if (pcre_mode == PCRE16_MODE)
3562 {
3563 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3564 {
3565 case -1:
3566 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3567 "converted to UTF-16\n");
3568 goto SKIP_DATA;
3569
3570 case -2:
3571 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3572 "cannot be converted to UTF-16\n");
3573 goto SKIP_DATA;
3574
3575 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3576 fprintf(outfile, "**Failed: character value greater than 0xffff "
3577 "cannot be converted to 16-bit in non-UTF mode\n");
3578 goto SKIP_DATA;
3579
3580 default:
3581 break;
3582 }
3583 p = (pcre_uint8 *)buffer16;
3584 }
3585 #endif
3586
3587 #ifdef SUPPORT_PCRE32
3588 if (pcre_mode == PCRE32_MODE)
3589 {
3590 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3591 {
3592 case -1:
3593 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3594 "converted to UTF-32\n");
3595 goto SKIP_DATA;
3596
3597 case -2:
3598 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3599 "cannot be converted to UTF-32\n");
3600 goto SKIP_DATA;
3601
3602 case -3:
3603 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3604 goto SKIP_DATA;
3605
3606 default:
3607 break;
3608 }
3609 p = (pcre_uint8 *)buffer32;
3610 }
3611 #endif
3612
3613 /* Compile many times when timing */
3614
3615 if (timeit > 0)
3616 {
3617 register int i;
3618 clock_t time_taken;
3619 clock_t start_time = clock();
3620 for (i = 0; i < timeit; i++)
3621 {
3622 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3623 if (re != NULL) free(re);
3624 }
3625 time_taken = clock() - start_time;
3626 fprintf(outfile, "Compile time %.4f milliseconds\n",
3627 (((double)time_taken * 1000.0) / (double)timeit) /
3628 (double)CLOCKS_PER_SEC);
3629 }
3630
3631 first_gotten_store = 0;
3632 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3633
3634 /* Compilation failed; go back for another re, skipping to blank line
3635 if non-interactive. */
3636
3637 if (re == NULL)
3638 {
3639 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3640 SKIP_DATA:
3641 if (infile != stdin)
3642 {
3643 for (;;)
3644 {
3645 if (extend_inputline(infile, buffer, NULL) == NULL)
3646 {
3647 done = 1;
3648 goto CONTINUE;
3649 }
3650 len = (int)strlen((char *)buffer);
3651 while (len > 0 && isspace(buffer[len-1])) len--;
3652 if (len == 0) break;
3653 }
3654 fprintf(outfile, "\n");
3655 }
3656 goto CONTINUE;
3657 }
3658
3659 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3660 within the regex; check for this so that we know how to process the data
3661 lines. */
3662
3663 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3664 goto SKIP_DATA;
3665 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3666
3667 /* Extract the size for possible writing before possibly flipping it,
3668 and remember the store that was got. */
3669
3670 true_size = REAL_PCRE_SIZE(re);
3671 regex_gotten_store = first_gotten_store;
3672
3673 /* Output code size information if requested */
3674
3675 if (log_store)
3676 {
3677 int name_count, name_entry_size, real_pcre_size;
3678
3679 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3680 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3681 #ifdef SUPPORT_PCRE8
3682 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3683 real_pcre_size = sizeof(real_pcre);
3684 #endif
3685 #ifdef SUPPORT_PCRE16
3686 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3687 real_pcre_size = sizeof(real_pcre16);
3688 #endif
3689 #ifdef SUPPORT_PCRE32
3690 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3691 real_pcre_size = sizeof(real_pcre32);
3692 #endif
3693 fprintf(outfile, "Memory allocation (code space): %d\n",
3694 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3695 }
3696
3697 /* If -s or /S was present, study the regex to generate additional info to
3698 help with the matching, unless the pattern has the SS option, which
3699 suppresses the effect of /S (used for a few test patterns where studying is
3700 never sensible). */
3701
3702 if (do_study || (force_study >= 0 && !no_force_study))
3703 {
3704 if (timeit > 0)
3705 {
3706 register int i;
3707 clock_t time_taken;
3708 clock_t start_time = clock();
3709 for (i = 0; i < timeit; i++)
3710 {
3711 PCRE_STUDY(extra, re, study_options, &error);
3712 }
3713 time_taken = clock() - start_time;
3714 if (extra != NULL)
3715 {
3716 PCRE_FREE_STUDY(extra);
3717 }
3718 fprintf(outfile, " Study time %.4f milliseconds\n",
3719 (((double)time_taken * 1000.0) / (double)timeit) /
3720 (double)CLOCKS_PER_SEC);
3721 }
3722 PCRE_STUDY(extra, re, study_options, &error);
3723 if (error != NULL)
3724 fprintf(outfile, "Failed to study: %s\n", error);
3725 else if (extra != NULL)
3726 {
3727 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3728 if (log_store)
3729 {
3730 size_t jitsize;
3731 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3732 jitsize != 0)
3733 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3734 }
3735 }
3736 }
3737
3738 /* If /K was present, we set up for handling MARK data. */
3739
3740 if (do_mark)
3741 {
3742 if (extra == NULL)
3743 {
3744 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3745 extra->flags = 0;
3746 }
3747 extra->mark = &markptr;
3748 extra->flags |= PCRE_EXTRA_MARK;
3749 }
3750
3751 /* Extract and display information from the compiled data if required. */
3752
3753 SHOW_INFO:
3754
3755 if (do_debug)
3756 {
3757 fprintf(outfile, "------------------------------------------------------------------\n");
3758 PCRE_PRINTINT(re, outfile, debug_lengths);
3759 }
3760
3761 /* We already have the options in get_options (see above) */
3762
3763 if (do_showinfo)
3764 {
3765 unsigned long int all_options;
3766 pcre_uint32 first_char, need_char;
3767 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
3768 hascrorlf, maxlookbehind;
3769 int nameentrysize, namecount;
3770 const pcre_uint8 *nametable;
3771
3772 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3773 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3774 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3775 new_info(re, NULL, PCRE_INFO_FIRSTLITERAL, &first_char) +
3776 new_info(re, NULL, PCRE_INFO_FIRSTLITERALSET, &first_char_set) +
3777 new_info(re, NULL, PCRE_INFO_LASTLITERAL2, &need_char) +
3778 new_info(re, NULL, PCRE_INFO_LASTLITERAL2SET, &need_char_set) +
3779 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3780 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3781 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3782 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3783 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3784 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3785 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3786 != 0)
3787 goto SKIP_DATA;
3788
3789 if (size != regex_gotten_store) fprintf(outfile,
3790 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3791 (int)size, (int)regex_gotten_store);
3792
3793 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3794 if (backrefmax > 0)
3795 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3796
3797 if (namecount > 0)
3798 {
3799 fprintf(outfile, "Named capturing subpatterns:\n");
3800 while (namecount-- > 0)
3801 {
3802 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
3803 int length = (int)STRLEN(nametable + imm2_size);
3804 fprintf(outfile, " ");
3805 PCHARSV(nametable, imm2_size, length, outfile);
3806 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3807 #ifdef SUPPORT_PCRE32
3808 if (pcre_mode == PCRE32_MODE)
3809 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
3810 #endif
3811 #ifdef SUPPORT_PCRE16
3812 if (pcre_mode == PCRE16_MODE)
3813 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
3814 #endif
3815 #ifdef SUPPORT_PCRE8
3816 if (pcre_mode == PCRE8_MODE)
3817 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
3818 #endif
3819 nametable += nameentrysize * CHAR_SIZE;
3820 }
3821 }
3822
3823 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3824 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3825
3826 all_options = REAL_PCRE_OPTIONS(re);
3827 if (do_flip) all_options = swap_uint32(all_options);
3828
3829 if (get_options == 0) fprintf(outfile, "No options\n");
3830 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3831 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3832 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3833 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3834 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3835 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3836 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3837 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3838 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3839 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3840 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3841 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3842 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3843 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3844 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3845 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3846 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3847 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3848
3849 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3850
3851 switch (get_options & PCRE_NEWLINE_BITS)
3852 {
3853 case PCRE_NEWLINE_CR:
3854 fprintf(outfile, "Forced newline sequence: CR\n");
3855 break;
3856
3857 case PCRE_NEWLINE_LF:
3858 fprintf(outfile, "Forced newline sequence: LF\n");
3859 break;
3860
3861 case PCRE_NEWLINE_CRLF:
3862 fprintf(outfile, "Forced newline sequence: CRLF\n");
3863 break;
3864
3865 case PCRE_NEWLINE_ANYCRLF:
3866 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3867 break;
3868
3869 case PCRE_NEWLINE_ANY:
3870 fprintf(outfile, "Forced newline sequence: ANY\n");
3871 break;
3872
3873 default:
3874 break;
3875 }
3876
3877 if (first_char_set == 2)
3878 {
3879 fprintf(outfile, "First char at start or follows newline\n");
3880 }
3881 else if (first_char_set == 1)
3882 {
3883 const char *caseless =
3884 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
3885 "" : " (caseless)";
3886
3887 if (PRINTOK(first_char))
3888 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3889 else
3890 {
3891 fprintf(outfile, "First char = ");
3892 pchar(first_char, outfile);
3893 fprintf(outfile, "%s\n", caseless);
3894 }
3895 }
3896 else
3897 {
3898 fprintf(outfile, "No first char\n");
3899 }
3900
3901 if (need_char_set == 0)
3902 {
3903 fprintf(outfile, "No need char\n");
3904 }
3905 else
3906 {
3907 const char *caseless =
3908 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
3909 "" : " (caseless)";
3910
3911 if (PRINTOK(need_char))
3912 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3913 else
3914 {
3915 fprintf(outfile, "Need char = ");
3916 pchar(need_char, outfile);
3917 fprintf(outfile, "%s\n", caseless);
3918 }
3919 }
3920
3921 if (maxlookbehind > 0)
3922 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3923
3924 /* Don't output study size; at present it is in any case a fixed
3925 value, but it varies, depending on the computer architecture, and
3926 so messes up the test suite. (And with the /F option, it might be
3927 flipped.) If study was forced by an external -s, don't show this
3928 information unless -i or -d was also present. This means that, except
3929 when auto-callouts are involved, the output from runs with and without
3930 -s should be identical. */
3931
3932 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3933 {
3934 if (extra == NULL)
3935 fprintf(outfile, "Study returned NULL\n");
3936 else
3937 {
3938 pcre_uint8 *start_bits = NULL;
3939 int minlength;
3940
3941 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3942 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3943
3944 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3945 {
3946 if (start_bits == NULL)
3947 fprintf(outfile, "No set of starting bytes\n");
3948 else
3949 {
3950 int i;
3951 int c = 24;
3952 fprintf(outfile, "Starting byte set: ");
3953 for (i = 0; i < 256; i++)
3954 {
3955 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3956 {
3957 if (c > 75)
3958 {
3959 fprintf(outfile, "\n ");
3960 c = 2;
3961 }
3962 if (PRINTOK(i) && i != ' ')
3963 {
3964 fprintf(outfile, "%c ", i);
3965 c += 2;
3966 }
3967 else
3968 {
3969 fprintf(outfile, "\\x%02x ", i);
3970 c += 5;
3971 }
3972 }
3973 }
3974 fprintf(outfile, "\n");
3975 }
3976 }
3977 }
3978
3979 /* Show this only if the JIT was set by /S, not by -s. */
3980
3981 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3982 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3983 {
3984 int jit;
3985 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3986 {
3987 if (jit)
3988 fprintf(outfile, "JIT study was successful\n");
3989 else
3990 #ifdef SUPPORT_JIT
3991 fprintf(outfile, "JIT study was not successful\n");
3992 #else
3993 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3994 #endif
3995 }
3996 }
3997 }
3998 }
3999
4000 /* If the '>' option was present, we write out the regex to a file, and
4001 that is all. The first 8 bytes of the file are the regex length and then
4002 the study length, in big-endian order. */
4003
4004 if (to_file != NULL)
4005 {
4006 FILE *f = fopen((char *)to_file, "wb");
4007 if (f == NULL)
4008 {
4009 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4010 }
4011 else
4012 {
4013 pcre_uint8 sbuf[8];
4014
4015 if (do_flip) regexflip(re, extra);
4016 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4017 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4018 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4019 sbuf[3] = (pcre_uint8)((true_size) & 255);
4020 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4021 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4022 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4023 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4024
4025 if (fwrite(sbuf, 1, 8, f) < 8 ||
4026 fwrite(re, 1, true_size, f) < true_size)
4027 {
4028 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4029 }
4030 else
4031 {
4032 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4033
4034 /* If there is study data, write it. */
4035
4036 if (extra != NULL)
4037 {
4038 if (fwrite(extra->study_data, 1, true_study_size, f) <
4039 true_study_size)
4040 {
4041 fprintf(outfile, "Write error on %s: %s\n", to_file,
4042 strerror(errno));
4043 }
4044 else fprintf(outfile, "Study data written to %s\n", to_file);
4045 }
4046 }
4047 fclose(f);
4048 }
4049
4050 new_free(re);
4051 if (extra != NULL)
4052 {
4053 PCRE_FREE_STUDY(extra);
4054 }
4055 if (locale_set)
4056 {
4057 new_free((void *)tables);
4058 setlocale(LC_CTYPE, "C");
4059 locale_set = 0;
4060 }
4061 continue; /* With next regex */
4062 }
4063 } /* End of non-POSIX compile */
4064
4065 /* Read data lines and test them */
4066
4067 for (;;)
4068 {
4069 #ifdef SUPPORT_PCRE8
4070 pcre_uint8 *q8;
4071 #endif
4072 #ifdef SUPPORT_PCRE16
4073 pcre_uint16 *q16;
4074 #endif
4075 #ifdef SUPPORT_PCRE32
4076 pcre_uint32 *q32;
4077 #endif
4078 pcre_uint8 *bptr;
4079 int *use_offsets = offsets;
4080 int use_size_offsets = size_offsets;
4081 int callout_data = 0;
4082 int callout_data_set = 0;
4083 int count;
4084 pcre_uint32 c;
4085 int copystrings = 0;
4086 int find_match_limit = default_find_match_limit;
4087 int getstrings = 0;
4088 int getlist = 0;
4089 int gmatched = 0;
4090 int start_offset = 0;
4091 int start_offset_sign = 1;
4092 int g_notempty = 0;
4093 int use_dfa = 0;
4094
4095 *copynames = 0;
4096 *getnames = 0;
4097
4098 #ifdef SUPPORT_PCRE32
4099 cn32ptr = copynames;
4100 gn32ptr = getnames;
4101 #endif
4102 #ifdef SUPPORT_PCRE16
4103 cn16ptr = copynames16;
4104 gn16ptr = getnames16;
4105 #endif
4106 #ifdef SUPPORT_PCRE8
4107 cn8ptr = copynames8;
4108 gn8ptr = getnames8;
4109 #endif
4110
4111 SET_PCRE_CALLOUT(callout);
4112 first_callout = 1;
4113 last_callout_mark = NULL;
4114 callout_extra = 0;
4115 callout_count = 0;
4116 callout_fail_count = 999999;
4117 callout_fail_id = -1;
4118 show_malloc = 0;
4119 options = 0;
4120
4121 if (extra != NULL) extra->flags &=
4122 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4123
4124 len = 0;
4125 for (;;)
4126 {
4127 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4128 {
4129 if (len > 0) /* Reached EOF without hitting a newline */
4130 {
4131 fprintf(outfile, "\n");
4132 break;
4133 }
4134 done = 1;
4135 goto CONTINUE;
4136 }
4137 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4138 len = (int)strlen((char *)buffer);
4139 if (buffer[len-1] == '\n') break;
4140 }
4141
4142 while (len > 0 && isspace(buffer[len-1])) len--;
4143 buffer[len] = 0;
4144 if (len == 0) break;
4145
4146 p = buffer;
4147 while (isspace(*p)) p++;
4148
4149 #ifndef NOUTF
4150 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4151 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4152 if (use_utf)
4153 {
4154 char *q;
4155 pcre_uint32 c;
4156 int n = 1;
4157
4158 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &c);
4159 if (n <= 0)
4160 {
4161 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4162 goto NEXT_DATA;
4163 }
4164 }
4165 #endif
4166
4167 /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4168 the number of pcre_uchar units that will be needed. */
4169 if (dbuffer == NULL || len >= dbuffer_size)
4170 {
4171 dbuffer_size *= 2;
4172 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4173 if (dbuffer == NULL)
4174 {
4175 fprintf(stderr, "pcretest: malloc(%d) failed\n", dbuffer_size);
4176 exit(1);
4177 }
4178 }
4179
4180 #ifdef SUPPORT_PCRE8
4181 q8 = (pcre_uint8 *) dbuffer;
4182 #endif
4183 #ifdef SUPPORT_PCRE16
4184 q16 = (pcre_uint16 *) dbuffer;
4185 #endif
4186 #ifdef SUPPORT_PCRE32
4187 q32 = (pcre_uint32 *) dbuffer;
4188 #endif
4189
4190 while ((c = *p++) != 0)
4191 {
4192 int i = 0;
4193 int n = 0;
4194
4195 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4196 In non-UTF mode, allow the value of the byte to fall through to later,
4197 where values greater than 127 are turned into UTF-8 when running in
4198 16-bit or 32-bit mode. */
4199
4200 if (c != '\\')
4201 {
4202 #ifndef NOUTF
4203 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4204 #endif
4205 }
4206
4207 /* Handle backslash escapes */
4208
4209 else switch ((c = *p++))
4210 {
4211 case 'a': c = 7; break;
4212 case 'b': c = '\b'; break;
4213 case 'e': c = 27; break;
4214 case 'f': c = '\f'; break;
4215 case 'n': c = '\n'; break;
4216 case 'r': c = '\r'; break;
4217 case 't': c = '\t'; break;
4218 case 'v': c = '\v'; break;
4219
4220 case '0': case '1': case '2': case '3':
4221 case '4': case '5': case '6': case '7':
4222 c -= '0';
4223 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4224 c = c * 8 + *p++ - '0';
4225 break;
4226
4227 case 'x':
4228 if (*p == '{')
4229 {
4230 pcre_uint8 *pt = p;
4231 c = 0;
4232
4233 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4234 when isxdigit() is a macro that refers to its argument more than
4235 once. This is banned by the C Standard, but apparently happens in at
4236 least one MacOS environment. */
4237
4238 for (pt++; isxdigit(*pt); pt++)
4239 {
4240 if (++i == 9)
4241 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4242 "using only the first eight.\n");
4243 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4244 }
4245 if (*pt == '}')
4246 {
4247 p = pt + 1;
4248 break;
4249 }
4250 /* Not correct form for \x{...}; fall through */
4251 }
4252
4253 /* \x without {} always defines just one byte in 8-bit mode. This
4254 allows UTF-8 characters to be constructed byte by byte, and also allows
4255 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4256 Otherwise, pass it down to later code so that it can be turned into
4257 UTF-8 when running in 16/32-bit mode. */
4258
4259 c = 0;
4260 while (i++ < 2 && isxdigit(*p))
4261 {
4262 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4263 p++;
4264 }
4265 #if !defined NOUTF && defined SUPPORT_PCRE8
4266 if (use_utf && (pcre_mode == PCRE8_MODE))
4267 {
4268 *q8++ = c;
4269 continue;
4270 }
4271 #endif
4272 break;
4273
4274 case 0: /* \ followed by EOF allows for an empty line */
4275 p--;
4276 continue;
4277
4278 case '>':
4279 if (*p == '-')
4280 {
4281 start_offset_sign = -1;
4282 p++;
4283 }
4284 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4285 start_offset *= start_offset_sign;
4286 continue;
4287
4288 case 'A': /* Option setting */
4289 options |= PCRE_ANCHORED;
4290 continue;
4291
4292 case 'B':
4293 options |= PCRE_NOTBOL;
4294 continue;
4295
4296 case 'C':
4297 if (isdigit(*p)) /* Set copy string */
4298 {
4299 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4300 copystrings |= 1 << n;
4301 }
4302 else if (isalnum(*p))
4303 {
4304 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4305 }
4306 else if (*p == '+')
4307 {
4308 callout_extra = 1;
4309 p++;
4310 }
4311 else if (*p == '-')
4312 {
4313 SET_PCRE_CALLOUT(NULL);
4314 p++;
4315 }
4316 else if (*p == '!')
4317 {
4318 callout_fail_id = 0;
4319 p++;
4320 while(isdigit(*p))
4321 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4322 callout_fail_count = 0;
4323 if (*p == '!')
4324 {
4325 p++;
4326 while(isdigit(*p))
4327 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4328 }
4329 }
4330 else if (*p == '*')
4331 {
4332 int sign = 1;
4333 callout_data = 0;
4334 if (*(++p) == '-') { sign = -1; p++; }
4335 while(isdigit(*p))
4336 callout_data = callout_data * 10 + *p++ - '0';
4337 callout_data *= sign;
4338 callout_data_set = 1;
4339 }
4340 continue;
4341
4342 #if !defined NODFA
4343 case 'D':
4344 #if !defined NOPOSIX
4345 if (posix || do_posix)
4346 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4347 else
4348 #endif
4349 use_dfa = 1;
4350 continue;
4351 #endif
4352
4353 #if !defined NODFA
4354 case 'F':
4355 options |= PCRE_DFA_SHORTEST;
4356 continue;
4357 #endif
4358
4359 case 'G':
4360 if (isdigit(*p))
4361 {
4362 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4363 getstrings |= 1 << n;
4364 }
4365 else if (isalnum(*p))
4366 {
4367 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4368 }
4369 continue;
4370
4371 case 'J':
4372 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4373 if (extra != NULL
4374 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4375 && extra->executable_jit != NULL)
4376 {
4377 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4378 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4379 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4380 }
4381 continue;
4382
4383 case 'L':
4384 getlist = 1;
4385 continue;
4386
4387 case 'M':
4388 find_match_limit = 1;
4389 continue;
4390
4391 case 'N':
4392 if ((options & PCRE_NOTEMPTY) != 0)
4393 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4394 else
4395 options |= PCRE_NOTEMPTY;
4396 continue;
4397
4398 case 'O':
4399 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4400 if (n > size_offsets_max)
4401 {
4402 size_offsets_max = n;
4403 free(offsets);
4404 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4405 if (offsets == NULL)
4406 {
4407 printf("** Failed to get %d bytes of memory for offsets vector\n",
4408 (int)(size_offsets_max * sizeof(int)));
4409 yield = 1;
4410 goto EXIT;
4411 }
4412 }
4413 use_size_offsets = n;
4414 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4415 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4416 continue;
4417
4418 case 'P':
4419 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4420 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4421 continue;
4422
4423 case 'Q':
4424 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4425 if (extra == NULL)
4426 {
4427 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4428 extra->flags = 0;
4429 }
4430 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4431 extra->match_limit_recursion = n;
4432 continue;
4433
4434 case 'q':
4435 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4436 if (extra == NULL)
4437 {
4438 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4439 extra->flags = 0;
4440 }
4441 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4442 extra->match_limit = n;
4443 continue;
4444
4445 #if !defined NODFA
4446 case 'R':
4447 options |= PCRE_DFA_RESTART;
4448 continue;
4449 #endif
4450
4451 case 'S':
4452 show_malloc = 1;
4453 continue;
4454
4455 case 'Y':
4456 options |= PCRE_NO_START_OPTIMIZE;
4457 continue;
4458
4459 case 'Z':
4460 options |= PCRE_NOTEOL;
4461 continue;
4462
4463 case '?':
4464 options |= PCRE_NO_UTF8_CHECK;
4465 continue;
4466
4467 case '<':
4468 {
4469 int x = check_newline(p, outfile);
4470 if (x == 0) goto NEXT_DATA;
4471 options |= x;
4472 while (*p++ != '>');
4473 }
4474 continue;
4475 }
4476
4477 /* We now have a character value in c that may be greater than 255. In
4478 16-bit or 32-bit mode, we always convert characters to UTF-8 so that
4479 values greater than 255 can be passed to non-UTF 16- or 32-bit strings.
4480 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4481 than 127 in UTF mode must have come from \x{...} or octal constructs
4482 because values from \x.. get this far only in non-UTF mode. */
4483
4484 #ifdef SUPPORT_PCRE8
4485 if (pcre_mode == PCRE8_MODE)
4486 {
4487 #ifndef NOUTF
4488 if (use_utf)
4489 {
4490 q8 += ord2utf8(c, q8);
4491 }
4492 else
4493 #endif
4494 {
4495 if (c > 0xffu)
4496 {
4497 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4498 "and UTF-8 mode is not enabled.\n", c);
4499 fprintf(outfile, "** Truncation will probably give the wrong "
4500 "result.\n");
4501 }
4502
4503 *q8++ = c;
4504 }
4505 }
4506 #endif
4507 #ifdef SUPPORT_PCRE16
4508 if (pcre_mode == PCRE16_MODE)
4509 {
4510 #ifndef NOUTF
4511 if (use_utf)
4512 {
4513 if (c > 0x10ffffu)
4514 {
4515 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4516 "cannot be converted to UTF-16\n");
4517 goto NEXT_DATA;
4518 }
4519 else if (c >= 0x10000u)
4520 {
4521 c-= 0x10000u;
4522 *q16++ = 0xD800 | (c >> 10);
4523 *q16++ = 0xDC00 | (c & 0x3ff);
4524 }
4525 else
4526 *q16++ = c;
4527 }
4528 else
4529 #endif
4530 {
4531 if (c > 0xffffu)
4532 {
4533 fprintf(outfile, "** Character value is greater than 0xffff "
4534 "and UTF-16 mode is not enabled.\n", c);
4535 fprintf(outfile, "** Truncation will probably give the wrong "
4536 "result.\n");
4537 }
4538
4539 *q16++ = c;
4540 }
4541 }
4542 #endif
4543 #ifdef SUPPORT_PCRE32
4544 if (pcre_mode == PCRE32_MODE)
4545 {
4546 *q32++ = c;
4547 }
4548 #endif
4549
4550 }
4551
4552 /* Reached end of subject string */
4553
4554 #ifdef SUPPORT_PCRE8
4555 if (pcre_mode == PCRE8_MODE)
4556 {
4557 *q8 = 0;
4558 len = (int)(q8 - (pcre_uint8 *)dbuffer);
4559 }
4560 #endif
4561 #ifdef SUPPORT_PCRE16
4562 if (pcre_mode == PCRE16_MODE)
4563 {
4564 *q16 = 0;
4565 len = (int)(q16 - (pcre_uint16 *)dbuffer);
4566 }
4567 #endif
4568 #ifdef SUPPORT_PCRE32
4569 if (pcre_mode == PCRE32_MODE)
4570 {
4571 *q32 = 0;
4572 len = (int)(q32 - (pcre_uint32 *)dbuffer);
4573 }
4574 #endif
4575
4576 /* Move the data to the end of the buffer so that a read over the end of
4577 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
4578 we are using the POSIX interface, we must include the terminating zero. */
4579
4580 bptr = dbuffer;
4581
4582 #if !defined NOPOSIX
4583 if (posix || do_posix)
4584 {
4585 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4586 bptr += dbuffer_size - len - 1;
4587 }
4588 else
4589 #endif
4590 {
4591 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4592 }
4593
4594 if ((all_use_dfa || use_dfa) && find_match_limit)
4595 {
4596 printf("**Match limit not relevant for DFA matching: ignored\n");
4597 find_match_limit = 0;
4598 }
4599
4600 /* Handle matching via the POSIX interface, which does not
4601 support timing or playing with the match limit or callout data. */
4602
4603 #if !defined NOPOSIX
4604 if (posix || do_posix)
4605 {
4606 int rc;
4607 int eflags = 0;
4608 regmatch_t *pmatch = NULL;
4609 if (use_size_offsets > 0)
4610 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4611 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4612 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4613 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4614
4615 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4616
4617 if (rc != 0)
4618 {
4619 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4620 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4621 }
4622 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4623 {
4624 fprintf(outfile, "Matched with REG_NOSUB\n");
4625 }
4626 else
4627 {
4628 size_t i;
4629 for (i = 0; i < (size_t)use_size_offsets; i++)
4630 {
4631 if (pmatch[i].rm_so >= 0)
4632 {
4633 fprintf(outfile, "%2d: ", (int)i);
4634 PCHARSV(dbuffer, pmatch[i].rm_so,
4635 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4636 fprintf(outfile, "\n");
4637 if (do_showcaprest || (i == 0 && do_showrest))
4638 {
4639 fprintf(outfile, "%2d+ ", (int)i);
4640 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4641 outfile);
4642 fprintf(outfile, "\n");
4643 }
4644 }
4645 }
4646 }
4647 free(pmatch);
4648 goto NEXT_DATA;
4649 }
4650
4651 #endif /* !defined NOPOSIX */
4652
4653 /* Handle matching via the native interface - repeats for /g and /G */
4654
4655 /* Ensure that there is a JIT callback if we want to verify that JIT was
4656 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4657
4658 if (verify_jit && jit_stack == NULL && extra != NULL)
4659 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4660
4661 for (;; gmatched++) /* Loop for /g or /G */
4662 {
4663 markptr = NULL;
4664 jit_was_used = FALSE;
4665
4666 if (timeitm > 0)
4667 {
4668 register int i;
4669 clock_t time_taken;
4670 clock_t start_time = clock();
4671
4672 #if !defined NODFA
4673 if (all_use_dfa || use_dfa)
4674 {
4675 if ((options & PCRE_DFA_RESTART) != 0)
4676 {
4677 fprintf(outfile, "Timing DFA restarts is not supported\n");
4678 break;
4679 }
4680 if (dfa_workspace == NULL)
4681 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4682 for (i = 0; i < timeitm; i++)
4683 {
4684 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4685 (options | g_notempty), use_offsets, use_size_offsets,
4686 dfa_workspace, DFA_WS_DIMENSION);
4687 }
4688 }
4689 else
4690 #endif
4691
4692 for (i = 0; i < timeitm; i++)
4693 {
4694 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4695 (options | g_notempty), use_offsets, use_size_offsets);
4696 }
4697 time_taken = clock() - start_time;
4698 fprintf(outfile, "Execute time %.4f milliseconds\n",
4699 (((double)time_taken * 1000.0) / (double)timeitm) /
4700 (double)CLOCKS_PER_SEC);
4701 }
4702
4703 /* If find_match_limit is set, we want to do repeated matches with
4704 varying limits in order to find the minimum value for the match limit and
4705 for the recursion limit. The match limits are relevant only to the normal
4706 running of pcre_exec(), so disable the JIT optimization. This makes it
4707 possible to run the same set of tests with and without JIT externally
4708 requested. */
4709
4710 if (find_match_limit)
4711 {
4712 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4713 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4714 extra->flags = 0;
4715
4716 (void)check_match_limit(re, extra, bptr, len, start_offset,
4717 options|g_notempty, use_offsets, use_size_offsets,
4718 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4719 PCRE_ERROR_MATCHLIMIT, "match()");
4720
4721 count = check_match_limit(re, extra, bptr, len, start_offset,
4722 options|g_notempty, use_offsets, use_size_offsets,
4723 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4724 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4725 }
4726
4727 /* If callout_data is set, use the interface with additional data */
4728
4729 else if (callout_data_set)
4730 {
4731 if (extra == NULL)
4732 {
4733 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4734 extra->flags = 0;
4735 }
4736 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4737 extra->callout_data = &callout_data;
4738 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4739 options | g_notempty, use_offsets, use_size_offsets);
4740 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4741 }
4742
4743 /* The normal case is just to do the match once, with the default
4744 value of match_limit. */
4745
4746 #if !defined NODFA
4747 else if (all_use_dfa || use_dfa)
4748 {
4749 if (dfa_workspace == NULL)
4750 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4751 if (dfa_matched++ == 0)
4752 dfa_workspace[0] = -1; /* To catch bad restart */
4753 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4754 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4755 DFA_WS_DIMENSION);
4756 if (count == 0)
4757 {
4758 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4759 count = use_size_offsets/2;
4760 }
4761 }
4762 #endif
4763
4764 else
4765 {
4766 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4767 options | g_notempty, use_offsets, use_size_offsets);
4768 if (count == 0)
4769 {
4770 fprintf(outfile, "Matched, but too many substrings\n");
4771 count = use_size_offsets/3;
4772 }
4773 }
4774
4775 /* Matched */
4776
4777 if (count >= 0)
4778 {
4779 int i, maxcount;
4780 void *cnptr, *gnptr;
4781
4782 #if !defined NODFA
4783 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4784 #endif
4785 maxcount = use_size_offsets/3;
4786
4787 /* This is a check against a lunatic return value. */
4788
4789 if (count > maxcount)
4790 {
4791 fprintf(outfile,
4792 "** PCRE error: returned count %d is too big for offset size %d\n",
4793 count, use_size_offsets);
4794 count = use_size_offsets/3;
4795 if (do_g || do_G)
4796 {
4797 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4798 do_g = do_G = FALSE; /* Break g/G loop */
4799 }
4800 }
4801
4802 /* do_allcaps requests showing of all captures in the pattern, to check
4803 unset ones at the end. */
4804
4805 if (do_allcaps)
4806 {
4807 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4808 goto SKIP_DATA;
4809 count++; /* Allow for full match */
4810 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4811 }
4812
4813 /* Output the captured substrings */
4814
4815 for (i = 0; i < count * 2; i += 2)
4816 {
4817 if (use_offsets[i] < 0)
4818 {
4819 if (use_offsets[i] != -1)
4820 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4821 use_offsets[i], i);
4822 if (use_offsets[i+1] != -1)
4823 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4824 use_offsets[i+1], i+1);
4825 fprintf(outfile, "%2d: <unset>\n", i/2);
4826 }
4827 else
4828 {
4829 fprintf(outfile, "%2d: ", i/2);
4830 PCHARSV(bptr, use_offsets[i],
4831 use_offsets[i+1] - use_offsets[i], outfile);
4832 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4833 fprintf(outfile, "\n");
4834 if (do_showcaprest || (i == 0 && do_showrest))
4835 {
4836 fprintf(outfile, "%2d+ ", i/2);
4837 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4838 outfile);
4839 fprintf(outfile, "\n");
4840 }
4841 }
4842 }
4843
4844 if (markptr != NULL)
4845 {
4846 fprintf(outfile, "MK: ");
4847 PCHARSV(markptr, 0, -1, outfile);
4848 fprintf(outfile, "\n");
4849 }
4850
4851 for (i = 0; i < 32; i++)
4852 {
4853 if ((copystrings & (1 << i)) != 0)
4854 {
4855 int rc;
4856 char copybuffer[256];
4857 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4858 copybuffer, sizeof(copybuffer));
4859 if (rc < 0)
4860 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4861 else
4862 {
4863 fprintf(outfile, "%2dC ", i);
4864 PCHARSV(copybuffer, 0, rc, outfile);
4865 fprintf(outfile, " (%d)\n", rc);
4866 }
4867 }
4868 }
4869
4870 cnptr = copynames;
4871 for (;;)
4872 {
4873 int rc;
4874 char copybuffer[256];
4875
4876 if (pcre_mode == PCRE16_MODE)
4877 {
4878 if (*(pcre_uint16 *)cnptr == 0) break;
4879 }
4880 else
4881 {
4882 if (*(pcre_uint8 *)cnptr == 0) break;
4883 }
4884
4885 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4886 cnptr, copybuffer, sizeof(copybuffer));
4887
4888 if (rc < 0)
4889 {
4890 fprintf(outfile, "copy substring ");
4891 PCHARSV(cnptr, 0, -1, outfile);
4892 fprintf(outfile, " failed %d\n", rc);
4893 }
4894 else
4895 {
4896 fprintf(outfile, " C ");
4897 PCHARSV(copybuffer, 0, rc, outfile);
4898 fprintf(outfile, " (%d) ", rc);
4899 PCHARSV(cnptr, 0, -1, outfile);
4900 putc('\n', outfile);
4901 }
4902
4903 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4904 }
4905
4906 for (i = 0; i < 32; i++)
4907 {
4908 if ((getstrings & (1 << i)) != 0)
4909 {
4910 int rc;
4911 const char *substring;
4912 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4913 if (rc < 0)
4914 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4915 else
4916 {
4917 fprintf(outfile, "%2dG ", i);
4918 PCHARSV(substring, 0, rc, outfile);
4919 fprintf(outfile, " (%d)\n", rc);
4920 PCRE_FREE_SUBSTRING(substring);
4921 }
4922 }
4923 }
4924
4925 gnptr = getnames;
4926 for (;;)
4927 {
4928 int rc;
4929 const char *substring;
4930
4931 if (pcre_mode == PCRE16_MODE)
4932 {
4933 if (*(pcre_uint16 *)gnptr == 0) break;
4934 }
4935 else
4936 {
4937 if (*(pcre_uint8 *)gnptr == 0) break;
4938 }
4939
4940 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4941 gnptr, &substring);
4942 if (rc < 0)
4943 {
4944 fprintf(outfile, "get substring ");
4945 PCHARSV(gnptr, 0, -1, outfile);
4946 fprintf(outfile, " failed %d\n", rc);
4947 }
4948 else
4949 {
4950 fprintf(outfile, " G ");
4951 PCHARSV(substring, 0, rc, outfile);
4952 fprintf(outfile, " (%d) ", rc);
4953 PCHARSV(gnptr, 0, -1, outfile);
4954 PCRE_FREE_SUBSTRING(substring);
4955 putc('\n', outfile);
4956 }
4957
4958 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4959 }
4960
4961 if (getlist)
4962 {
4963 int rc;
4964 const char **stringlist;
4965 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4966 if (rc < 0)
4967 fprintf(outfile, "get substring list failed %d\n", rc);
4968 else
4969 {
4970 for (i = 0; i < count; i++)
4971 {
4972 fprintf(outfile, "%2dL ", i);
4973 PCHARSV(stringlist[i], 0, -1, outfile);
4974 putc('\n', outfile);
4975 }
4976 if (stringlist[i] != NULL)
4977 fprintf(outfile, "string list not terminated by NULL\n");
4978 PCRE_FREE_SUBSTRING_LIST(stringlist);
4979 }
4980 }
4981 }
4982
4983 /* There was a partial match */
4984
4985 else if (count == PCRE_ERROR_PARTIAL)
4986 {
4987 if (markptr == NULL) fprintf(outfile, "Partial match");
4988 else
4989 {
4990 fprintf(outfile, "Partial match, mark=");
4991 PCHARSV(markptr, 0, -1, outfile);
4992 }
4993 if (use_size_offsets > 1)
4994 {
4995 fprintf(outfile, ": ");
4996 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4997 outfile);
4998 }
4999 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5000 fprintf(outfile, "\n");
5001 break; /* Out of the /g loop */
5002 }
5003
5004 /* Failed to match. If this is a /g or /G loop and we previously set
5005 g_notempty after a null match, this is not necessarily the end. We want
5006 to advance the start offset, and continue. We won't be at the end of the
5007 string - that was checked before setting g_notempty.
5008
5009 Complication arises in the case when the newline convention is "any",
5010 "crlf", or "anycrlf". If the previous match was at the end of a line
5011 terminated by CRLF, an advance of one character just passes the \r,
5012 whereas we should prefer the longer newline sequence, as does the code in
5013 pcre_exec(). Fudge the offset value to achieve this. We check for a
5014 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5015 find the default.
5016
5017 Otherwise, in the case of UTF-8 matching, the advance must be one
5018 character, not one byte. */
5019
5020 else
5021 {
5022 if (g_notempty != 0)
5023 {
5024 int onechar = 1;
5025 unsigned int obits = REAL_PCRE_OPTIONS(re);
5026 use_offsets[0] = start_offset;
5027 if ((obits & PCRE_NEWLINE_BITS) == 0)
5028 {
5029 int d;
5030 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5031 /* Note that these values are always the ASCII ones, even in
5032 EBCDIC environments. CR = 13, NL = 10. */
5033 obits = (d == 13)? PCRE_NEWLINE_CR :
5034 (d == 10)? PCRE_NEWLINE_LF :
5035 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5036 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5037 (d == -1)? PCRE_NEWLINE_ANY : 0;
5038 }
5039 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5040 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5041 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5042 &&
5043 start_offset < len - 1 && (
5044 #ifdef SUPPORT_PCRE8
5045 (pcre_mode == PCRE8_MODE &&
5046 bptr[start_offset] == '\r' &&
5047 bptr[start_offset + 1] == '\n') ||
5048 #endif
5049 #ifdef SUPPORT_PCRE16
5050 (pcre_mode == PCRE16_MODE &&
5051 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5052 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5053 #endif
5054 #ifdef SUPPORT_PCRE32
5055 (pcre_mode == PCRE32_MODE &&
5056 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5057 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5058 #endif
5059 0))
5060 onechar++;
5061 else if (use_utf)
5062 {
5063 while (start_offset + onechar < len)
5064 {
5065 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5066 onechar++;
5067 }
5068 }
5069 use_offsets[1] = start_offset + onechar;
5070 }
5071 else
5072 {
5073 switch(count)
5074 {
5075 case PCRE_ERROR_NOMATCH:
5076 if (gmatched == 0)
5077 {
5078 if (markptr == NULL)
5079 {
5080 fprintf(outfile, "No match");
5081 }
5082 else
5083 {
5084 fprintf(outfile, "No match, mark = ");
5085 PCHARSV(markptr, 0, -1, outfile);
5086 }
5087 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5088 putc('\n', outfile);
5089 }
5090 break;
5091
5092 case PCRE_ERROR_BADUTF8:
5093 case PCRE_ERROR_SHORTUTF8:
5094 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5095 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5096 8 * CHAR_SIZE);
5097 if (use_size_offsets >= 2)
5098 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5099 use_offsets[1]);
5100 fprintf(outfile, "\n");
5101 break;
5102
5103 case PCRE_ERROR_BADUTF8_OFFSET:
5104 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5105 8 * CHAR_SIZE);
5106 break;
5107
5108 default:
5109 if (count < 0 &&
5110 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5111 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5112 else
5113 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5114 break;
5115 }
5116
5117 break; /* Out of the /g loop */
5118 }
5119 }
5120
5121 /* If not /g or /G we are done */
5122
5123 if (!do_g && !do_G) break;
5124
5125 /* If we have matched an empty string, first check to see if we are at
5126 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5127 Perl's /g option does. This turns out to be rather cunning. First we set
5128 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5129 same point. If this fails (picked up above) we advance to the next
5130 character. */
5131
5132 g_notempty = 0;
5133
5134 if (use_offsets[0] == use_offsets[1])
5135 {
5136 if (use_offsets[0] == len) break;
5137 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5138 }
5139
5140 /* For /g, update the start offset, leaving the rest alone */
5141
5142 if (do_g) start_offset = use_offsets[1];
5143
5144 /* For /G, update the pointer and length */
5145
5146 else
5147 {
5148 bptr += use_offsets[1] * CHAR_SIZE;
5149 len -= use_offsets[1];
5150 }
5151 } /* End of loop for /g and /G */
5152
5153 NEXT_DATA: continue;
5154 } /* End of loop for data lines */
5155
5156 CONTINUE:
5157
5158 #if !defined NOPOSIX
5159 if (posix || do_posix) regfree(&preg);
5160 #endif
5161
5162 if (re != NULL) new_free(re);
5163 if (extra != NULL)
5164 {
5165 PCRE_FREE_STUDY(extra);
5166 }
5167 if (locale_set)
5168 {
5169 new_free((void *)tables);
5170 setlocale(LC_CTYPE, "C");
5171 locale_set = 0;
5172 }
5173 if (jit_stack != NULL)
5174 {
5175 PCRE_JIT_STACK_FREE(jit_stack);
5176 jit_stack = NULL;
5177 }
5178 }
5179
5180 if (infile == stdin) fprintf(outfile, "\n");
5181
5182 EXIT:
5183
5184 if (infile != NULL && infile != stdin) fclose(infile);
5185 if (outfile != NULL && outfile != stdout) fclose(outfile);
5186
5187 free(buffer);
5188 free(dbuffer);
5189 free(pbuffer);
5190 free(offsets);
5191
5192 #ifdef SUPPORT_PCRE16
5193 if (buffer16 != NULL) free(buffer16);
5194 #endif
5195 #ifdef SUPPORT_PCRE32
5196 if (buffer32 != NULL) free(buffer32);
5197 #endif
5198
5199 #if !defined NODFA
5200 if (dfa_workspace != NULL)
5201 free(dfa_workspace);
5202 #endif
5203
5204 return yield;
5205 }
5206
5207 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5