/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1080 - (show annotations)
Tue Oct 16 15:55:07 2012 UTC (7 years, 1 month ago) by chpe
File MIME type: text/plain
File size: 153830 byte(s)
pcre32: fullinfo: Add variants of (FIRST|LAST)LITERAL that are 32-bit clean

Since for pcre32 the whole range of the output is already used up
for the character itself, return the special values separately.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136
137 #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138 /* Configure internal macros to 32 bit mode. */
139 #define COMPILE_PCRE32
140 #endif
141 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142 /* Configure internal macros to 16 bit mode. */
143 #define COMPILE_PCRE16
144 #endif
145 #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146 /* Configure internal macros to 8 bit mode. */
147 #define COMPILE_PCRE8
148 #endif
149
150 #include "pcre_internal.h"
151
152 /* The pcre_printint() function, which prints the internal form of a compiled
153 regex, is held in a separate file so that (a) it can be compiled in either
154 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
155 when that is compiled in debug mode. */
156
157 #ifdef SUPPORT_PCRE8
158 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159 #endif
160 #ifdef SUPPORT_PCRE16
161 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162 #endif
163 #ifdef SUPPORT_PCRE32
164 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165 #endif
166
167 /* We need access to some of the data tables that PCRE uses. So as not to have
168 to keep two copies, we include the source files here, changing the names of the
169 external symbols to prevent clashes. */
170
171 #define PCRE_INCLUDED
172
173 #include "pcre_tables.c"
174 #include "pcre_ucd.c"
175
176 /* The definition of the macro PRINTABLE, which determines whether to print an
177 output character as-is or as a hex value when showing compiled patterns, is
178 the same as in the printint.src file. We use it here in cases when the locale
179 has not been explicitly changed, so as to get consistent output from systems
180 that differ in their output from isprint() even in the "C" locale. */
181
182 #ifdef EBCDIC
183 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
184 #else
185 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
186 #endif
187
/* PRINTOK(c) is the printability test: when locale_set is non-zero it defers
to the C library's isprint(); otherwise it uses the fixed PRINTABLE() range
defined above so that output does not vary between systems.
NOTE(review): isprint() is only defined for arguments representable as
unsigned char (or EOF) - confirm that callers never pass wider 16/32-bit code
units through this path when a locale has been set. */
188 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189
190 /* Posix support is disabled in 16 or 32 bit only mode. */
191 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192 #define NOPOSIX
193 #endif
194
195 /* It is possible to compile this test program without including support for
196 testing the POSIX interface, though this is not available via the standard
197 Makefile. */
198
199 #if !defined NOPOSIX
200 #include "pcreposix.h"
201 #endif
202
203 /* It is also possible, originally for the benefit of a version that was
204 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206 automatically cut out the UTF support if PCRE is built without it. */
207
208 #ifndef SUPPORT_UTF
209 #ifndef NOUTF
210 #define NOUTF
211 #endif
212 #endif
213
214 /* To make the code a bit tidier for 8-bit, 16-bit and 32-bit support, we define
215 macros for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is
216 called only from one place and is handled differently). I couldn't dream up any
217 way of using a single macro to do this in a generic way, because of the many
218 different argument requirements. We know that at least one of SUPPORT_PCRE8,
219 SUPPORT_PCRE16 and SUPPORT_PCRE32 must be set. First define macros for each
220 individual mode; then use these in the definitions of generic macros.
221
222 **** Special note about the PCHARSxxx macros: the address of the string to be
223 printed is always given as two arguments: a base address followed by an offset.
224 The base address is cast to the correct data size for 8 or 16 bit data; the
225 offset is in units of this size. If the string were given as base+offset in one
226 argument, the casting might be incorrectly applied. */
227
228 #ifdef SUPPORT_PCRE8
229
/* PCHARS8 hands the 8-bit string that starts `offset` code units after base
address `p` to pchars(), together with `len` and `f`, and assigns the value
pchars() returns to `lv`. The base is cast before the offset is added, so the
offset is counted in 8-bit units. Both `p` and `offset` are parenthesized so
that a compound argument such as `a - b` or `x ? y : z` is applied as a single
value instead of being split up by operator precedence. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

/* PCHARSV8 is the same call with the return value of pchars() discarded. */

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)
235
236 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237 p = read_capture_name8(p, cn8, re)
238
239 #define STRLEN8(p) ((int)strlen((char *)p))
240
241 #define SET_PCRE_CALLOUT8(callout) \
242 pcre_callout = callout
243
244 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245 pcre_assign_jit_stack(extra, callback, userdata)
246
247 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248 re = pcre_compile((char *)pat, options, error, erroffset, tables)
249
250 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251 namesptr, cbuffer, size) \
252 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253 (char *)namesptr, cbuffer, size)
254
255 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257
258 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259 offsets, size_offsets, workspace, size_workspace) \
260 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261 offsets, size_offsets, workspace, size_workspace)
262
263 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264 offsets, size_offsets) \
265 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266 offsets, size_offsets)
267
268 #define PCRE_FREE_STUDY8(extra) \
269 pcre_free_study(extra)
270
271 #define PCRE_FREE_SUBSTRING8(substring) \
272 pcre_free_substring(substring)
273
274 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275 pcre_free_substring_list(listptr)
276
277 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278 getnamesptr, subsptr) \
279 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280 (char *)getnamesptr, subsptr)
281
/* Look up the capture-group number for the name at `ptr` with
pcre_get_stringnumber() and assign the result to `n`.
NOTE(review): the second parameter is spelled `rc` and is never used in the
expansion; the compiled pattern is taken from an identifier named `re` that
must already be in scope wherever the macro is expanded. This looks like a
typo for `re` - check the callers before renaming the parameter. */
282 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
283 n = pcre_get_stringnumber(re, (char *)ptr)
284
285 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287
288 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290
291 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293
294 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295 pcre_printint(re, outfile, debug_lengths)
296
297 #define PCRE_STUDY8(extra, re, options, error) \
298 extra = pcre_study(re, options, error)
299
300 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301 pcre_jit_stack_alloc(startsize, maxsize)
302
303 #define PCRE_JIT_STACK_FREE8(stack) \
304 pcre_jit_stack_free(stack)
305
306 #endif /* SUPPORT_PCRE8 */
307
308 /* -----------------------------------------------------------*/
309
310 #ifdef SUPPORT_PCRE16
311
/* PCHARS16 hands the 16-bit string that starts `offset` code units after base
address `p` to pchars16(), together with `len` and `f`, and assigns the value
pchars16() returns to `lv`. The base is cast to PCRE_SPTR16 before the offset
is added, so the offset is counted in 16-bit units. Both `p` and `offset` are
parenthesized so that a compound argument such as `a - b` or `x ? y : z` is
applied as a single value instead of being split up by operator precedence. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* PCHARSV16 is the same call with the return value of pchars16() discarded. */

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)
317
318 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319 p = read_capture_name16(p, cn16, re)
320
321 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322
323 #define SET_PCRE_CALLOUT16(callout) \
324 pcre16_callout = (int (*)(pcre16_callout_block *))callout
325
326 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327 pcre16_assign_jit_stack((pcre16_extra *)extra, \
328 (pcre16_jit_callback)callback, userdata)
329
330 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332 tables)
333
334 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335 namesptr, cbuffer, size) \
336 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338
339 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341 (PCRE_UCHAR16 *)cbuffer, size/2)
342
343 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344 offsets, size_offsets, workspace, size_workspace) \
345 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347 workspace, size_workspace)
348
349 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350 offsets, size_offsets) \
351 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352 len, start_offset, options, offsets, size_offsets)
353
354 #define PCRE_FREE_STUDY16(extra) \
355 pcre16_free_study((pcre16_extra *)extra)
356
357 #define PCRE_FREE_SUBSTRING16(substring) \
358 pcre16_free_substring((PCRE_SPTR16)substring)
359
360 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362
363 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364 getnamesptr, subsptr) \
365 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367
/* Look up the capture-group number for the 16-bit name at `ptr` with
pcre16_get_stringnumber() and assign the result to `n`.
NOTE(review): the second parameter is spelled `rc` and is never used in the
expansion; the compiled pattern is taken from an identifier named `re` that
must already be in scope wherever the macro is expanded, and it is passed
without the (pcre16 *) cast that the other 16-bit wrappers apply. This looks
like a typo for `re` - check the callers before renaming the parameter. */
368 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
369 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
370
371 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373 (PCRE_SPTR16 *)(void*)subsptr)
374
375 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377 (PCRE_SPTR16 **)(void*)listptr)
378
379 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381 tables)
382
383 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384 pcre16_printint(re, outfile, debug_lengths)
385
386 #define PCRE_STUDY16(extra, re, options, error) \
387 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388
389 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391
392 #define PCRE_JIT_STACK_FREE16(stack) \
393 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394
395 #endif /* SUPPORT_PCRE16 */
396
397 /* -----------------------------------------------------------*/
398
399 #ifdef SUPPORT_PCRE32
400
/* PCHARS32 hands the 32-bit string that starts `offset` code units after base
address `p` to pchars32(), together with `len` and `f`, and assigns the value
pchars32() returns to `lv`. The base is cast to PCRE_SPTR32 before the offset
is added, so the offset is counted in 32-bit units. Both `p` and `offset` are
parenthesized so that a compound argument such as `a - b` or `x ? y : z` is
applied as a single value instead of being split up by operator precedence. */

#define PCHARS32(lv, p, offset, len, f) \
  lv = pchars32((PCRE_SPTR32)(p) + (offset), len, f)

/* PCHARSV32 is the same call with the return value of pchars32() discarded. */

#define PCHARSV32(p, offset, len, f) \
  (void)pchars32((PCRE_SPTR32)(p) + (offset), len, f)
406
407 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408 p = read_capture_name32(p, cn32, re)
409
410 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411
412 #define SET_PCRE_CALLOUT32(callout) \
413 pcre32_callout = (int (*)(pcre32_callout_block *))callout
414
415 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416 pcre32_assign_jit_stack((pcre32_extra *)extra, \
417 (pcre32_jit_callback)callback, userdata)
418
419 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421 tables)
422
/* Copy a named captured substring into `cbuffer` using the 32-bit library and
assign the library's return code to `rc`.

`size` is the capacity of `cbuffer` in bytes: the 8-bit wrapper forwards it
unchanged for a char buffer and the 16-bit wrapper halves it. The buffer is
reinterpreted here as an array of 32-bit code units, so the capacity handed to
pcre32_copy_named_substring() has to be size/4. Passing size/2 (as the 16-bit
wrapper does) would tell the library the buffer is twice as large as it really
is. `size` is parenthesized so that a compound expression is divided as a
whole. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
427
/* Copy captured substring number `i` into `cbuffer` using the 32-bit library
and assign the library's return code to `rc`.

`size` is the capacity of `cbuffer` in bytes: the 8-bit wrapper forwards it
unchanged for a char buffer and the 16-bit wrapper halves it. The buffer is
reinterpreted here as an array of 32-bit code units, so the capacity handed to
pcre32_copy_substring() has to be size/4. Passing size/2 (as the 16-bit wrapper
does) would tell the library the buffer is twice as large as it really is.
`size` is parenthesized so that a compound expression is divided as a whole. */

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
431
432 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433 offsets, size_offsets, workspace, size_workspace) \
434 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436 workspace, size_workspace)
437
438 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439 offsets, size_offsets) \
440 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441 len, start_offset, options, offsets, size_offsets)
442
443 #define PCRE_FREE_STUDY32(extra) \
444 pcre32_free_study((pcre32_extra *)extra)
445
446 #define PCRE_FREE_SUBSTRING32(substring) \
447 pcre32_free_substring((PCRE_SPTR32)substring)
448
449 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451
452 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453 getnamesptr, subsptr) \
454 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456
/* Look up the capture-group number for the 32-bit name at `ptr` with
pcre32_get_stringnumber() and assign the result to `n`.
NOTE(review): the second parameter is spelled `rc` and is never used in the
expansion; the compiled pattern is taken from an identifier named `re` that
must already be in scope wherever the macro is expanded, and it is passed
without the (pcre32 *) cast that the other 32-bit wrappers apply. This looks
like a typo for `re` - check the callers before renaming the parameter. */
457 #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
458 n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
459
460 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462 (PCRE_SPTR32 *)(void*)subsptr)
463
464 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466 (PCRE_SPTR32 **)(void*)listptr)
467
468 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470 tables)
471
472 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473 pcre32_printint(re, outfile, debug_lengths)
474
475 #define PCRE_STUDY32(extra, re, options, error) \
476 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477
478 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480
481 #define PCRE_JIT_STACK_FREE32(stack) \
482 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483
484 #endif /* SUPPORT_PCRE32 */
485
486
487 /* ----- Two or more modes are supported; a runtime test is needed, except for
488 pcre_config(), and the JIT stack functions, when it doesn't matter which
489 version is called. ----- */
490
/* Values compared against pcre_mode to select the 8-, 16- or 32-bit library at
run time. The numeric values are significant: when more than one library is
compiled in, CHAR_SIZE is defined as (1 << pcre_mode), so the enumerators must
keep the implicit values 0, 1 and 2 in this order to yield 1, 2 and 4. */
491 enum {
492 PCRE8_MODE,
493 PCRE16_MODE,
494 PCRE32_MODE
495 };
496
497 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498
499 #define CHAR_SIZE (1 << pcre_mode)
500
501 #define PCHARS(lv, p, offset, len, f) \
502 if (pcre_mode == PCRE32_MODE) \
503 PCHARS32(lv, p, offset, len, f); \
504 else if (pcre_mode == PCRE16_MODE) \
505 PCHARS16(lv, p, offset, len, f); \
506 else \
507 PCHARS8(lv, p, offset, len, f)
508
509 #define PCHARSV(p, offset, len, f) \
510 if (pcre_mode == PCRE32_MODE) \
511 PCHARSV32(p, offset, len, f); \
512 else if (pcre_mode == PCRE16_MODE) \
513 PCHARSV16(p, offset, len, f); \
514 else \
515 PCHARSV8(p, offset, len, f)
516
517 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518 if (pcre_mode == PCRE32_MODE) \
519 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520 else if (pcre_mode == PCRE16_MODE) \
521 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522 else \
523 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524
525 #define SET_PCRE_CALLOUT(callout) \
526 if (pcre_mode == PCRE32_MODE) \
527 SET_PCRE_CALLOUT32(callout); \
528 else if (pcre_mode == PCRE16_MODE) \
529 SET_PCRE_CALLOUT16(callout); \
530 else \
531 SET_PCRE_CALLOUT8(callout)
532
533 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534
535 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536 if (pcre_mode == PCRE32_MODE) \
537 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538 else if (pcre_mode == PCRE16_MODE) \
539 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540 else \
541 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542
543 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544 if (pcre_mode == PCRE32_MODE) \
545 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546 else if (pcre_mode == PCRE16_MODE) \
547 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548 else \
549 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
550
551 #define PCRE_CONFIG pcre_config
552
553 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554 namesptr, cbuffer, size) \
555 if (pcre_mode == PCRE32_MODE) \
556 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557 namesptr, cbuffer, size); \
558 else if (pcre_mode == PCRE16_MODE) \
559 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560 namesptr, cbuffer, size); \
561 else \
562 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
563 namesptr, cbuffer, size)
564
565 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566 if (pcre_mode == PCRE32_MODE) \
567 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568 else if (pcre_mode == PCRE16_MODE) \
569 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570 else \
571 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572
573 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574 offsets, size_offsets, workspace, size_workspace) \
575 if (pcre_mode == PCRE32_MODE) \
576 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577 offsets, size_offsets, workspace, size_workspace); \
578 else if (pcre_mode == PCRE16_MODE) \
579 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580 offsets, size_offsets, workspace, size_workspace); \
581 else \
582 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets, workspace, size_workspace)
584
585 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets) \
587 if (pcre_mode == PCRE32_MODE) \
588 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets); \
590 else if (pcre_mode == PCRE16_MODE) \
591 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets); \
593 else \
594 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
595 offsets, size_offsets)
596
597 #define PCRE_FREE_STUDY(extra) \
598 if (pcre_mode == PCRE32_MODE) \
599 PCRE_FREE_STUDY32(extra); \
600 else if (pcre_mode == PCRE16_MODE) \
601 PCRE_FREE_STUDY16(extra); \
602 else \
603 PCRE_FREE_STUDY8(extra)
604
605 #define PCRE_FREE_SUBSTRING(substring) \
606 if (pcre_mode == PCRE32_MODE) \
607 PCRE_FREE_SUBSTRING32(substring); \
608 else if (pcre_mode == PCRE16_MODE) \
609 PCRE_FREE_SUBSTRING16(substring); \
610 else \
611 PCRE_FREE_SUBSTRING8(substring)
612
613 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614 if (pcre_mode == PCRE32_MODE) \
615 PCRE_FREE_SUBSTRING_LIST32(listptr); \
616 else if (pcre_mode == PCRE16_MODE) \
617 PCRE_FREE_SUBSTRING_LIST16(listptr); \
618 else \
619 PCRE_FREE_SUBSTRING_LIST8(listptr)
620
621 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622 getnamesptr, subsptr) \
623 if (pcre_mode == PCRE32_MODE) \
624 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625 getnamesptr, subsptr); \
626 else if (pcre_mode == PCRE16_MODE) \
627 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628 getnamesptr, subsptr); \
629 else \
630 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
631 getnamesptr, subsptr)
632
633 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634 if (pcre_mode == PCRE32_MODE) \
635 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636 else if (pcre_mode == PCRE16_MODE) \
637 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638 else \
639 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640
641 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642 if (pcre_mode == PCRE32_MODE) \
643 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644 else if (pcre_mode == PCRE16_MODE) \
645 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646 else \
647 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648
649 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650 if (pcre_mode == PCRE32_MODE) \
651 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652 else if (pcre_mode == PCRE16_MODE) \
653 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654 else \
655 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656
657 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658 (pcre_mode == PCRE32_MODE ? \
659 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660 : pcre_mode == PCRE16_MODE ? \
661 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663
664 #define PCRE_JIT_STACK_FREE(stack) \
665 if (pcre_mode == PCRE32_MODE) \
666 PCRE_JIT_STACK_FREE32(stack); \
667 else if (pcre_mode == PCRE16_MODE) \
668 PCRE_JIT_STACK_FREE16(stack); \
669 else \
670 PCRE_JIT_STACK_FREE8(stack)
671
672 #define PCRE_MAKETABLES \
673 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674
675 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676 if (pcre_mode == PCRE32_MODE) \
677 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678 else if (pcre_mode == PCRE16_MODE) \
679 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680 else \
681 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682
683 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684 if (pcre_mode == PCRE32_MODE) \
685 PCRE_PRINTINT32(re, outfile, debug_lengths); \
686 else if (pcre_mode == PCRE16_MODE) \
687 PCRE_PRINTINT16(re, outfile, debug_lengths); \
688 else \
689 PCRE_PRINTINT8(re, outfile, debug_lengths)
690
691 #define PCRE_STUDY(extra, re, options, error) \
692 if (pcre_mode == PCRE32_MODE) \
693 PCRE_STUDY32(extra, re, options, error); \
694 else if (pcre_mode == PCRE16_MODE) \
695 PCRE_STUDY16(extra, re, options, error); \
696 else \
697 PCRE_STUDY8(extra, re, options, error)
698
699 /* ----- Only 8-bit mode is supported ----- */
700
701 #elif defined SUPPORT_PCRE8
702 #define CHAR_SIZE 1
703 #define PCHARS PCHARS8
704 #define PCHARSV PCHARSV8
705 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
706 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
707 #define STRLEN STRLEN8
708 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
709 #define PCRE_COMPILE PCRE_COMPILE8
710 #define PCRE_CONFIG pcre_config
711 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
712 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
713 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
714 #define PCRE_EXEC PCRE_EXEC8
715 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
716 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
717 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
718 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
719 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
720 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
721 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
722 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
723 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
724 #define PCRE_MAKETABLES pcre_maketables()
725 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
726 #define PCRE_PRINTINT PCRE_PRINTINT8
727 #define PCRE_STUDY PCRE_STUDY8
728
729 /* ----- Only 16-bit mode is supported ----- */
730
731 #elif defined SUPPORT_PCRE16
732 #define CHAR_SIZE 2
733 #define PCHARS PCHARS16
734 #define PCHARSV PCHARSV16
735 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
736 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
737 #define STRLEN STRLEN16
738 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
739 #define PCRE_COMPILE PCRE_COMPILE16
740 #define PCRE_CONFIG pcre16_config
741 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
742 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
743 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
744 #define PCRE_EXEC PCRE_EXEC16
745 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
746 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
747 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
748 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
749 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
750 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
751 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
752 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
753 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
754 #define PCRE_MAKETABLES pcre16_maketables()
755 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756 #define PCRE_PRINTINT PCRE_PRINTINT16
757 #define PCRE_STUDY PCRE_STUDY16
758
759 /* ----- Only 32-bit mode is supported ----- */
760
761 #elif defined SUPPORT_PCRE32
762 #define CHAR_SIZE 4
763 #define PCHARS PCHARS32
764 #define PCHARSV PCHARSV32
765 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
766 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
767 #define STRLEN STRLEN32
768 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
769 #define PCRE_COMPILE PCRE_COMPILE32
770 #define PCRE_CONFIG pcre32_config
771 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
773 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
774 #define PCRE_EXEC PCRE_EXEC32
775 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
776 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
777 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
778 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
779 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
780 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
781 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
782 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
783 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
784 #define PCRE_MAKETABLES pcre32_maketables()
785 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786 #define PCRE_PRINTINT PCRE_PRINTINT32
787 #define PCRE_STUDY PCRE_STUDY32
788
789 #endif
790
791 /* ----- End of mode-specific function call macros ----- */
792
793
794 /* Other parameters */
795
796 #ifndef CLOCKS_PER_SEC
797 #ifdef CLK_TCK
798 #define CLOCKS_PER_SEC CLK_TCK
799 #else
800 #define CLOCKS_PER_SEC 100
801 #endif
802 #endif
803
804 #if !defined NODFA
805 #define DFA_WS_DIMENSION 1000
806 #endif
807
808 /* This is the default loop count for timing. */
809
810 #define LOOPREPEAT 500000
811
812 /* Static variables */
813
814 static FILE *outfile;
815 static int log_store = 0;
816 static int callout_count;
817 static int callout_extra;
818 static int callout_fail_count;
819 static int callout_fail_id;
820 static int debug_lengths;
821 static int first_callout;
822 static int jit_was_used;
823 static int locale_set = 0;
824 static int show_malloc;
825 static int use_utf;
826 static size_t gotten_store;
827 static size_t first_gotten_store = 0;
828 static const unsigned char *last_callout_mark = NULL;
829
830 /* The buffers grow automatically if very long input lines are encountered. */
831
832 static int buffer_size = 50000;
833 static pcre_uint8 *buffer = NULL;
834 static pcre_uint8 *dbuffer = NULL;
835 static pcre_uint8 *pbuffer = NULL;
836
837 /* Another buffer is needed for translation to 16-bit (or 32-bit) character
838 strings. It will be obtained and extended as required. */
839
840 #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
841
842 /* We need the table of operator lengths that is used for 16/32-bit compiling, in
843 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
844 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
845 appropriately for the 16/32-bit world. Just as a safety check, make sure that
846 COMPILE_PCRE[16|32] is *not* set. */
847
848 #ifdef COMPILE_PCRE16
849 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
850 #endif
851
852 #ifdef COMPILE_PCRE32
853 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
854 #endif
855
856 #if LINK_SIZE == 2
857 #undef LINK_SIZE
858 #define LINK_SIZE 1
859 #elif LINK_SIZE == 3 || LINK_SIZE == 4
860 #undef LINK_SIZE
861 #define LINK_SIZE 2
862 #else
863 #error LINK_SIZE must be either 2, 3, or 4
864 #endif
865
866 #undef IMM2_SIZE
867 #define IMM2_SIZE 1
868
869 #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
870
871 #ifdef SUPPORT_PCRE16
872 static int buffer16_size = 0;
873 static pcre_uint16 *buffer16 = NULL;
874 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
875 #endif /* SUPPORT_PCRE16 */
876
877 #ifdef SUPPORT_PCRE32
878 static int buffer32_size = 0;
879 static pcre_uint32 *buffer32 = NULL;
880 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
881 #endif /* SUPPORT_PCRE32 */
882
883 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
884 support, the mode can be changed by an option. If there is no 8-bit support,
885 there must be 16- or 32-bit support, so default to that mode (16-bit first). */
886
887 #if defined SUPPORT_PCRE8
888 static int pcre_mode = PCRE8_MODE;
889 #elif defined SUPPORT_PCRE16
890 static int pcre_mode = PCRE16_MODE;
891 #elif defined SUPPORT_PCRE32
892 static int pcre_mode = PCRE32_MODE;
893 #endif
894
895 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
896
897 static int jit_study_bits[] =
898 {
899 PCRE_STUDY_JIT_COMPILE,
900 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
901 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
902 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
903 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
904 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
905 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
906 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
907 };
908
909 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
910 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
911
912 /* Textual explanations for runtime error codes */
913
914 static const char *errtexts[] = {
915 NULL, /* 0 is no error */
916 NULL, /* NOMATCH is handled specially */
917 "NULL argument passed",
918 "bad option value",
919 "magic number missing",
920 "unknown opcode - pattern overwritten?",
921 "no more memory",
922 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
923 "match limit exceeded",
924 "callout error code",
925 NULL, /* BADUTF8/16 is handled specially */
926 NULL, /* BADUTF8/16 offset is handled specially */
927 NULL, /* PARTIAL is handled specially */
928 "not used - internal error",
929 "internal error - pattern overwritten?",
930 "bad count value",
931 "item unsupported for DFA matching",
932 "backreference condition or recursion test not supported for DFA matching",
933 "match limit not supported for DFA matching",
934 "workspace size exceeded in DFA matching",
935 "too much recursion for DFA matching",
936 "recursion limit exceeded",
937 "not used - internal error",
938 "invalid combination of newline options",
939 "bad offset value",
940 NULL, /* SHORTUTF8/16 is handled specially */
941 "nested recursion at the same subject position",
942 "JIT stack limit reached",
943 "pattern compiled in wrong mode: 8-bit/16-bit error",
944 "pattern compiled with other endianness",
945 "invalid data in workspace for DFA restart"
946 };
947
948
949 /*************************************************
950 * Alternate character tables *
951 *************************************************/
952
953 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
954 using the default tables of the library. However, the T option can be used to
955 select alternate sets of tables, for different kinds of testing. Note also that
956 the L (locale) option also adjusts the tables. */
957
958 /* This is the set of tables distributed as default with PCRE. It recognizes
959 only ASCII characters. */
960
961 static const pcre_uint8 tables0[] = {
962
963 /* This table is a lower casing table. */
964
965 0, 1, 2, 3, 4, 5, 6, 7,
966 8, 9, 10, 11, 12, 13, 14, 15,
967 16, 17, 18, 19, 20, 21, 22, 23,
968 24, 25, 26, 27, 28, 29, 30, 31,
969 32, 33, 34, 35, 36, 37, 38, 39,
970 40, 41, 42, 43, 44, 45, 46, 47,
971 48, 49, 50, 51, 52, 53, 54, 55,
972 56, 57, 58, 59, 60, 61, 62, 63,
973 64, 97, 98, 99,100,101,102,103,
974 104,105,106,107,108,109,110,111,
975 112,113,114,115,116,117,118,119,
976 120,121,122, 91, 92, 93, 94, 95,
977 96, 97, 98, 99,100,101,102,103,
978 104,105,106,107,108,109,110,111,
979 112,113,114,115,116,117,118,119,
980 120,121,122,123,124,125,126,127,
981 128,129,130,131,132,133,134,135,
982 136,137,138,139,140,141,142,143,
983 144,145,146,147,148,149,150,151,
984 152,153,154,155,156,157,158,159,
985 160,161,162,163,164,165,166,167,
986 168,169,170,171,172,173,174,175,
987 176,177,178,179,180,181,182,183,
988 184,185,186,187,188,189,190,191,
989 192,193,194,195,196,197,198,199,
990 200,201,202,203,204,205,206,207,
991 208,209,210,211,212,213,214,215,
992 216,217,218,219,220,221,222,223,
993 224,225,226,227,228,229,230,231,
994 232,233,234,235,236,237,238,239,
995 240,241,242,243,244,245,246,247,
996 248,249,250,251,252,253,254,255,
997
998 /* This table is a case flipping table. */
999
1000 0, 1, 2, 3, 4, 5, 6, 7,
1001 8, 9, 10, 11, 12, 13, 14, 15,
1002 16, 17, 18, 19, 20, 21, 22, 23,
1003 24, 25, 26, 27, 28, 29, 30, 31,
1004 32, 33, 34, 35, 36, 37, 38, 39,
1005 40, 41, 42, 43, 44, 45, 46, 47,
1006 48, 49, 50, 51, 52, 53, 54, 55,
1007 56, 57, 58, 59, 60, 61, 62, 63,
1008 64, 97, 98, 99,100,101,102,103,
1009 104,105,106,107,108,109,110,111,
1010 112,113,114,115,116,117,118,119,
1011 120,121,122, 91, 92, 93, 94, 95,
1012 96, 65, 66, 67, 68, 69, 70, 71,
1013 72, 73, 74, 75, 76, 77, 78, 79,
1014 80, 81, 82, 83, 84, 85, 86, 87,
1015 88, 89, 90,123,124,125,126,127,
1016 128,129,130,131,132,133,134,135,
1017 136,137,138,139,140,141,142,143,
1018 144,145,146,147,148,149,150,151,
1019 152,153,154,155,156,157,158,159,
1020 160,161,162,163,164,165,166,167,
1021 168,169,170,171,172,173,174,175,
1022 176,177,178,179,180,181,182,183,
1023 184,185,186,187,188,189,190,191,
1024 192,193,194,195,196,197,198,199,
1025 200,201,202,203,204,205,206,207,
1026 208,209,210,211,212,213,214,215,
1027 216,217,218,219,220,221,222,223,
1028 224,225,226,227,228,229,230,231,
1029 232,233,234,235,236,237,238,239,
1030 240,241,242,243,244,245,246,247,
1031 248,249,250,251,252,253,254,255,
1032
1033 /* This table contains bit maps for various character classes. Each map is 32
1034 bytes long and the bits run from the least significant end of each byte. The
1035 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1036 graph, print, punct, and cntrl. Other classes are built from combinations. */
1037
1038 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1039 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1040 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1041 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1042
1043 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1044 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1045 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1046 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1047
1048 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1049 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1050 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1051 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1052
1053 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1054 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1055 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1056 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1057
1058 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1059 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1060 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1061 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1062
1063 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1064 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1065 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1066 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1067
1068 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1069 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1070 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1071 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1072
1073 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1074 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1075 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1076 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1077
1078 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1079 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1080 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1081 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1082
1083 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1084 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1085 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1086 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1087
1088 /* This table identifies various classes of character by individual bits:
1089 0x01 white space character
1090 0x02 letter
1091 0x04 decimal digit
1092 0x08 hexadecimal digit
1093 0x10 alphanumeric or '_'
1094 0x80 regular expression metacharacter or binary zero
1095 */
1096
1097 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1098 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1099 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1100 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1101 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1102 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1103 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1104 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1105 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1106 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1107 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1108 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1109 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1110 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1111 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1112 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1113 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1114 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1115 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1116 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1117 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1118 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1119 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1120 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1121 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1122 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1123 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1124 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1125 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1126 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1127 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1128 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1129
1130 /* This is a set of tables that came originally from a Windows user. It seems to
1131 be at least an approximation of ISO 8859. In particular, there are characters
1132 greater than 128 that are marked as spaces, letters, etc. */
1133
1134 static const pcre_uint8 tables1[] = {
1135 0,1,2,3,4,5,6,7,
1136 8,9,10,11,12,13,14,15,
1137 16,17,18,19,20,21,22,23,
1138 24,25,26,27,28,29,30,31,
1139 32,33,34,35,36,37,38,39,
1140 40,41,42,43,44,45,46,47,
1141 48,49,50,51,52,53,54,55,
1142 56,57,58,59,60,61,62,63,
1143 64,97,98,99,100,101,102,103,
1144 104,105,106,107,108,109,110,111,
1145 112,113,114,115,116,117,118,119,
1146 120,121,122,91,92,93,94,95,
1147 96,97,98,99,100,101,102,103,
1148 104,105,106,107,108,109,110,111,
1149 112,113,114,115,116,117,118,119,
1150 120,121,122,123,124,125,126,127,
1151 128,129,130,131,132,133,134,135,
1152 136,137,138,139,140,141,142,143,
1153 144,145,146,147,148,149,150,151,
1154 152,153,154,155,156,157,158,159,
1155 160,161,162,163,164,165,166,167,
1156 168,169,170,171,172,173,174,175,
1157 176,177,178,179,180,181,182,183,
1158 184,185,186,187,188,189,190,191,
1159 224,225,226,227,228,229,230,231,
1160 232,233,234,235,236,237,238,239,
1161 240,241,242,243,244,245,246,215,
1162 248,249,250,251,252,253,254,223,
1163 224,225,226,227,228,229,230,231,
1164 232,233,234,235,236,237,238,239,
1165 240,241,242,243,244,245,246,247,
1166 248,249,250,251,252,253,254,255,
1167 0,1,2,3,4,5,6,7,
1168 8,9,10,11,12,13,14,15,
1169 16,17,18,19,20,21,22,23,
1170 24,25,26,27,28,29,30,31,
1171 32,33,34,35,36,37,38,39,
1172 40,41,42,43,44,45,46,47,
1173 48,49,50,51,52,53,54,55,
1174 56,57,58,59,60,61,62,63,
1175 64,97,98,99,100,101,102,103,
1176 104,105,106,107,108,109,110,111,
1177 112,113,114,115,116,117,118,119,
1178 120,121,122,91,92,93,94,95,
1179 96,65,66,67,68,69,70,71,
1180 72,73,74,75,76,77,78,79,
1181 80,81,82,83,84,85,86,87,
1182 88,89,90,123,124,125,126,127,
1183 128,129,130,131,132,133,134,135,
1184 136,137,138,139,140,141,142,143,
1185 144,145,146,147,148,149,150,151,
1186 152,153,154,155,156,157,158,159,
1187 160,161,162,163,164,165,166,167,
1188 168,169,170,171,172,173,174,175,
1189 176,177,178,179,180,181,182,183,
1190 184,185,186,187,188,189,190,191,
1191 224,225,226,227,228,229,230,231,
1192 232,233,234,235,236,237,238,239,
1193 240,241,242,243,244,245,246,215,
1194 248,249,250,251,252,253,254,223,
1195 192,193,194,195,196,197,198,199,
1196 200,201,202,203,204,205,206,207,
1197 208,209,210,211,212,213,214,247,
1198 216,217,218,219,220,221,222,255,
1199 0,62,0,0,1,0,0,0,
1200 0,0,0,0,0,0,0,0,
1201 32,0,0,0,1,0,0,0,
1202 0,0,0,0,0,0,0,0,
1203 0,0,0,0,0,0,255,3,
1204 126,0,0,0,126,0,0,0,
1205 0,0,0,0,0,0,0,0,
1206 0,0,0,0,0,0,0,0,
1207 0,0,0,0,0,0,255,3,
1208 0,0,0,0,0,0,0,0,
1209 0,0,0,0,0,0,12,2,
1210 0,0,0,0,0,0,0,0,
1211 0,0,0,0,0,0,0,0,
1212 254,255,255,7,0,0,0,0,
1213 0,0,0,0,0,0,0,0,
1214 255,255,127,127,0,0,0,0,
1215 0,0,0,0,0,0,0,0,
1216 0,0,0,0,254,255,255,7,
1217 0,0,0,0,0,4,32,4,
1218 0,0,0,128,255,255,127,255,
1219 0,0,0,0,0,0,255,3,
1220 254,255,255,135,254,255,255,7,
1221 0,0,0,0,0,4,44,6,
1222 255,255,127,255,255,255,127,255,
1223 0,0,0,0,254,255,255,255,
1224 255,255,255,255,255,255,255,127,
1225 0,0,0,0,254,255,255,255,
1226 255,255,255,255,255,255,255,255,
1227 0,2,0,0,255,255,255,255,
1228 255,255,255,255,255,255,255,127,
1229 0,0,0,0,255,255,255,255,
1230 255,255,255,255,255,255,255,255,
1231 0,0,0,0,254,255,0,252,
1232 1,0,0,248,1,0,0,120,
1233 0,0,0,0,254,255,255,255,
1234 0,0,128,0,0,0,128,0,
1235 255,255,255,255,0,0,0,0,
1236 0,0,0,0,0,0,0,128,
1237 255,255,255,255,0,0,0,0,
1238 0,0,0,0,0,0,0,0,
1239 128,0,0,0,0,0,0,0,
1240 0,1,1,0,1,1,0,0,
1241 0,0,0,0,0,0,0,0,
1242 0,0,0,0,0,0,0,0,
1243 1,0,0,0,128,0,0,0,
1244 128,128,128,128,0,0,128,0,
1245 28,28,28,28,28,28,28,28,
1246 28,28,0,0,0,0,0,128,
1247 0,26,26,26,26,26,26,18,
1248 18,18,18,18,18,18,18,18,
1249 18,18,18,18,18,18,18,18,
1250 18,18,18,128,128,0,128,16,
1251 0,26,26,26,26,26,26,18,
1252 18,18,18,18,18,18,18,18,
1253 18,18,18,18,18,18,18,18,
1254 18,18,18,128,128,0,0,0,
1255 0,0,0,0,0,1,0,0,
1256 0,0,0,0,0,0,0,0,
1257 0,0,0,0,0,0,0,0,
1258 0,0,0,0,0,0,0,0,
1259 1,0,0,0,0,0,0,0,
1260 0,0,18,0,0,0,0,0,
1261 0,0,20,20,0,18,0,0,
1262 0,20,18,0,0,0,0,0,
1263 18,18,18,18,18,18,18,18,
1264 18,18,18,18,18,18,18,18,
1265 18,18,18,18,18,18,18,0,
1266 18,18,18,18,18,18,18,18,
1267 18,18,18,18,18,18,18,18,
1268 18,18,18,18,18,18,18,18,
1269 18,18,18,18,18,18,18,0,
1270 18,18,18,18,18,18,18,18
1271 };
1272
1273
1274
1275
1276 #ifndef HAVE_STRERROR
1277 /*************************************************
1278 * Provide strerror() for non-ANSI libraries *
1279 *************************************************/
1280
/* Fallback for old systems (e.g. SunOS4) whose libraries lack strerror() but
do export the sys_nerr/sys_errlist pair. Compiled only when HAVE_STRERROR is
not defined.

Argument:
  n          the error number to describe

Returns:     the matching sys_errlist entry, or a fixed "unknown" text when
             n is outside the range 0 .. sys_nerr-1
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1294 #endif /* HAVE_STRERROR */
1295
1296
1297
1298 /*************************************************
1299 * Print newline configuration *
1300 *************************************************/
1301
1302 /*
1303 Arguments:
1304 rc the return code from PCRE_CONFIG_NEWLINE
1305 isc TRUE if called from "-C newline"
1306 Returns: nothing
1307 */
1308
1309 static void
1310 print_newline_config(int rc, BOOL isc)
1311 {
1312 const char *s = NULL;
1313 if (!isc) printf(" Newline sequence is ");
1314 switch(rc)
1315 {
1316 case CHAR_CR: s = "CR"; break;
1317 case CHAR_LF: s = "LF"; break;
1318 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1319 case -1: s = "ANY"; break;
1320 case -2: s = "ANYCRLF"; break;
1321
1322 default:
1323 printf("a non-standard value: 0x%04x\n", rc);
1324 return;
1325 }
1326
1327 printf("%s\n", s);
1328 }
1329
1330
1331
1332 /*************************************************
1333 * JIT memory callback *
1334 *************************************************/
1335
1336 static pcre_jit_stack* jit_callback(void *arg)
1337 {
1338 jit_was_used = TRUE;
1339 return (pcre_jit_stack *)arg;
1340 }
1341
1342
1343 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1344 /*************************************************
1345 * Convert UTF-8 string to value *
1346 *************************************************/
1347
1348 /* This function takes one or more bytes that represents a UTF-8 character,
1349 and returns the value of the character.
1350
1351 Argument:
1352 utf8bytes a pointer to the byte vector
1353 vptr a pointer to an int to receive the value
1354
1355 Returns: > 0 => the number of bytes consumed
1356 -6 to 0 => malformed UTF-8 character at offset = (-return)
1357 */
1358
1359 static int
1360 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1361 {
1362 int c = *utf8bytes++;
1363 int d = c;
1364 int i, j, s;
1365
1366 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1367 {
1368 if ((d & 0x80) == 0) break;
1369 d <<= 1;
1370 }
1371
1372 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1373 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1374
1375 /* i now has a value in the range 1-5 */
1376
1377 s = 6*i;
1378 d = (c & utf8_table3[i]) << s;
1379
1380 for (j = 0; j < i; j++)
1381 {
1382 c = *utf8bytes++;
1383 if ((c & 0xc0) != 0x80) return -(j+1);
1384 s -= 6;
1385 d |= (c & 0x3f) << s;
1386 }
1387
1388 /* Check that encoding was the correct unique one */
1389
1390 for (j = 0; j < utf8_table1_size; j++)
1391 if (d <= utf8_table1[j]) break;
1392 if (j != i) return -(i+1);
1393
1394 /* Valid value */
1395
1396 *vptr = d;
1397 return i+1;
1398 }
1399 #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1400
1401
1402
1403 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1404 /*************************************************
1405 * Convert character value to UTF-8 *
1406 *************************************************/
1407
1408 /* This function takes an integer value in the range 0 - 0x7fffffff
1409 and encodes it as a UTF-8 character in 0 to 6 bytes.
1410
1411 Arguments:
1412 cvalue the character value
1413 utf8bytes pointer to buffer for result - at least 6 bytes long
1414
1415 Returns: number of characters placed in the buffer
1416 */
1417
1418 static int
1419 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1420 {
1421 register int i, j;
1422 for (i = 0; i < utf8_table1_size; i++)
1423 if (cvalue <= utf8_table1[i]) break;
1424 utf8bytes += i;
1425 for (j = i; j > 0; j--)
1426 {
1427 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1428 cvalue >>= 6;
1429 }
1430 *utf8bytes = utf8_table2[i] | cvalue;
1431 return i + 1;
1432 }
1433 #endif
1434
1435
1436 #ifdef SUPPORT_PCRE16
1437 /*************************************************
1438 * Convert a string to 16-bit *
1439 *************************************************/
1440
1441 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1442 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1443 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1444 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1445 result is always left in buffer16.
1446
1447 Note that this function does not object to surrogate values. This is
1448 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1449 for the purpose of testing that they are correctly faulted.
1450
1451 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1452 in UTF-8 so that values greater than 255 can be handled.
1453
1454 Arguments:
1455 data TRUE if converting a data line; FALSE for a regex
1456 p points to a byte string
1457 utf true if UTF-8 (to be converted to UTF-16)
1458 len number of bytes in the string (excluding trailing zero)
1459
1460 Returns: number of 16-bit data items used (excluding trailing zero)
1461 OR -1 if a UTF-8 string is malformed
1462 OR -2 if a value > 0x10ffff is encountered
1463 OR -3 if a value > 0xffff is encountered when not in UTF mode
1464 */
1465
1466 static int
1467 to16(int data, pcre_uint8 *p, int utf, int len)
1468 {
1469 pcre_uint16 *pp;
1470
1471 if (buffer16_size < 2*len + 2)
1472 {
1473 if (buffer16 != NULL) free(buffer16);
1474 buffer16_size = 2*len + 2;
1475 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1476 if (buffer16 == NULL)
1477 {
1478 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1479 exit(1);
1480 }
1481 }
1482
1483 pp = buffer16;
1484
1485 if (!utf && !data)
1486 {
1487 while (len-- > 0) *pp++ = *p++;
1488 }
1489
1490 else
1491 {
1492 int c = 0;
1493 while (len > 0)
1494 {
1495 int chlen = utf82ord(p, &c);
1496 if (chlen <= 0) return -1;
1497 if (c > 0x10ffff) return -2;
1498 p += chlen;
1499 len -= chlen;
1500 if (c < 0x10000) *pp++ = c; else
1501 {
1502 if (!utf) return -3;
1503 c -= 0x10000;
1504 *pp++ = 0xD800 | (c >> 10);
1505 *pp++ = 0xDC00 | (c & 0x3ff);
1506 }
1507 }
1508 }
1509
1510 *pp = 0;
1511 return pp - buffer16;
1512 }
1513 #endif
1514
1515 #ifdef SUPPORT_PCRE32
1516 /*************************************************
1517 * Convert a string to 32-bit *
1518 *************************************************/
1519
1520 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1521 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1522 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1523 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1524 result is always left in buffer32.
1525
1526 Note that this function does not object to surrogate values. This is
1527 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1528 for the purpose of testing that they are correctly faulted.
1529
1530 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1531 in UTF-8 so that values greater than 255 can be handled.
1532
1533 Arguments:
1534 data TRUE if converting a data line; FALSE for a regex
1535 p points to a byte string
1536 utf true if UTF-8 (to be converted to UTF-32)
1537 len number of bytes in the string (excluding trailing zero)
1538
1539 Returns: number of 32-bit data items used (excluding trailing zero)
1540 OR -1 if a UTF-8 string is malformed
1541 OR -2 if a value > 0x10ffff is encountered
1542 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1543 */
1544
1545 static int
1546 to32(int data, pcre_uint8 *p, int utf, int len)
1547 {
1548 pcre_uint32 *pp;
1549
1550 if (buffer32_size < 4*len + 4)
1551 {
1552 if (buffer32 != NULL) free(buffer32);
1553 buffer32_size = 4*len + 4;
1554 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1555 if (buffer32 == NULL)
1556 {
1557 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1558 exit(1);
1559 }
1560 }
1561
1562 pp = buffer32;
1563
1564 if (!utf && !data)
1565 {
1566 while (len-- > 0) *pp++ = *p++;
1567 }
1568
1569 else
1570 {
1571 int c = 0;
1572 while (len > 0)
1573 {
1574 int chlen = utf82ord(p, &c);
1575 if (chlen <= 0) return -1;
1576 if (utf)
1577 {
1578 if (c > 0x10ffff) return -2;
1579 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1580 }
1581
1582 p += chlen;
1583 len -= chlen;
1584 *pp++ = c;
1585 }
1586 }
1587
1588 *pp = 0;
1589 return pp - buffer32;
1590 }
1591 #endif
1592
1593 /*************************************************
1594 * Read or extend an input line *
1595 *************************************************/
1596
1597 /* Input lines are read into buffer, but both patterns and data lines can be
1598 continued over multiple input lines. In addition, if the buffer fills up, we
1599 want to automatically expand it so as to be able to handle extremely large
1600 lines that are needed for certain stress tests. When the input buffer is
1601 expanded, the other two buffers must also be expanded likewise, and the
1602 contents of pbuffer, which are a copy of the input for callouts, must be
1603 preserved (for when expansion happens for a data line). This is not the most
1604 optimal way of handling this, but hey, this is just a test program!
1605
1606 Arguments:
1607 f the file to read
1608 start where in buffer to start (this *must* be within buffer)
1609 prompt for stdin or readline()
1610
1611 Returns: pointer to the start of new data
1612 could be a copy of start, or could be moved
1613 NULL if no data read and EOF reached
1614 */
1615
1616 static pcre_uint8 *
1617 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1618 {
1619 pcre_uint8 *here = start;
1620
1621 for (;;)
1622 {
1623 size_t rlen = (size_t)(buffer_size - (here - buffer));
1624
1625 if (rlen > 1000)
1626 {
1627 int dlen;
1628
1629 /* If libreadline or libedit support is required, use readline() to read a
1630 line if the input is a terminal. Note that readline() removes the trailing
1631 newline, so we must put it back again, to be compatible with fgets(). */
1632
1633 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1634 if (isatty(fileno(f)))
1635 {
1636 size_t len;
1637 char *s = readline(prompt);
1638 if (s == NULL) return (here == start)? NULL : start;
1639 len = strlen(s);
1640 if (len > 0) add_history(s);
1641 if (len > rlen - 1) len = rlen - 1;
1642 memcpy(here, s, len);
1643 here[len] = '\n';
1644 here[len+1] = 0;
1645 free(s);
1646 }
1647 else
1648 #endif
1649
1650 /* Read the next line by normal means, prompting if the file is stdin. */
1651
1652 {
1653 if (f == stdin) printf("%s", prompt);
1654 if (fgets((char *)here, rlen, f) == NULL)
1655 return (here == start)? NULL : start;
1656 }
1657
1658 dlen = (int)strlen((char *)here);
1659 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1660 here += dlen;
1661 }
1662
1663 else
1664 {
1665 int new_buffer_size = 2*buffer_size;
1666 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1667 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1668 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1669
1670 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1671 {
1672 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1673 exit(1);
1674 }
1675
1676 memcpy(new_buffer, buffer, buffer_size);
1677 memcpy(new_pbuffer, pbuffer, buffer_size);
1678
1679 buffer_size = new_buffer_size;
1680
1681 start = new_buffer + (start - buffer);
1682 here = new_buffer + (here - buffer);
1683
1684 free(buffer);
1685 free(dbuffer);
1686 free(pbuffer);
1687
1688 buffer = new_buffer;
1689 dbuffer = new_dbuffer;
1690 pbuffer = new_pbuffer;
1691 }
1692 }
1693
1694 return NULL; /* Control never gets here */
1695 }
1696
1697
1698
1699 /*************************************************
1700 * Read number from string *
1701 *************************************************/
1702
1703 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1704 around with conditional compilation, just do the job by hand. It is only used
1705 for unpicking arguments, so just keep it simple.
1706
1707 Arguments:
1708 str string to be converted
1709 endptr where to put the end pointer
1710
1711 Returns: the unsigned long
1712 */
1713
1714 static int
1715 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1716 {
1717 int result = 0;
1718 while(*str != 0 && isspace(*str)) str++;
1719 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1720 *endptr = str;
1721 return(result);
1722 }
1723
1724
1725
1726 /*************************************************
1727 * Print one character *
1728 *************************************************/
1729
1730 /* Print a single character either literally, or as a hex escape. */
1731
1732 static int pchar(pcre_uint32 c, FILE *f)
1733 {
1734 if (PRINTOK(c))
1735 {
1736 if (f != NULL) fprintf(f, "%c", c);
1737 return 1;
1738 }
1739
1740 if (c < 0x100)
1741 {
1742 if (use_utf)
1743 {
1744 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1745 return 6;
1746 }
1747 else
1748 {
1749 if (f != NULL) fprintf(f, "\\x%02x", c);
1750 return 4;
1751 }
1752 }
1753
1754 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1755 return (c <= 0x000000ff)? 6 :
1756 (c <= 0x00000fff)? 7 :
1757 (c <= 0x0000ffff)? 8 :
1758 (c <= 0x000fffff)? 9 : 10;
1759 }
1760
1761
1762
1763 #ifdef SUPPORT_PCRE8
1764 /*************************************************
1765 * Print 8-bit character string *
1766 *************************************************/
1767
1768 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1769 If handed a NULL file, just counts chars without printing. */
1770
1771 static int pchars(pcre_uint8 *p, int length, FILE *f)
1772 {
1773 int c = 0;
1774 int yield = 0;
1775
1776 if (length < 0)
1777 length = strlen((char *)p);
1778
1779 while (length-- > 0)
1780 {
1781 #if !defined NOUTF
1782 if (use_utf)
1783 {
1784 int rc = utf82ord(p, &c);
1785 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1786 {
1787 length -= rc - 1;
1788 p += rc;
1789 yield += pchar(c, f);
1790 continue;
1791 }
1792 }
1793 #endif
1794 c = *p++;
1795 yield += pchar(c, f);
1796 }
1797
1798 return yield;
1799 }
1800 #endif
1801
1802
1803
1804 #ifdef SUPPORT_PCRE16
1805 /*************************************************
1806 * Find length of 0-terminated 16-bit string *
1807 *************************************************/
1808
1809 static int strlen16(PCRE_SPTR16 p)
1810 {
1811 int len = 0;
1812 while (*p++ != 0) len++;
1813 return len;
1814 }
1815 #endif /* SUPPORT_PCRE16 */
1816
1817
1818
1819 #ifdef SUPPORT_PCRE32
1820 /*************************************************
1821 * Find length of 0-terminated 32-bit string *
1822 *************************************************/
1823
1824 static int strlen32(PCRE_SPTR32 p)
1825 {
1826 int len = 0;
1827 while (*p++ != 0) len++;
1828 return len;
1829 }
1830 #endif /* SUPPORT_PCRE32 */
1831
1832
1833
1834 #ifdef SUPPORT_PCRE16
1835 /*************************************************
1836 * Print 16-bit character string *
1837 *************************************************/
1838
1839 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1840 If handed a NULL file, just counts chars without printing. */
1841
1842 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1843 {
1844 int yield = 0;
1845
1846 if (length < 0)
1847 length = strlen16(p);
1848
1849 while (length-- > 0)
1850 {
1851 pcre_uint32 c = *p++ & 0xffff;
1852 #if !defined NOUTF
1853 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1854 {
1855 int d = *p & 0xffff;
1856 if (d >= 0xDC00 && d < 0xDFFF)
1857 {
1858 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1859 length--;
1860 p++;
1861 }
1862 }
1863 #endif
1864 yield += pchar(c, f);
1865 }
1866
1867 return yield;
1868 }
1869 #endif /* SUPPORT_PCRE16 */
1870
1871
1872
1873 #ifdef SUPPORT_PCRE32
1874 /*************************************************
1875 * Print 32-bit character string *
1876 *************************************************/
1877
1878 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
1879 If handed a NULL file, just counts chars without printing. */
1880
1881 static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
1882 {
1883 int yield = 0;
1884
1885 if (length < 0)
1886 length = strlen32(p);
1887
1888 while (length-- > 0)
1889 {
1890 pcre_uint32 c = *p++;
1891 yield += pchar(c, f);
1892 }
1893
1894 return yield;
1895 }
1896 #endif /* SUPPORT_PCRE32 */
1897
1898
1899
1900 #ifdef SUPPORT_PCRE8
1901 /*************************************************
1902 * Read a capture name (8-bit) and check it *
1903 *************************************************/
1904
1905 static pcre_uint8 *
1906 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1907 {
1908 pcre_uint8 *npp = *pp;
1909 while (isalnum(*p)) *npp++ = *p++;
1910 *npp++ = 0;
1911 *npp = 0;
1912 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1913 {
1914 fprintf(outfile, "no parentheses with name \"");
1915 PCHARSV(*pp, 0, -1, outfile);
1916 fprintf(outfile, "\"\n");
1917 }
1918
1919 *pp = npp;
1920 return p;
1921 }
1922 #endif /* SUPPORT_PCRE8 */
1923
1924
1925
1926 #ifdef SUPPORT_PCRE16
1927 /*************************************************
1928 * Read a capture name (16-bit) and check it *
1929 *************************************************/
1930
1931 /* Note that the text being read is 8-bit. */
1932
1933 static pcre_uint8 *
1934 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1935 {
1936 pcre_uint16 *npp = *pp;
1937 while (isalnum(*p)) *npp++ = *p++;
1938 *npp++ = 0;
1939 *npp = 0;
1940 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1941 {
1942 fprintf(outfile, "no parentheses with name \"");
1943 PCHARSV(*pp, 0, -1, outfile);
1944 fprintf(outfile, "\"\n");
1945 }
1946 *pp = npp;
1947 return p;
1948 }
1949 #endif /* SUPPORT_PCRE16 */
1950
1951
1952
1953 #ifdef SUPPORT_PCRE32
1954 /*************************************************
1955 * Read a capture name (32-bit) and check it *
1956 *************************************************/
1957
1958 /* Note that the text being read is 8-bit. */
1959
1960 static pcre_uint8 *
1961 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
1962 {
1963 pcre_uint32 *npp = *pp;
1964 while (isalnum(*p)) *npp++ = *p++;
1965 *npp++ = 0;
1966 *npp = 0;
1967 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
1968 {
1969 fprintf(outfile, "no parentheses with name \"");
1970 PCHARSV(*pp, 0, -1, outfile);
1971 fprintf(outfile, "\"\n");
1972 }
1973 *pp = npp;
1974 return p;
1975 }
1976 #endif /* SUPPORT_PCRE32 */
1977
1978
1979
1980 /*************************************************
1981 * Callout function *
1982 *************************************************/
1983
1984 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1985 the match. Yield zero unless more callouts than the fail count, or the callout
1986 data is not zero. */
1987
1988 static int callout(pcre_callout_block *cb)
1989 {
1990 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1991 int i, pre_start, post_start, subject_length;
1992
1993 if (callout_extra)
1994 {
1995 fprintf(f, "Callout %d: last capture = %d\n",
1996 cb->callout_number, cb->capture_last);
1997
1998 for (i = 0; i < cb->capture_top * 2; i += 2)
1999 {
2000 if (cb->offset_vector[i] < 0)
2001 fprintf(f, "%2d: <unset>\n", i/2);
2002 else
2003 {
2004 fprintf(f, "%2d: ", i/2);
2005 PCHARSV(cb->subject, cb->offset_vector[i],
2006 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2007 fprintf(f, "\n");
2008 }
2009 }
2010 }
2011
2012 /* Re-print the subject in canonical form, the first time or if giving full
2013 datails. On subsequent calls in the same match, we use pchars just to find the
2014 printed lengths of the substrings. */
2015
2016 if (f != NULL) fprintf(f, "--->");
2017
2018 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2019 PCHARS(post_start, cb->subject, cb->start_match,
2020 cb->current_position - cb->start_match, f);
2021
2022 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2023
2024 PCHARSV(cb->subject, cb->current_position,
2025 cb->subject_length - cb->current_position, f);
2026
2027 if (f != NULL) fprintf(f, "\n");
2028
2029 /* Always print appropriate indicators, with callout number if not already
2030 shown. For automatic callouts, show the pattern offset. */
2031
2032 if (cb->callout_number == 255)
2033 {
2034 fprintf(outfile, "%+3d ", cb->pattern_position);
2035 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2036 }
2037 else
2038 {
2039 if (callout_extra) fprintf(outfile, " ");
2040 else fprintf(outfile, "%3d ", cb->callout_number);
2041 }
2042
2043 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2044 fprintf(outfile, "^");
2045
2046 if (post_start > 0)
2047 {
2048 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2049 fprintf(outfile, "^");
2050 }
2051
2052 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2053 fprintf(outfile, " ");
2054
2055 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2056 pbuffer + cb->pattern_position);
2057
2058 fprintf(outfile, "\n");
2059 first_callout = 0;
2060
2061 if (cb->mark != last_callout_mark)
2062 {
2063 if (cb->mark == NULL)
2064 fprintf(outfile, "Latest Mark: <unset>\n");
2065 else
2066 {
2067 fprintf(outfile, "Latest Mark: ");
2068 PCHARSV(cb->mark, 0, -1, outfile);
2069 putc('\n', outfile);
2070 }
2071 last_callout_mark = cb->mark;
2072 }
2073
2074 if (cb->callout_data != NULL)
2075 {
2076 int callout_data = *((int *)(cb->callout_data));
2077 if (callout_data != 0)
2078 {
2079 fprintf(outfile, "Callout data = %d\n", callout_data);
2080 return callout_data;
2081 }
2082 }
2083
2084 return (cb->callout_number != callout_fail_id)? 0 :
2085 (++callout_count >= callout_fail_count)? 1 : 0;
2086 }
2087
2088
2089 /*************************************************
2090 * Local malloc functions *
2091 *************************************************/
2092
2093 /* Alternative malloc function, to test functionality and save the size of a
2094 compiled re, which is the first store request that pcre_compile() makes. The
2095 show_malloc variable is set only during matching. */
2096
2097 static void *new_malloc(size_t size)
2098 {
2099 void *block = malloc(size);
2100 gotten_store = size;
2101 if (first_gotten_store == 0) first_gotten_store = size;
2102 if (show_malloc)
2103 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2104 return block;
2105 }
2106
2107 static void new_free(void *block)
2108 {
2109 if (show_malloc)
2110 fprintf(outfile, "free %p\n", block);
2111 free(block);
2112 }
2113
2114 /* For recursion malloc/free, to test stacking calls */
2115
2116 static void *stack_malloc(size_t size)
2117 {
2118 void *block = malloc(size);
2119 if (show_malloc)
2120 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2121 return block;
2122 }
2123
2124 static void stack_free(void *block)
2125 {
2126 if (show_malloc)
2127 fprintf(outfile, "stack_free %p\n", block);
2128 free(block);
2129 }
2130
2131
2132 /*************************************************
2133 * Call pcre_fullinfo() *
2134 *************************************************/
2135
2136 /* Get one piece of information from the pcre_fullinfo() function. When only
2137 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2138 value, but the code is defensive.
2139
2140 Arguments:
2141 re compiled regex
2142 study study data
2143 option PCRE_INFO_xxx option
2144 ptr where to put the data
2145
2146 Returns: 0 when OK, < 0 on error
2147 */
2148
2149 static int
2150 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2151 {
2152 int rc;
2153
2154 if (pcre_mode == PCRE32_MODE)
2155 #ifdef SUPPORT_PCRE32
2156 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2157 #else
2158 rc = PCRE_ERROR_BADMODE;
2159 #endif
2160 else if (pcre_mode == PCRE16_MODE)
2161 #ifdef SUPPORT_PCRE16
2162 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2163 #else
2164 rc = PCRE_ERROR_BADMODE;
2165 #endif
2166 else
2167 #ifdef SUPPORT_PCRE8
2168 rc = pcre_fullinfo(re, study, option, ptr);
2169 #else
2170 rc = PCRE_ERROR_BADMODE;
2171 #endif
2172
2173 if (rc < 0)
2174 {
2175 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2176 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2177 if (rc == PCRE_ERROR_BADMODE)
2178 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2179 "%d-bit mode\n", 8 * CHAR_SIZE,
2180 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2181 }
2182
2183 return rc;
2184 }
2185
2186
2187
2188 /*************************************************
2189 * Swap byte functions *
2190 *************************************************/
2191
2192 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2193 value, respectively.
2194
2195 Arguments:
2196 value any number
2197
2198 Returns: the byte swapped value
2199 */
2200
2201 static pcre_uint32
2202 swap_uint32(pcre_uint32 value)
2203 {
2204 return ((value & 0x000000ff) << 24) |
2205 ((value & 0x0000ff00) << 8) |
2206 ((value & 0x00ff0000) >> 8) |
2207 (value >> 24);
2208 }
2209
2210 static pcre_uint16
2211 swap_uint16(pcre_uint16 value)
2212 {
2213 return (value >> 8) | (value << 8);
2214 }
2215
2216
2217
2218 /*************************************************
2219 * Flip bytes in a compiled pattern *
2220 *************************************************/
2221
2222 /* This function is called if the 'F' option was present on a pattern that is
2223 to be written to a file. We flip the bytes of all the integer fields in the
2224 regex data block and the study block. In 16-bit mode this also flips relevant
2225 bytes in the pattern itself. This is to make it possible to test PCRE's
2226 ability to reload byte-flipped patterns, e.g. those compiled on a different
2227 architecture. */
2228
2229 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2230 static void
2231 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2232 {
2233 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2234 #ifdef SUPPORT_PCRE16
2235 int op;
2236 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2237 int length = re->name_count * re->name_entry_size;
2238 #ifdef SUPPORT_UTF
2239 BOOL utf = (re->options & PCRE_UTF16) != 0;
2240 BOOL utf16_char = FALSE;
2241 #endif /* SUPPORT_UTF */
2242 #endif /* SUPPORT_PCRE16 */
2243
2244 /* Always flip the bytes in the main data block and study blocks. */
2245
2246 re->magic_number = REVERSED_MAGIC_NUMBER;
2247 re->size = swap_uint32(re->size);
2248 re->options = swap_uint32(re->options);
2249 re->flags = swap_uint16(re->flags);
2250 re->top_bracket = swap_uint16(re->top_bracket);
2251 re->top_backref = swap_uint16(re->top_backref);
2252 re->first_char = swap_uint16(re->first_char);
2253 re->req_char = swap_uint16(re->req_char);
2254 re->name_table_offset = swap_uint16(re->name_table_offset);
2255 re->name_entry_size = swap_uint16(re->name_entry_size);
2256 re->name_count = swap_uint16(re->name_count);
2257
2258 if (extra != NULL)
2259 {
2260 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2261 rsd->size = swap_uint32(rsd->size);
2262 rsd->flags = swap_uint32(rsd->flags);
2263 rsd->minlength = swap_uint32(rsd->minlength);
2264 }
2265
2266 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2267 in the name table, if present, and then in the pattern itself. */
2268
2269 #ifdef SUPPORT_PCRE16
2270 if (pcre_mode != PCRE16_MODE) return;
2271
2272 while(TRUE)
2273 {
2274 /* Swap previous characters. */
2275 while (length-- > 0)
2276 {
2277 *ptr = swap_uint16(*ptr);
2278 ptr++;
2279 }
2280 #ifdef SUPPORT_UTF
2281 if (utf16_char)
2282 {
2283 if ((ptr[-1] & 0xfc00) == 0xd800)
2284 {
2285 /* We know that there is only one extra character in UTF-16. */
2286 *ptr = swap_uint16(*ptr);
2287 ptr++;
2288 }
2289 }
2290 utf16_char = FALSE;
2291 #endif /* SUPPORT_UTF */
2292
2293 /* Get next opcode. */
2294
2295 length = 0;
2296 op = *ptr;
2297 *ptr++ = swap_uint16(op);
2298
2299 switch (op)
2300 {
2301 case OP_END:
2302 return;
2303
2304 #ifdef SUPPORT_UTF
2305 case OP_CHAR:
2306 case OP_CHARI:
2307 case OP_NOT:
2308 case OP_NOTI:
2309 case OP_STAR:
2310 case OP_MINSTAR:
2311 case OP_PLUS:
2312 case OP_MINPLUS:
2313 case OP_QUERY:
2314 case OP_MINQUERY:
2315 case OP_UPTO:
2316 case OP_MINUPTO:
2317 case OP_EXACT:
2318 case OP_POSSTAR:
2319 case OP_POSPLUS:
2320 case OP_POSQUERY:
2321 case OP_POSUPTO:
2322 case OP_STARI:
2323 case OP_MINSTARI:
2324 case OP_PLUSI:
2325 case OP_MINPLUSI:
2326 case OP_QUERYI:
2327 case OP_MINQUERYI:
2328 case OP_UPTOI:
2329 case OP_MINUPTOI:
2330 case OP_EXACTI:
2331 case OP_POSSTARI:
2332 case OP_POSPLUSI:
2333 case OP_POSQUERYI:
2334 case OP_POSUPTOI:
2335 case OP_NOTSTAR:
2336 case OP_NOTMINSTAR:
2337 case OP_NOTPLUS:
2338 case OP_NOTMINPLUS:
2339 case OP_NOTQUERY:
2340 case OP_NOTMINQUERY:
2341 case OP_NOTUPTO:
2342 case OP_NOTMINUPTO:
2343 case OP_NOTEXACT:
2344 case OP_NOTPOSSTAR:
2345 case OP_NOTPOSPLUS:
2346 case OP_NOTPOSQUERY:
2347 case OP_NOTPOSUPTO:
2348 case OP_NOTSTARI:
2349 case OP_NOTMINSTARI:
2350 case OP_NOTPLUSI:
2351 case OP_NOTMINPLUSI:
2352 case OP_NOTQUERYI:
2353 case OP_NOTMINQUERYI:
2354 case OP_NOTUPTOI:
2355 case OP_NOTMINUPTOI:
2356 case OP_NOTEXACTI:
2357 case OP_NOTPOSSTARI:
2358 case OP_NOTPOSPLUSI:
2359 case OP_NOTPOSQUERYI:
2360 case OP_NOTPOSUPTOI:
2361 if (utf) utf16_char = TRUE;
2362 #endif
2363 /* Fall through. */
2364
2365 default:
2366 length = OP_lengths16[op] - 1;
2367 break;
2368
2369 case OP_CLASS:
2370 case OP_NCLASS:
2371 /* Skip the character bit map. */
2372 ptr += 32/sizeof(pcre_uint16);
2373 length = 0;
2374 break;
2375
2376 case OP_XCLASS:
2377 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2378 if (LINK_SIZE > 1)
2379 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2380 - (1 + LINK_SIZE + 1));
2381 else
2382 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2383
2384 /* Reverse the size of the XCLASS instance. */
2385 *ptr = swap_uint16(*ptr);
2386 ptr++;
2387 if (LINK_SIZE > 1)
2388 {
2389 *ptr = swap_uint16(*ptr);
2390 ptr++;
2391 }
2392
2393 op = *ptr;
2394 *ptr = swap_uint16(op);
2395 ptr++;
2396 if ((op & XCL_MAP) != 0)
2397 {
2398 /* Skip the character bit map. */
2399 ptr += 32/sizeof(pcre_uint16);
2400 length -= 32/sizeof(pcre_uint16);
2401 }
2402 break;
2403 }
2404 }
2405 /* Control should never reach here in 16 bit mode. */
2406 #endif /* SUPPORT_PCRE16 */
2407 }
2408 #endif /* SUPPORT_PCRE[8|16] */
2409
2410
2411
2412 #if defined SUPPORT_PCRE32
2413 static void
2414 regexflip_32(pcre *ere, pcre_extra *extra)
2415 {
2416 real_pcre32 *re = (real_pcre32 *)ere;
2417 int op;
2418 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2419 int length = re->name_count * re->name_entry_size;
2420 #ifdef SUPPORT_UTF
2421 BOOL utf = (re->options & PCRE_UTF32) != 0;
2422 #endif /* SUPPORT_UTF */
2423
2424 /* Always flip the bytes in the main data block and study blocks. */
2425
2426 re->magic_number = REVERSED_MAGIC_NUMBER;
2427 re->size = swap_uint32(re->size);
2428 re->options = swap_uint32(re->options);
2429 re->flags = swap_uint16(re->flags);
2430 re->top_bracket = swap_uint16(re->top_bracket);
2431 re->top_backref = swap_uint16(re->top_backref);
2432 re->first_char = swap_uint32(re->first_char);
2433 re->req_char = swap_uint32(re->req_char);
2434 re->name_table_offset = swap_uint16(re->name_table_offset);
2435 re->name_entry_size = swap_uint16(re->name_entry_size);
2436 re->name_count = swap_uint16(re->name_count);
2437
2438 if (extra != NULL)
2439 {
2440 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2441 rsd->size = swap_uint32(rsd->size);
2442 rsd->flags = swap_uint32(rsd->flags);
2443 rsd->minlength = swap_uint32(rsd->minlength);
2444 }
2445
2446 /* In 32-bit mode we must swap bytes
2447 in the name table, if present, and then in the pattern itself. */
2448
2449 while(TRUE)
2450 {
2451 /* Swap previous characters. */
2452 while (length-- > 0)
2453 {
2454 *ptr = swap_uint32(*ptr);
2455 ptr++;
2456 }
2457
2458 /* Get next opcode. */
2459
2460 length = 0;
2461 op = *ptr;
2462 *ptr++ = swap_uint32(op);
2463
2464 switch (op)
2465 {
2466 case OP_END:
2467 return;
2468
2469 default:
2470 length = OP_lengths32[op] - 1;
2471 break;
2472
2473 case OP_CLASS:
2474 case OP_NCLASS:
2475 /* Skip the character bit map. */
2476 ptr += 32/sizeof(pcre_uint32);
2477 length = 0;
2478 break;
2479
2480 case OP_XCLASS:
2481 /* LINK_SIZE can only be 1 in 32-bit mode. */
2482 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2483
2484 /* Reverse the size of the XCLASS instance. */
2485 *ptr = swap_uint32(*ptr);
2486 ptr++;
2487
2488 op = *ptr;
2489 *ptr = swap_uint32(op);
2490 ptr++;
2491 if ((op & XCL_MAP) != 0)
2492 {
2493 /* Skip the character bit map. */
2494 ptr += 32/sizeof(pcre_uint32);
2495 length -= 32/sizeof(pcre_uint32);
2496 }
2497 break;
2498 }
2499 }
2500 /* Control should never reach here in 32 bit mode. */
2501 }
2502
2503 #endif /* SUPPORT_PCRE32 */
2504
2505
2506
2507 static void
2508 regexflip(pcre *ere, pcre_extra *extra)
2509 {
2510 #if defined SUPPORT_PCRE32
2511 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2512 regexflip_32(ere, extra);
2513 #endif
2514 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2515 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2516 regexflip8_or_16(ere, extra);
2517 #endif
2518 }
2519
2520
2521
2522 /*************************************************
2523 * Check match or recursion limit *
2524 *************************************************/
2525
2526 static int
2527 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2528 int start_offset, int options, int *use_offsets, int use_size_offsets,
2529 int flag, unsigned long int *limit, int errnumber, const char *msg)
2530 {
2531 int count;
2532 int min = 0;
2533 int mid = 64;
2534 int max = -1;
2535
2536 extra->flags |= flag;
2537
2538 for (;;)
2539 {
2540 *limit = mid;
2541
2542 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2543 use_offsets, use_size_offsets);
2544
2545 if (count == errnumber)
2546 {
2547 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2548 min = mid;
2549 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2550 }
2551
2552 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2553 count == PCRE_ERROR_PARTIAL)
2554 {
2555 if (mid == min + 1)
2556 {
2557 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2558 break;
2559 }
2560 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2561 max = mid;
2562 mid = (min + mid)/2;
2563 }
2564 else break; /* Some other error */
2565 }
2566
2567 extra->flags &= ~flag;
2568 return count;
2569 }
2570
2571
2572
2573 /*************************************************
2574 * Case-independent strncmp() function *
2575 *************************************************/
2576
2577 /*
2578 Arguments:
2579 s first string
2580 t second string
2581 n number of characters to compare
2582
2583 Returns: < 0, = 0, or > 0, according to the comparison
2584 */
2585
2586 static int
2587 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2588 {
2589 while (n--)
2590 {
2591 int c = tolower(*s++) - tolower(*t++);
2592 if (c) return c;
2593 }
2594 return 0;
2595 }
2596
2597
2598
2599 /*************************************************
2600 * Check newline indicator *
2601 *************************************************/
2602
2603 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2604 a message and return 0 if there is no match.
2605
2606 Arguments:
2607 p points after the leading '<'
2608 f file for error message
2609
2610 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2611 */
2612
2613 static int
2614 check_newline(pcre_uint8 *p, FILE *f)
2615 {
2616 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2617 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2618 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2619 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2620 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2621 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2622 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2623 fprintf(f, "Unknown newline type at: <%s\n", p);
2624 return 0;
2625 }
2626
2627
2628
2629 /*************************************************
2630 * Usage function *
2631 *************************************************/
2632
static void
usage(void)
{
/* Write the command line summary to stdout. Options that depend on how
pcretest was built are listed only when the corresponding support is compiled
in. The list of -C arguments includes "ebcdic" and "ebcdic-nl", which the
option scanner in main() accepts. */

printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf(" -32 use the 32-bit library\n");
#endif
printf(" -b show compiled code\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option\n");
printf(" and exit with its value. The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" pcre32 32 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
printf(" ebcdic compiled for an EBCDIC environment [0, 1]\n");
printf(" ebcdic-nl NL code in hex if compiled for EBCDIC, else 0\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
" -M find MATCH_LIMIT minimum for each subject\n"
" -m output memory used information\n"
" -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
" -s+ force each pattern to be studied, using JIT if available\n"
" -s++ ditto, verifying when JIT was actually used\n"
" -s+n force each pattern to be studied, using JIT if available,\n"
" where 1 <= n <= 7 selects JIT options\n"
" -s++n ditto, verifying when JIT was actually used\n"
" -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
}
2687
2688
2689
2690 /*************************************************
2691 * Main Program *
2692 *************************************************/
2693
2694 /* Read lines from named file or stdin and write to named file or stdout; lines
2695 consist of a regular expression, in delimiters and optionally followed by
2696 options, followed by a set of test data, terminated by an empty line. */
2697
2698 int main(int argc, char **argv)
2699 {
2700 FILE *infile = stdin;
2701 const char *version;
2702 int options = 0;
2703 int study_options = 0;
2704 int default_find_match_limit = FALSE;
2705 int op = 1;
2706 int timeit = 0;
2707 int timeitm = 0;
2708 int showinfo = 0;
2709 int showstore = 0;
2710 int force_study = -1;
2711 int force_study_options = 0;
2712 int quiet = 0;
2713 int size_offsets = 45;
2714 int size_offsets_max;
2715 int *offsets = NULL;
2716 int debug = 0;
2717 int done = 0;
2718 int all_use_dfa = 0;
2719 int verify_jit = 0;
2720 int yield = 0;
2721 int stack_size;
2722
2723 #if !defined NOPOSIX
2724 int posix = 0;
2725 #endif
2726 #if !defined NODFA
2727 int *dfa_workspace = NULL;
2728 #endif
2729
2730 pcre_jit_stack *jit_stack = NULL;
2731
2732 /* These vectors store, end-to-end, a list of zero-terminated captured
2733 substring names, each list itself being terminated by an empty name. Assume
2734 that 1024 is plenty long enough for the few names we'll be testing. It is
2735 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2736 for the actual memory, to ensure alignment. */
2737
2738 pcre_uint32 copynames[1024];
2739 pcre_uint32 getnames[1024];
2740
2741 #ifdef SUPPORT_PCRE32
2742 pcre_uint32 *cn32ptr;
2743 pcre_uint32 *gn32ptr;
2744 #endif
2745
2746 #ifdef SUPPORT_PCRE16
2747 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2748 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2749 pcre_uint16 *cn16ptr;
2750 pcre_uint16 *gn16ptr;
2751 #endif
2752
2753 #ifdef SUPPORT_PCRE8
2754 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2755 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2756 pcre_uint8 *cn8ptr;
2757 pcre_uint8 *gn8ptr;
2758 #endif
2759
2760 /* Get buffers from malloc() so that valgrind will check their misuse when
2761 debugging. They grow automatically when very long lines are read. The 16-
2762 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2763
2764 buffer = (pcre_uint8 *)malloc(buffer_size);
2765 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2766 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2767
2768 /* The outfile variable is static so that new_malloc can use it. */
2769
2770 outfile = stdout;
2771
2772 /* The following _setmode() stuff is some Windows magic that tells its runtime
2773 library to translate CRLF into a single LF character. At least, that's what
2774 I've been told: never having used Windows I take this all on trust. Originally
2775 it set 0x8000, but then I was advised that _O_BINARY was better. */
2776
2777 #if defined(_WIN32) || defined(WIN32)
2778 _setmode( _fileno( stdout ), _O_BINARY );
2779 #endif
2780
2781 /* Get the version number: both pcre_version() and pcre16_version() give the
2782 same answer. We just need to ensure that we call one that is available. */
2783
2784 #if defined SUPPORT_PCRE8
2785 version = pcre_version();
2786 #elif defined SUPPORT_PCRE16
2787 version = pcre16_version();
2788 #elif defined SUPPORT_PCRE32
2789 version = pcre32_version();
2790 #endif
2791
2792 /* Scan options */
2793
2794 while (argc > 1 && argv[op][0] == '-')
2795 {
2796 pcre_uint8 *endptr;
2797 char *arg = argv[op];
2798
2799 if (strcmp(arg, "-m") == 0) showstore = 1;
2800 else if (strcmp(arg, "-s") == 0) force_study = 0;
2801
2802 else if (strncmp(arg, "-s+", 3) == 0)
2803 {
2804 arg += 3;
2805 if (*arg == '+') { arg++; verify_jit = TRUE; }
2806 force_study = 1;
2807 if (*arg == 0)
2808 force_study_options = jit_study_bits[6];
2809 else if (*arg >= '1' && *arg <= '7')
2810 force_study_options = jit_study_bits[*arg - '1'];
2811 else goto BAD_ARG;
2812 }
2813 else if (strcmp(arg, "-16") == 0)
2814 {
2815 #ifdef SUPPORT_PCRE16
2816 pcre_mode = PCRE16_MODE;
2817 #else
2818 printf("** This version of PCRE was built without 16-bit support\n");
2819 exit(1);
2820 #endif
2821 }
2822 else if (strcmp(arg, "-32") == 0)
2823 {
2824 #ifdef SUPPORT_PCRE32
2825 pcre_mode = PCRE32_MODE;
2826 #else
2827 printf("** This version of PCRE was built without 32-bit support\n");
2828 exit(1);
2829 #endif
2830 }
2831 else if (strcmp(arg, "-q") == 0) quiet = 1;
2832 else if (strcmp(arg, "-b") == 0) debug = 1;
2833 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2834 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2835 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2836 #if !defined NODFA
2837 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2838 #endif
2839 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2840 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2841 *endptr == 0))
2842 {
2843 op++;
2844 argc--;
2845 }
2846 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2847 {
2848 int both = arg[2] == 0;
2849 int temp;
2850 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2851 *endptr == 0))
2852 {
2853 timeitm = temp;
2854 op++;
2855 argc--;
2856 }
2857 else timeitm = LOOPREPEAT;
2858 if (both) timeit = timeitm;
2859 }
2860 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2861 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2862 *endptr == 0))
2863 {
2864 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2865 printf("PCRE: -S not supported on this OS\n");
2866 exit(1);
2867 #else
2868 int rc;
2869 struct rlimit rlim;
2870 getrlimit(RLIMIT_STACK, &rlim);
2871 rlim.rlim_cur = stack_size * 1024 * 1024;
2872 rc = setrlimit(RLIMIT_STACK, &rlim);
2873 if (rc != 0)
2874 {
2875 printf("PCRE: setrlimit() failed with error %d\n", rc);
2876 exit(1);
2877 }
2878 op++;
2879 argc--;
2880 #endif
2881 }
2882 #if !defined NOPOSIX
2883 else if (strcmp(arg, "-p") == 0) posix = 1;
2884 #endif
2885 else if (strcmp(arg, "-C") == 0)
2886 {
2887 int rc;
2888 unsigned long int lrc;
2889
2890 if (argc > 2)
2891 {
2892 if (strcmp(argv[op + 1], "linksize") == 0)
2893 {
2894 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2895 printf("%d\n", rc);
2896 yield = rc;
2897 }
2898 else if (strcmp(argv[op + 1], "pcre8") == 0)
2899 {
2900 #ifdef SUPPORT_PCRE8
2901 printf("1\n");
2902 yield = 1;
2903 #else
2904 printf("0\n");
2905 yield = 0;
2906 #endif
2907 }
2908 else if (strcmp(argv[op + 1], "pcre16") == 0)
2909 {
2910 #ifdef SUPPORT_PCRE16
2911 printf("1\n");
2912 yield = 1;
2913 #else
2914 printf("0\n");
2915 yield = 0;
2916 #endif
2917 }
2918 else if (strcmp(argv[op + 1], "pcre32") == 0)
2919 {
2920 #ifdef SUPPORT_PCRE32
2921 printf("1\n");
2922 yield = 1;
2923 #else
2924 printf("0\n");
2925 yield = 0;
2926 #endif
2927 goto EXIT;
2928 }
2929 if (strcmp(argv[op + 1], "utf") == 0)
2930 {
2931 #ifdef SUPPORT_PCRE8
2932 if (pcre_mode == PCRE8_MODE)
2933 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2934 #endif
2935 #ifdef SUPPORT_PCRE16
2936 if (pcre_mode == PCRE16_MODE)
2937 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2938 #endif
2939 #ifdef SUPPORT_PCRE32
2940 if (pcre_mode == PCRE32_MODE)
2941 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2942 #endif
2943 printf("%d\n", rc);
2944 yield = rc;
2945 goto EXIT;
2946 }
2947 else if (strcmp(argv[op + 1], "ucp") == 0)
2948 {
2949 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2950 printf("%d\n", rc);
2951 yield = rc;
2952 }
2953 else if (strcmp(argv[op + 1], "jit") == 0)
2954 {
2955 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2956 printf("%d\n", rc);
2957 yield = rc;
2958 }
2959 else if (strcmp(argv[op + 1], "newline") == 0)
2960 {
2961 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2962 print_newline_config(rc, TRUE);
2963 }
2964 else if (strcmp(argv[op + 1], "ebcdic") == 0)
2965 {
2966 #ifdef EBCDIC
2967 printf("1\n");
2968 yield = 1;
2969 #else
2970 printf("0\n");
2971 #endif
2972 }
2973 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2974 {
2975 #ifdef EBCDIC
2976 printf("0x%02x\n", CHAR_LF);
2977 #else
2978 printf("0\n");
2979 #endif
2980 }
2981 else
2982 {
2983 printf("Unknown -C option: %s\n", argv[op + 1]);
2984 }
2985 goto EXIT;
2986 }
2987
2988 /* No argument for -C: output all configuration information. */
2989
2990 printf("PCRE version %s\n", version);
2991 printf("Compiled with\n");
2992
2993 #ifdef EBCDIC
2994 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2995 #endif
2996
2997 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2998 are set, either both UTFs are supported or both are not supported. */
2999
3000 #ifdef SUPPORT_PCRE8
3001 printf(" 8-bit support\n");
3002 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3003 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3004 #endif
3005 #ifdef SUPPORT_PCRE16
3006 printf(" 16-bit support\n");
3007 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3008 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3009 #endif
3010 #ifdef SUPPORT_PCRE32
3011 printf(" 32-bit support\n");
3012 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3013 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3014 #endif
3015
3016 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3017 printf(" %sUnicode properties support\n", rc? "" : "No ");
3018 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3019 if (rc)
3020 {
3021 const char *arch;
3022 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3023 printf(" Just-in-time compiler support: %s\n", arch);
3024 }
3025 else
3026 printf(" No just-in-time compiler support\n");
3027 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3028 print_newline_config(rc, FALSE);
3029 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3030 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3031 "all Unicode newlines");
3032 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3033 printf(" Internal link size = %d\n", rc);
3034 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3035 printf(" POSIX malloc threshold = %d\n", rc);
3036 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3037 printf(" Default match limit = %ld\n", lrc);
3038 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3039 printf(" Default recursion depth limit = %ld\n", lrc);
3040 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3041 printf(" Match recursion uses %s", rc? "stack" : "heap");
3042 if (showstore)
3043 {
3044 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3045 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3046 }
3047 printf("\n");
3048 goto EXIT;
3049 }
3050 else if (strcmp(arg, "-help") == 0 ||
3051 strcmp(arg, "--help") == 0)
3052 {
3053 usage();
3054 goto EXIT;
3055 }
3056 else
3057 {
3058 BAD_ARG:
3059 printf("** Unknown or malformed option %s\n", arg);
3060 usage();
3061 yield = 1;
3062 goto EXIT;
3063 }
3064 op++;
3065 argc--;
3066 }
3067
3068 /* Get the store for the offsets vector, and remember what it was */
3069
3070 size_offsets_max = size_offsets;
3071 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3072 if (offsets == NULL)
3073 {
3074 printf("** Failed to get %d bytes of memory for offsets vector\n",
3075 (int)(size_offsets_max * sizeof(int)));
3076 yield = 1;
3077 goto EXIT;
3078 }
3079
3080 /* Sort out the input and output files */
3081
3082 if (argc > 1)
3083 {
3084 infile = fopen(argv[op], INPUT_MODE);
3085 if (infile == NULL)
3086 {
3087 printf("** Failed to open %s\n", argv[op]);
3088 yield = 1;
3089 goto EXIT;
3090 }
3091 }
3092
3093 if (argc > 2)
3094 {
3095 outfile = fopen(argv[op+1], OUTPUT_MODE);
3096 if (outfile == NULL)
3097 {
3098 printf("** Failed to open %s\n", argv[op+1]);
3099 yield = 1;
3100 goto EXIT;
3101 }
3102 }
3103
3104 /* Set alternative malloc function */
3105
3106 #ifdef SUPPORT_PCRE8
3107 pcre_malloc = new_malloc;
3108 pcre_free = new_free;
3109 pcre_stack_malloc = stack_malloc;
3110 pcre_stack_free = stack_free;
3111 #endif
3112
3113 #ifdef SUPPORT_PCRE16
3114 pcre16_malloc = new_malloc;
3115 pcre16_free = new_free;
3116 pcre16_stack_malloc = stack_malloc;
3117 pcre16_stack_free = stack_free;
3118 #endif
3119
3120 #ifdef SUPPORT_PCRE32
3121 pcre32_malloc = new_malloc;
3122 pcre32_free = new_free;
3123 pcre32_stack_malloc = stack_malloc;
3124 pcre32_stack_free = stack_free;
3125 #endif
3126
3127 /* Heading line unless quiet, then prompt for first regex if stdin */
3128
3129 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3130
3131 /* Main loop */
3132
3133 while (!done)
3134 {
3135 pcre *re = NULL;
3136 pcre_extra *extra = NULL;
3137
3138 #if !defined NOPOSIX /* There are still compilers that require no indent */
3139 regex_t preg;
3140 int do_posix = 0;
3141 #endif
3142
3143 const char *error;
3144 pcre_uint8 *markptr;
3145 pcre_uint8 *p, *pp, *ppp;
3146 pcre_uint8 *to_file = NULL;
3147 const pcre_uint8 *tables = NULL;
3148 unsigned long int get_options;
3149 unsigned long int true_size, true_study_size = 0;
3150 size_t size, regex_gotten_store;
3151 int do_allcaps = 0;
3152 int do_mark = 0;
3153 int do_study = 0;
3154 int no_force_study = 0;
3155 int do_debug = debug;
3156 int do_G = 0;
3157 int do_g = 0;
3158 int do_showinfo = showinfo;
3159 int do_showrest = 0;
3160 int do_showcaprest = 0;
3161 int do_flip = 0;
3162 int erroroffset, len, delimiter, poffset;
3163
3164 #if !defined NODFA
3165 int dfa_matched = 0;
3166 #endif
3167
3168 use_utf = 0;
3169 debug_lengths = 1;
3170
3171 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3172 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3173 fflush(outfile);
3174
3175 p = buffer;
3176 while (isspace(*p)) p++;
3177 if (*p == 0) continue;
3178
3179 /* See if the pattern is to be loaded pre-compiled from a file. */
3180
3181 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3182 {
3183 pcre_uint32 magic;
3184 pcre_uint8 sbuf[8];
3185 FILE *f;
3186
3187 p++;
3188 if (*p == '!')
3189 {
3190 do_debug = TRUE;
3191 do_showinfo = TRUE;
3192 p++;
3193 }
3194
3195 pp = p + (int)strlen((char *)p);
3196 while (isspace(pp[-1])) pp--;
3197 *pp = 0;
3198
3199 f = fopen((char *)p, "rb");
3200 if (f == NULL)
3201 {
3202 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3203 continue;
3204 }
3205
3206 first_gotten_store = 0;
3207 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3208
3209 true_size =
3210 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3211 true_study_size =
3212 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3213
3214 re = (pcre *)new_malloc(true_size);
3215 if (re == NULL)
3216 {
3217 printf("** Failed to get %d bytes of memory for pcre object\n",
3218 (int)true_size);
3219 yield = 1;
3220 goto EXIT;
3221 }
3222 regex_gotten_store = first_gotten_store;
3223
3224 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3225
3226 magic = REAL_PCRE_MAGIC(re);
3227 if (magic != MAGIC_NUMBER)
3228 {
3229 if (swap_uint32(magic) == MAGIC_NUMBER)
3230 {
3231 do_flip = 1;
3232 }
3233 else
3234 {
3235 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3236 new_free(re);
3237 fclose(f);
3238 continue;
3239 }
3240 }
3241
3242 /* We hide the byte-invert info for little and big endian tests. */
3243 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3244 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3245
3246 /* Now see if there is any following study data. */
3247
3248 if (true_study_size != 0)
3249 {
3250 pcre_study_data *psd;
3251
3252 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3253 extra->flags = PCRE_EXTRA_STUDY_DATA;
3254
3255 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3256 extra->study_data = psd;
3257
3258 if (fread(psd, 1, true_study_size, f) != true_study_size)
3259 {
3260 FAIL_READ:
3261 fprintf(outfile, "Failed to read data from %s\n", p);
3262 if (extra != NULL)
3263 {
3264 PCRE_FREE_STUDY(extra);
3265 }
3266 new_free(re);
3267 fclose(f);
3268 continue;
3269 }
3270 fprintf(outfile, "Study data loaded from %s\n", p);
3271 do_study = 1; /* To get the data output if requested */
3272 }
3273 else fprintf(outfile, "No study data\n");
3274
3275 /* Flip the necessary bytes. */
3276 if (do_flip)
3277 {
3278 int rc;
3279 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3280 if (rc == PCRE_ERROR_BADMODE)
3281 {
3282 /* Simulate the result of the function call below. */
3283 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3284 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3285 PCRE_INFO_OPTIONS);
3286 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3287 "%d-bit mode\n", 8 * CHAR_SIZE,
3288 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3289 new_free(re);
3290 fclose(f);
3291 continue;
3292 }
3293 }
3294
3295 /* Need to know if UTF-8 for printing data strings. */
3296
3297 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3298 {
3299 new_free(re);
3300 fclose(f);
3301 continue;
3302 }
3303 use_utf = (get_options & PCRE_UTF8) != 0;
3304
3305 fclose(f);
3306 goto SHOW_INFO;
3307 }
3308
3309 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3310 the pattern; if it isn't complete, read more. */
3311
3312 delimiter = *p++;
3313
3314 if (isalnum(delimiter) || delimiter == '\\')
3315 {
3316 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3317 goto SKIP_DATA;
3318 }
3319
3320 pp = p;
3321 poffset = (int)(p - buffer);
3322
3323 for(;;)
3324 {
3325 while (*pp != 0)
3326 {
3327 if (*pp == '\\' && pp[1] != 0) pp++;
3328 else if (*pp == delimiter) break;
3329 pp++;
3330 }
3331 if (*pp != 0) break;
3332 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3333 {
3334 fprintf(outfile, "** Unexpected EOF\n");
3335 done = 1;
3336 goto CONTINUE;
3337 }
3338 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3339 }
3340
3341 /* The buffer may have moved while being extended; reset the start of data
3342 pointer to the correct relative point in the buffer. */
3343
3344 p = buffer + poffset;
3345
3346 /* If the first character after the delimiter is backslash, make
3347 the pattern end with backslash. This is purely to provide a way
3348 of testing for the error message when a pattern ends with backslash. */
3349
3350 if (pp[1] == '\\') *pp++ = '\\';
3351
3352 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3353 for callouts. */
3354
3355 *pp++ = 0;
3356 strcpy((char *)pbuffer, (char *)p);
3357
3358 /* Look for options after final delimiter */
3359
3360 options = 0;
3361 study_options = force_study_options;
3362 log_store = showstore; /* default from command line */
3363
3364 while (*pp != 0)
3365 {
3366 switch (*pp++)
3367 {
3368 case 'f': options |= PCRE_FIRSTLINE; break;
3369 case 'g': do_g = 1; break;
3370 case 'i': options |= PCRE_CASELESS; break;
3371 case 'm': options |= PCRE_MULTILINE; break;
3372 case 's': options |= PCRE_DOTALL; break;
3373 case 'x': options |= PCRE_EXTENDED; break;
3374
3375 case '+':
3376 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3377 break;
3378
3379 case '=': do_allcaps = 1; break;
3380 case 'A': options |= PCRE_ANCHORED; break;
3381 case 'B': do_debug = 1; break;
3382 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3383 case 'D': do_debug = do_showinfo = 1; break;
3384 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3385 case 'F': do_flip = 1; break;
3386 case 'G': do_G = 1; break;
3387 case 'I': do_showinfo = 1; break;
3388 case 'J': options |= PCRE_DUPNAMES; break;
3389 case 'K': do_mark = 1; break;
3390 case 'M': log_store = 1; break;
3391 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3392
3393 #if !defined NOPOSIX
3394 case 'P': do_posix = 1; break;
3395 #endif
3396
3397 case 'S':
3398 do_study = 1;
3399 for (;;)
3400 {
3401 switch (*pp++)
3402 {
3403 case 'S':
3404 do_study = 0;
3405 no_force_study = 1;
3406 break;
3407
3408 case '!':
3409 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3410 break;
3411
3412 case '+':
3413 if (*pp == '+')
3414 {
3415 verify_jit = TRUE;
3416 pp++;
3417 }
3418 if (*pp >= '1' && *pp <= '7')
3419 study_options |= jit_study_bits[*pp++ - '1'];
3420 else
3421 study_options |= jit_study_bits[6];
3422 break;
3423
3424 case '-':
3425 study_options &= ~PCRE_STUDY_ALLJIT;
3426 break;
3427
3428 default:
3429 pp--;
3430 goto ENDLOOP;
3431 }
3432 }
3433 ENDLOOP:
3434 break;
3435
3436 case 'U': options |= PCRE_UNGREEDY; break;
3437 case 'W': options |= PCRE_UCP; break;
3438 case 'X': options |= PCRE_EXTRA; break;
3439 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3440 case 'Z': debug_lengths = 0; break;
3441 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3442 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3443
3444 case 'T':
3445 switch (*pp++)
3446 {
3447 case '0': tables = tables0; break;
3448 case '1': tables = tables1; break;
3449
3450 case '\r':
3451 case '\n':
3452 case ' ':
3453 case 0:
3454 fprintf(outfile, "** Missing table number after /T\n");
3455 goto SKIP_DATA;
3456
3457 default:
3458 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3459 goto SKIP_DATA;
3460 }
3461 break;
3462
3463 case 'L':
3464 ppp = pp;
3465 /* The '\r' test here is so that it works on Windows. */
3466 /* The '0' test is just in case this is an unterminated line. */
3467 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3468 *ppp = 0;
3469 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3470 {
3471 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3472 goto SKIP_DATA;
3473 }
3474 locale_set = 1;
3475 tables = PCRE_MAKETABLES;
3476 pp = ppp;
3477 break;
3478
3479 case '>':
3480 to_file = pp;
3481 while (*pp != 0) pp++;
3482 while (isspace(pp[-1])) pp--;
3483 *pp = 0;
3484 break;
3485
3486 case '<':
3487 {
3488 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3489 {
3490 options |= PCRE_JAVASCRIPT_COMPAT;
3491 pp += 3;
3492 }
3493 else
3494 {
3495 int x = check_newline(pp, outfile);
3496 if (x == 0) goto SKIP_DATA;
3497 options |= x;
3498 while (*pp++ != '>');
3499 }
3500 }
3501 break;
3502
3503 case '\r': /* So that it works in Windows */
3504 case '\n':
3505 case ' ':
3506 break;
3507
3508 default:
3509 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3510 goto SKIP_DATA;
3511 }
3512 }
3513
3514 /* Handle compiling via the POSIX interface, which doesn't support the
3515 timing, showing, or debugging options, nor the ability to pass over
3516 local character tables. Neither does it have 16-bit support. */
3517
3518 #if !defined NOPOSIX
3519 if (posix || do_posix)
3520 {
3521 int rc;
3522 int cflags = 0;
3523
3524 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3525 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3526 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3527 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3528 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3529 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3530 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3531
3532 first_gotten_store = 0;
3533 rc = regcomp(&preg, (char *)p, cflags);
3534
3535 /* Compilation failed; go back for another re, skipping to blank line
3536 if non-interactive. */
3537
3538 if (rc != 0)
3539 {
3540 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3541 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3542 goto SKIP_DATA;
3543 }
3544 }
3545
3546 /* Handle compiling via the native interface */
3547
3548 else
3549 #endif /* !defined NOPOSIX */
3550
3551 {
3552 /* In 16- or 32-bit mode, convert the input. */
3553
3554 #ifdef SUPPORT_PCRE16
3555 if (pcre_mode == PCRE16_MODE)
3556 {
3557 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3558 {
3559 case -1:
3560 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3561 "converted to UTF-16\n");
3562 goto SKIP_DATA;
3563
3564 case -2:
3565 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3566 "cannot be converted to UTF-16\n");
3567 goto SKIP_DATA;
3568
3569 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3570 fprintf(outfile, "**Failed: character value greater than 0xffff "
3571 "cannot be converted to 16-bit in non-UTF mode\n");
3572 goto SKIP_DATA;
3573
3574 default:
3575 break;
3576 }
3577 p = (pcre_uint8 *)buffer16;
3578 }
3579 #endif
3580
3581 #ifdef SUPPORT_PCRE32
3582 if (pcre_mode == PCRE32_MODE)
3583 {
3584 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3585 {
3586 case -1:
3587 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3588 "converted to UTF-32\n");
3589 goto SKIP_DATA;
3590
3591 case -2:
3592 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3593 "cannot be converted to UTF-32\n");
3594 goto SKIP_DATA;
3595
3596 case -3:
3597 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3598 goto SKIP_DATA;
3599
3600 default:
3601 break;
3602 }
3603 p = (pcre_uint8 *)buffer32;
3604 }
3605 #endif
3606
3607 /* Compile many times when timing */
3608
3609 if (timeit > 0)
3610 {
3611 register int i;
3612 clock_t time_taken;
3613 clock_t start_time = clock();
3614 for (i = 0; i < timeit; i++)
3615 {
3616 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3617 if (re != NULL) free(re);
3618 }
3619 time_taken = clock() - start_time;
3620 fprintf(outfile, "Compile time %.4f milliseconds\n",
3621 (((double)time_taken * 1000.0) / (double)timeit) /
3622 (double)CLOCKS_PER_SEC);
3623 }
3624
3625 first_gotten_store = 0;
3626 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3627
3628 /* Compilation failed; go back for another re, skipping to blank line
3629 if non-interactive. */
3630
3631 if (re == NULL)
3632 {
3633 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3634 SKIP_DATA:
3635 if (infile != stdin)
3636 {
3637 for (;;)
3638 {
3639 if (extend_inputline(infile, buffer, NULL) == NULL)
3640 {
3641 done = 1;
3642 goto CONTINUE;
3643 }
3644 len = (int)strlen((char *)buffer);
3645 while (len > 0 && isspace(buffer[len-1])) len--;
3646 if (len == 0) break;
3647 }
3648 fprintf(outfile, "\n");
3649 }
3650 goto CONTINUE;
3651 }
3652
3653 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3654 within the regex; check for this so that we know how to process the data
3655 lines. */
3656
3657 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3658 goto SKIP_DATA;
3659 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3660
3661 /* Extract the size for possible writing before possibly flipping it,
3662 and remember the store that was got. */
3663
3664 true_size = REAL_PCRE_SIZE(re);
3665 regex_gotten_store = first_gotten_store;
3666
3667 /* Output code size information if requested */
3668
3669 if (log_store)
3670 {
3671 int name_count, name_entry_size, real_pcre_size;
3672
3673 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3674 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3675 #ifdef SUPPORT_PCRE8
3676 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3677 real_pcre_size = sizeof(real_pcre);
3678 #endif
3679 #ifdef SUPPORT_PCRE16
3680 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3681 real_pcre_size = sizeof(real_pcre16);
3682 #endif
3683 #ifdef SUPPORT_PCRE32
3684 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3685 real_pcre_size = sizeof(real_pcre32);
3686 #endif
3687 fprintf(outfile, "Memory allocation (code space): %d\n",
3688 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3689 }
3690
3691 /* If -s or /S was present, study the regex to generate additional info to
3692 help with the matching, unless the pattern has the SS option, which
3693 suppresses the effect of /S (used for a few test patterns where studying is
3694 never sensible). */
3695
3696 if (do_study || (force_study >= 0 && !no_force_study))
3697 {
3698 if (timeit > 0)
3699 {
3700 register int i;
3701 clock_t time_taken;
3702 clock_t start_time = clock();
3703 for (i = 0; i < timeit; i++)
3704 {
3705 PCRE_STUDY(extra, re, study_options, &error);
3706 }
3707 time_taken = clock() - start_time;
3708 if (extra != NULL)
3709 {
3710 PCRE_FREE_STUDY(extra);
3711 }
3712 fprintf(outfile, " Study time %.4f milliseconds\n",
3713 (((double)time_taken * 1000.0) / (double)timeit) /
3714 (double)CLOCKS_PER_SEC);
3715 }
3716 PCRE_STUDY(extra, re, study_options, &error);
3717 if (error != NULL)
3718 fprintf(outfile, "Failed to study: %s\n", error);
3719 else if (extra != NULL)
3720 {
3721 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3722 if (log_store)
3723 {
3724 size_t jitsize;
3725 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3726 jitsize != 0)
3727 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3728 }
3729 }
3730 }
3731
3732 /* If /K was present, we set up for handling MARK data. */
3733
3734 if (do_mark)
3735 {
3736 if (extra == NULL)
3737 {
3738 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3739 extra->flags = 0;
3740 }
3741 extra->mark = &markptr;
3742 extra->flags |= PCRE_EXTRA_MARK;
3743 }
3744
3745 /* Extract and display information from the compiled data if required. */
3746
3747 SHOW_INFO:
3748
3749 if (do_debug)
3750 {
3751 fprintf(outfile, "------------------------------------------------------------------\n");
3752 PCRE_PRINTINT(re, outfile, debug_lengths);
3753 }
3754
3755 /* We already have the options in get_options (see above) */
3756
3757 if (do_showinfo)
3758 {
3759 unsigned long int all_options;
3760 pcre_uint32 first_char, need_char;
3761 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
3762 hascrorlf, maxlookbehind;
3763 int nameentrysize, namecount;
3764 const pcre_uint8 *nametable;
3765
3766 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3767 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3768 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3769 new_info(re, NULL, PCRE_INFO_FIRSTLITERAL, &first_char) +
3770 new_info(re, NULL, PCRE_INFO_FIRSTLITERALSET, &first_char_set) +
3771 new_info(re, NULL, PCRE_INFO_LASTLITERAL2, &need_char) +
3772 new_info(re, NULL, PCRE_INFO_LASTLITERAL2SET, &need_char_set) +
3773 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3774 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3775 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3776 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3777 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3778 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3779 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3780 != 0)
3781 goto SKIP_DATA;
3782
3783 if (size != regex_gotten_store) fprintf(outfile,
3784 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3785 (int)size, (int)regex_gotten_store);
3786
3787 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3788 if (backrefmax > 0)
3789 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3790
3791 if (namecount > 0)
3792 {
3793 fprintf(outfile, "Named capturing subpatterns:\n");
3794 while (namecount-- > 0)
3795 {
3796 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
3797 int length = (int)STRLEN(nametable + imm2_size);
3798 fprintf(outfile, " ");
3799 PCHARSV(nametable, imm2_size, length, outfile);
3800 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3801 #ifdef SUPPORT_PCRE32
3802 if (pcre_mode == PCRE32_MODE)
3803 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
3804 #endif
3805 #ifdef SUPPORT_PCRE16
3806 if (pcre_mode == PCRE16_MODE)
3807 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
3808 #endif
3809 #ifdef SUPPORT_PCRE8
3810 if (pcre_mode == PCRE8_MODE)
3811 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
3812 #endif
3813 nametable += nameentrysize * CHAR_SIZE;
3814 }
3815 }
3816
3817 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3818 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3819
3820 all_options = REAL_PCRE_OPTIONS(re);
3821 if (do_flip) all_options = swap_uint32(all_options);
3822
3823 if (get_options == 0) fprintf(outfile, "No options\n");
3824 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3825 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3826 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3827 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3828 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3829 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3830 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3831 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3832 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3833 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3834 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3835 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3836 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3837 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3838 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3839 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3840 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3841 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3842
3843 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3844
3845 switch (get_options & PCRE_NEWLINE_BITS)
3846 {
3847 case PCRE_NEWLINE_CR:
3848 fprintf(outfile, "Forced newline sequence: CR\n");
3849 break;
3850
3851 case PCRE_NEWLINE_LF:
3852 fprintf(outfile, "Forced newline sequence: LF\n");
3853 break;
3854
3855 case PCRE_NEWLINE_CRLF:
3856 fprintf(outfile, "Forced newline sequence: CRLF\n");
3857 break;
3858
3859 case PCRE_NEWLINE_ANYCRLF:
3860 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3861 break;
3862
3863 case PCRE_NEWLINE_ANY:
3864 fprintf(outfile, "Forced newline sequence: ANY\n");
3865 break;
3866
3867 default:
3868 break;
3869 }
3870
3871 if (first_char_set == 2)
3872 {
3873 fprintf(outfile, "First char at start or follows newline\n");
3874 }
3875 else if (first_char_set == 1)
3876 {
3877 const char *caseless =
3878 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
3879 "" : " (caseless)";
3880
3881 if (PRINTOK(first_char))
3882 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3883 else
3884 {
3885 fprintf(outfile, "First char = ");
3886 pchar(first_char, outfile);
3887 fprintf(outfile, "%s\n", caseless);
3888 }
3889 }
3890 else
3891 {
3892 fprintf(outfile, "No first char\n");
3893 }
3894
3895 if (need_char_set == 0)
3896 {
3897 fprintf(outfile, "No need char\n");
3898 }
3899 else
3900 {
3901 const char *caseless =
3902 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
3903 "" : " (caseless)";
3904
3905 if (PRINTOK(need_char))
3906 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3907 else
3908 {
3909 fprintf(outfile, "Need char = ");
3910 pchar(need_char, outfile);
3911 fprintf(outfile, "%s\n", caseless);
3912 }
3913 }
3914
3915 if (maxlookbehind > 0)
3916 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3917
3918 /* Don't output study size; at present it is in any case a fixed
3919 value, but it varies, depending on the computer architecture, and
3920 so messes up the test suite. (And with the /F option, it might be
3921 flipped.) If study was forced by an external -s, don't show this
3922 information unless -i or -d was also present. This means that, except
3923 when auto-callouts are involved, the output from runs with and without
3924 -s should be identical. */
3925
3926 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3927 {
3928 if (extra == NULL)
3929 fprintf(outfile, "Study returned NULL\n");
3930 else
3931 {
3932 pcre_uint8 *start_bits = NULL;
3933 int minlength;
3934
3935 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3936 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3937
3938 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3939 {
3940 if (start_bits == NULL)
3941 fprintf(outfile, "No set of starting bytes\n");
3942 else
3943 {
3944 int i;
3945 int c = 24;
3946 fprintf(outfile, "Starting byte set: ");
3947 for (i = 0; i < 256; i++)
3948 {
3949 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3950 {
3951 if (c > 75)
3952 {
3953 fprintf(outfile, "\n ");
3954 c = 2;
3955 }
3956 if (PRINTOK(i) && i != ' ')
3957 {
3958 fprintf(outfile, "%c ", i);
3959 c += 2;
3960 }
3961 else
3962 {
3963 fprintf(outfile, "\\x%02x ", i);
3964 c += 5;
3965 }
3966 }
3967 }
3968 fprintf(outfile, "\n");
3969 }
3970 }
3971 }
3972
3973 /* Show this only if the JIT was set by /S, not by -s. */
3974
3975 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3976 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3977 {
3978 int jit;
3979 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3980 {
3981 if (jit)
3982 fprintf(outfile, "JIT study was successful\n");
3983 else
3984 #ifdef SUPPORT_JIT
3985 fprintf(outfile, "JIT study was not successful\n");
3986 #else
3987 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3988 #endif
3989 }
3990 }
3991 }
3992 }
3993
3994 /* If the '>' option was present, we write out the regex to a file, and
3995 that is all. The first 8 bytes of the file are the regex length and then
3996 the study length, in big-endian order. */
3997
3998 if (to_file != NULL)
3999 {
4000 FILE *f = fopen((char *)to_file, "wb");
4001 if (f == NULL)
4002 {
4003 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4004 }
4005 else
4006 {
4007 pcre_uint8 sbuf[8];
4008
4009 if (do_flip) regexflip(re, extra);
4010 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4011 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4012 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4013 sbuf[3] = (pcre_uint8)((true_size) & 255);
4014 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4015 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4016 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4017 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4018
4019 if (fwrite(sbuf, 1, 8, f) < 8 ||
4020 fwrite(re, 1, true_size, f) < true_size)
4021 {
4022 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4023 }
4024 else
4025 {
4026 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4027
4028 /* If there is study data, write it. */
4029
4030 if (extra != NULL)
4031 {
4032 if (fwrite(extra->study_data, 1, true_study_size, f) <
4033 true_study_size)
4034 {
4035 fprintf(outfile, "Write error on %s: %s\n", to_file,
4036 strerror(errno));
4037 }
4038 else fprintf(outfile, "Study data written to %s\n", to_file);
4039 }
4040 }
4041 fclose(f);
4042 }
4043
4044 new_free(re);
4045 if (extra != NULL)
4046 {
4047 PCRE_FREE_STUDY(extra);
4048 }
4049 if (locale_set)
4050 {
4051 new_free((void *)tables);
4052 setlocale(LC_CTYPE, "C");
4053 locale_set = 0;
4054 }
4055 continue; /* With next regex */
4056 }
4057 } /* End of non-POSIX compile */
4058
4059 /* Read data lines and test them */
4060
4061 for (;;)
4062 {
4063 pcre_uint8 *q;
4064 pcre_uint8 *bptr;
4065 int *use_offsets = offsets;
4066 int use_size_offsets = size_offsets;
4067 int callout_data = 0;
4068 int callout_data_set = 0;
4069 int count, c;
4070 int copystrings = 0;
4071 int find_match_limit = default_find_match_limit;
4072 int getstrings = 0;
4073 int getlist = 0;
4074 int gmatched = 0;
4075 int start_offset = 0;
4076 int start_offset_sign = 1;
4077 int g_notempty = 0;
4078 int use_dfa = 0;
4079
4080 *copynames = 0;
4081 *getnames = 0;
4082
4083 #ifdef SUPPORT_PCRE32
4084 cn32ptr = copynames;
4085 gn32ptr = getnames;
4086 #endif
4087 #ifdef SUPPORT_PCRE16
4088 cn16ptr = copynames16;
4089 gn16ptr = getnames16;
4090 #endif
4091 #ifdef SUPPORT_PCRE8
4092 cn8ptr = copynames8;
4093 gn8ptr = getnames8;
4094 #endif
4095
4096 SET_PCRE_CALLOUT(callout);
4097 first_callout = 1;
4098 last_callout_mark = NULL;
4099 callout_extra = 0;
4100 callout_count = 0;
4101 callout_fail_count = 999999;
4102 callout_fail_id = -1;
4103 show_malloc = 0;
4104 options = 0;
4105
4106 if (extra != NULL) extra->flags &=
4107 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4108
4109 len = 0;
4110 for (;;)
4111 {
4112 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4113 {
4114 if (len > 0) /* Reached EOF without hitting a newline */
4115 {
4116 fprintf(outfile, "\n");
4117 break;
4118 }
4119 done = 1;
4120 goto CONTINUE;
4121 }
4122 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4123 len = (int)strlen((char *)buffer);
4124 if (buffer[len-1] == '\n') break;
4125 }
4126
4127 while (len > 0 && isspace(buffer[len-1])) len--;
4128 buffer[len] = 0;
4129 if (len == 0) break;
4130
4131 p = buffer;
4132 while (isspace(*p)) p++;
4133
4134 bptr = q = dbuffer;
4135 while ((c = *p++) != 0)
4136 {
4137 int i = 0;
4138 int n = 0;
4139
4140 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4141 In non-UTF mode, allow the value of the byte to fall through to later,
4142 where values greater than 127 are turned into UTF-8 when running in
4143 16/32-bit mode. */
4144
4145 if (c != '\\')
4146 {
4147 if (use_utf)
4148 {
4149 *q++ = c;
4150 continue;
4151 }
4152 }
4153
4154 /* Handle backslash escapes */
4155
4156 else switch ((c = *p++))
4157 {
4158 case 'a': c = 7; break;
4159 case 'b': c = '\b'; break;
4160 case 'e': c = 27; break;
4161 case 'f': c = '\f'; break;
4162 case 'n': c = '\n'; break;
4163 case 'r': c = '\r'; break;
4164 case 't': c = '\t'; break;
4165 case 'v': c = '\v'; break;
4166
4167 case '0': case '1': case '2': case '3':
4168 case '4': case '5': case '6': case '7':
4169 c -= '0';
4170 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4171 c = c * 8 + *p++ - '0';
4172 break;
4173
4174 case 'x':
4175 if (*p == '{')
4176 {
4177 pcre_uint8 *pt = p;
4178 c = 0;
4179
4180 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4181 when isxdigit() is a macro that refers to its argument more than
4182 once. This is banned by the C Standard, but apparently happens in at
4183 least one MacOS environment. */
4184
4185 for (pt++; isxdigit(*pt); pt++)
4186 {
4187 if (++i == 9)
4188 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4189 "using only the first eight.\n");
4190 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4191 }
4192 if (*pt == '}')
4193 {
4194 p = pt + 1;
4195 break;
4196 }
4197 /* Not correct form for \x{...}; fall through */
4198 }
4199
4200 /* \x without {} always defines just one byte in 8-bit mode. This
4201 allows UTF-8 characters to be constructed byte by byte, and also allows
4202 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4203 Otherwise, pass it down to later code so that it can be turned into
4204 UTF-8 when running in 16/32-bit mode. */
4205
4206 c = 0;
4207 while (i++ < 2 && isxdigit(*p))
4208 {
4209 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4210 p++;
4211 }
4212 if (use_utf)
4213 {
4214 *q++ = c;
4215 continue;
4216 }
4217 break;
4218
4219 case 0: /* \ followed by EOF allows for an empty line */
4220 p--;
4221 continue;
4222
4223 case '>':
4224 if (*p == '-')
4225 {
4226 start_offset_sign = -1;
4227 p++;
4228 }
4229 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4230 start_offset *= start_offset_sign;
4231 continue;
4232
4233 case 'A': /* Option setting */
4234 options |= PCRE_ANCHORED;
4235 continue;
4236
4237 case 'B':
4238 options |= PCRE_NOTBOL;
4239 continue;
4240
4241 case 'C':
4242 if (isdigit(*p)) /* Set copy string */
4243 {
4244 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4245 copystrings |= 1 << n;
4246 }
4247 else if (isalnum(*p))
4248 {
4249 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4250 }
4251 else if (*p == '+')
4252 {
4253 callout_extra = 1;
4254 p++;
4255 }
4256 else if (*p == '-')
4257 {
4258 SET_PCRE_CALLOUT(NULL);
4259 p++;
4260 }
4261 else if (*p == '!')
4262 {
4263 callout_fail_id = 0;
4264 p++;
4265 while(isdigit(*p))
4266 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4267 callout_fail_count = 0;
4268 if (*p == '!')
4269 {
4270 p++;
4271 while(isdigit(*p))
4272 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4273 }
4274 }
4275 else if (*p == '*')
4276 {
4277 int sign = 1;
4278 callout_data = 0;
4279 if (*(++p) == '-') { sign = -1; p++; }
4280 while(isdigit(*p))
4281 callout_data = callout_data * 10 + *p++ - '0';
4282 callout_data *= sign;
4283 callout_data_set = 1;
4284 }
4285 continue;
4286
4287 #if !defined NODFA
4288 case 'D':
4289 #if !defined NOPOSIX
4290 if (posix || do_posix)
4291 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4292 else
4293 #endif
4294 use_dfa = 1;
4295 continue;
4296 #endif
4297
4298 #if !defined NODFA
4299 case 'F':
4300 options |= PCRE_DFA_SHORTEST;
4301 continue;
4302 #endif
4303
4304 case 'G':
4305 if (isdigit(*p))
4306 {
4307 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4308 getstrings |= 1 << n;
4309 }
4310 else if (isalnum(*p))
4311 {
4312 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4313 }
4314 continue;
4315
4316 case 'J':
4317 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4318 if (extra != NULL
4319 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4320 && extra->executable_jit != NULL)
4321 {
4322 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4323 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4324 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4325 }
4326 continue;
4327
4328 case 'L':
4329 getlist = 1;
4330 continue;
4331
4332 case 'M':
4333 find_match_limit = 1;
4334 continue;
4335
4336 case 'N':
4337 if ((options & PCRE_NOTEMPTY) != 0)
4338 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4339 else
4340 options |= PCRE_NOTEMPTY;
4341 continue;
4342
4343 case 'O':
4344 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4345 if (n > size_offsets_max)
4346 {
4347 size_offsets_max = n;
4348 free(offsets);
4349 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4350 if (offsets == NULL)
4351 {
4352 printf("** Failed to get %d bytes of memory for offsets vector\n",
4353 (int)(size_offsets_max * sizeof(int)));
4354 yield = 1;
4355 goto EXIT;
4356 }
4357 }
4358 use_size_offsets = n;
4359 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4360 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4361 continue;
4362
4363 case 'P':
4364 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4365 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4366 continue;
4367
4368 case 'Q':
4369 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4370 if (extra == NULL)
4371 {
4372 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4373 extra->flags = 0;
4374 }
4375 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4376 extra->match_limit_recursion = n;
4377 continue;
4378
4379 case 'q':
4380 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4381 if (extra == NULL)
4382 {
4383 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4384 extra->flags = 0;
4385 }
4386 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4387 extra->match_limit = n;
4388 continue;
4389
4390 #if !defined NODFA
4391 case 'R':
4392 options |= PCRE_DFA_RESTART;
4393 continue;
4394 #endif
4395
4396 case 'S':
4397 show_malloc = 1;
4398 continue;
4399
4400 case 'Y':
4401 options |= PCRE_NO_START_OPTIMIZE;
4402 continue;
4403
4404 case 'Z':
4405 options |= PCRE_NOTEOL;
4406 continue;
4407
4408 case '?':
4409 options |= PCRE_NO_UTF8_CHECK;
4410 continue;
4411
4412 case '<':
4413 {
4414 int x = check_newline(p, outfile);
4415 if (x == 0) goto NEXT_DATA;
4416 options |= x;
4417 while (*p++ != '>');
4418 }
4419 continue;
4420 }
4421
4422 /* We now have a character value in c that may be greater than 255. In
4423 16/32-bit mode, we always convert characters to UTF-8 so that values greater
4424 than 255 can be passed to non-UTF 16/32-bit strings. In 8-bit mode we
4425 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
4426 mode must have come from \x{...} or octal constructs because values from
4427 \x.. get this far only in non-UTF mode. */
4428
4429 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
4430 if (pcre_mode != PCRE8_MODE || use_utf)
4431 {
4432 pcre_uint8 buff8[8];
4433 int ii, utn;
4434 utn = ord2utf8(c, buff8);
4435 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
4436 }
4437 else
4438 #endif
4439 {
4440 if (c > 255)
4441 {
4442 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4443 "and UTF-8 mode is not enabled.\n", c);
4444 fprintf(outfile, "** Truncation will probably give the wrong "
4445 "result.\n");
4446 }
4447 *q++ = c;
4448 }
4449 }
4450
4451 /* Reached end of subject string */
4452
4453 *q = 0;
4454 len = (int)(q - dbuffer);
4455
4456 /* Move the data to the end of the buffer so that a read over the end of
4457 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
4458 we are using the POSIX interface, we must include the terminating zero. */
4459
4460 #if !defined NOPOSIX
4461 if (posix || do_posix)
4462 {
4463 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
4464 bptr += buffer_size - len - 1;
4465 }
4466 else
4467 #endif
4468 {
4469 memmove(bptr + buffer_size - len, bptr, len);
4470 bptr += buffer_size - len;
4471 }
4472
4473 if ((all_use_dfa || use_dfa) && find_match_limit)
4474 {
4475 printf("**Match limit not relevant for DFA matching: ignored\n");
4476 find_match_limit = 0;
4477 }
4478
4479 /* Handle matching via the POSIX interface, which does not
4480 support timing or playing with the match limit or callout data. */
4481
4482 #if !defined NOPOSIX
4483 if (posix || do_posix)
4484 {
4485 int rc;
4486 int eflags = 0;
4487 regmatch_t *pmatch = NULL;
4488 if (use_size_offsets > 0)
4489 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4490 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4491 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4492 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4493
4494 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4495
4496 if (rc != 0)
4497 {
4498 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4499 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4500 }
4501 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4502 {
4503 fprintf(outfile, "Matched with REG_NOSUB\n");
4504 }
4505 else
4506 {
4507 size_t i;
4508 for (i = 0; i < (size_t)use_size_offsets; i++)
4509 {
4510 if (pmatch[i].rm_so >= 0)
4511 {
4512 fprintf(outfile, "%2d: ", (int)i);
4513 PCHARSV(dbuffer, pmatch[i].rm_so,
4514 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4515 fprintf(outfile, "\n");
4516 if (do_showcaprest || (i == 0 && do_showrest))
4517 {
4518 fprintf(outfile, "%2d+ ", (int)i);
4519 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4520 outfile);
4521 fprintf(outfile, "\n");
4522 }
4523 }
4524 }
4525 }
4526 free(pmatch);
4527 goto NEXT_DATA;
4528 }
4529
4530 #endif /* !defined NOPOSIX */
4531
4532 /* Handle matching via the native interface - repeats for /g and /G */
4533
4534 #ifdef SUPPORT_PCRE16
4535 if (pcre_mode == PCRE16_MODE)
4536 {
4537 len = to16(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF8, len);
4538 switch(len)
4539 {
4540 case -1:
4541 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4542 "converted to UTF-16\n");
4543 goto NEXT_DATA;
4544
4545 case -2:
4546 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4547 "cannot be converted to UTF-16\n");
4548 goto NEXT_DATA;
4549
4550 case -3:
4551 fprintf(outfile, "**Failed: character value greater than 0xffff "
4552 "cannot be converted to 16-bit in non-UTF mode\n");
4553 goto NEXT_DATA;
4554
4555 default:
4556 break;
4557 }
4558 bptr = (pcre_uint8 *)buffer16;
4559 }
4560 #endif
4561
4562 #ifdef SUPPORT_PCRE32
4563 if (pcre_mode == PCRE32_MODE)
4564 {
4565 len = to32(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF32, len);
4566 switch(len)
4567 {
4568 case -1:
4569 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4570 "converted to UTF-32\n");
4571 goto NEXT_DATA;
4572
4573 case -2:
4574 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4575 "cannot be converted to UTF-32\n");
4576 goto NEXT_DATA;
4577
4578 case -3:
4579 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4580 goto NEXT_DATA;
4581
4582 default:
4583 break;
4584 }
4585 bptr = (pcre_uint8 *)buffer32;
4586 }
4587 #endif
4588
4589 /* Ensure that there is a JIT callback if we want to verify that JIT was
4590 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4591
4592 if (verify_jit && jit_stack == NULL && extra != NULL)
4593 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4594
4595 for (;; gmatched++) /* Loop for /g or /G */
4596 {
4597 markptr = NULL;
4598 jit_was_used = FALSE;
4599
4600 if (timeitm > 0)
4601 {
4602 register int i;
4603 clock_t time_taken;
4604 clock_t start_time = clock();
4605
4606 #if !defined NODFA
4607 if (all_use_dfa || use_dfa)
4608 {
4609 if ((options & PCRE_DFA_RESTART) != 0)
4610 {
4611 fprintf(outfile, "Timing DFA restarts is not supported\n");
4612 break;
4613 }
4614 if (dfa_workspace == NULL)
4615 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4616 for (i = 0; i < timeitm; i++)
4617 {
4618 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4619 (options | g_notempty), use_offsets, use_size_offsets,
4620 dfa_workspace, DFA_WS_DIMENSION);
4621 }
4622 }
4623 else
4624 #endif
4625
4626 for (i = 0; i < timeitm; i++)
4627 {
4628 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4629 (options | g_notempty), use_offsets, use_size_offsets);
4630 }
4631 time_taken = clock() - start_time;
4632 fprintf(outfile, "Execute time %.4f milliseconds\n",
4633 (((double)time_taken * 1000.0) / (double)timeitm) /
4634 (double)CLOCKS_PER_SEC);
4635 }
4636
4637 /* If find_match_limit is set, we want to do repeated matches with
4638 varying limits in order to find the minimum value for the match limit and
4639 for the recursion limit. The match limits are relevant only to the normal
4640 running of pcre_exec(), so disable the JIT optimization. This makes it
4641 possible to run the same set of tests with and without JIT externally
4642 requested. */
4643
4644 if (find_match_limit)
4645 {
4646 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4647 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4648 extra->flags = 0;
4649
4650 (void)check_match_limit(re, extra, bptr, len, start_offset,
4651 options|g_notempty, use_offsets, use_size_offsets,
4652 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4653 PCRE_ERROR_MATCHLIMIT, "match()");
4654
4655 count = check_match_limit(re, extra, bptr, len, start_offset,
4656 options|g_notempty, use_offsets, use_size_offsets,
4657 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4658 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4659 }
4660
4661 /* If callout_data is set, use the interface with additional data */
4662
4663 else if (callout_data_set)
4664 {
4665 if (extra == NULL)
4666 {
4667 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4668 extra->flags = 0;
4669 }
4670 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4671 extra->callout_data = &callout_data;
4672 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4673 options | g_notempty, use_offsets, use_size_offsets);
4674 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4675 }
4676
4677 /* The normal case is just to do the match once, with the default
4678 value of match_limit. */
4679
4680 #if !defined NODFA
4681 else if (all_use_dfa || use_dfa)
4682 {
4683 if (dfa_workspace == NULL)
4684 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4685 if (dfa_matched++ == 0)
4686 dfa_workspace[0] = -1; /* To catch bad restart */
4687 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4688 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4689 DFA_WS_DIMENSION);
4690 if (count == 0)
4691 {
4692 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4693 count = use_size_offsets/2;
4694 }
4695 }
4696 #endif
4697
4698 else
4699 {
4700 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4701 options | g_notempty, use_offsets, use_size_offsets);
4702 if (count == 0)
4703 {
4704 fprintf(outfile, "Matched, but too many substrings\n");
4705 count = use_size_offsets/3;
4706 }
4707 }
4708
4709 /* Matched */
4710
4711 if (count >= 0)
4712 {
4713 int i, maxcount;
4714 void *cnptr, *gnptr;
4715
4716 #if !defined NODFA
4717 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4718 #endif
4719 maxcount = use_size_offsets/3;
4720
4721 /* This is a check against a lunatic return value. */
4722
4723 if (count > maxcount)
4724 {
4725 fprintf(outfile,
4726 "** PCRE error: returned count %d is too big for offset size %d\n",
4727 count, use_size_offsets);
4728 count = use_size_offsets/3;
4729 if (do_g || do_G)
4730 {
4731 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4732 do_g = do_G = FALSE; /* Break g/G loop */
4733 }
4734 }
4735
4736 /* do_allcaps requests showing of all captures in the pattern, to check
4737 unset ones at the end. */
4738
4739 if (do_allcaps)
4740 {
4741 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4742 goto SKIP_DATA;
4743 count++; /* Allow for full match */
4744 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4745 }
4746
4747 /* Output the captured substrings */
4748
4749 for (i = 0; i < count * 2; i += 2)
4750 {
4751 if (use_offsets[i] < 0)
4752 {
4753 if (use_offsets[i] != -1)
4754 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4755 use_offsets[i], i);
4756 if (use_offsets[i+1] != -1)
4757 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4758 use_offsets[i+1], i+1);
4759 fprintf(outfile, "%2d: <unset>\n", i/2);
4760 }
4761 else
4762 {
4763 fprintf(outfile, "%2d: ", i/2);
4764 PCHARSV(bptr, use_offsets[i],
4765 use_offsets[i+1] - use_offsets[i], outfile);
4766 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4767 fprintf(outfile, "\n");
4768 if (do_showcaprest || (i == 0 && do_showrest))
4769 {
4770 fprintf(outfile, "%2d+ ", i/2);
4771 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4772 outfile);
4773 fprintf(outfile, "\n");
4774 }
4775 }
4776 }
4777
4778 if (markptr != NULL)
4779 {
4780 fprintf(outfile, "MK: ");
4781 PCHARSV(markptr, 0, -1, outfile);
4782 fprintf(outfile, "\n");
4783 }
4784
4785 for (i = 0; i < 32; i++)
4786 {
4787 if ((copystrings & (1 << i)) != 0)
4788 {
4789 int rc;
4790 char copybuffer[256];
4791 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4792 copybuffer, sizeof(copybuffer));
4793 if (rc < 0)
4794 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4795 else
4796 {
4797 fprintf(outfile, "%2dC ", i);
4798 PCHARSV(copybuffer, 0, rc, outfile);
4799 fprintf(outfile, " (%d)\n", rc);
4800 }
4801 }
4802 }
4803
4804 cnptr = copynames;
4805 for (;;)
4806 {
4807 int rc;
4808 char copybuffer[256];
4809
4810 if (pcre_mode == PCRE16_MODE)
4811 {
4812 if (*(pcre_uint16 *)cnptr == 0) break;
4813 }
4814 else
4815 {
4816 if (*(pcre_uint8 *)cnptr == 0) break;
4817 }
4818
4819 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4820 cnptr, copybuffer, sizeof(copybuffer));
4821
4822 if (rc < 0)
4823 {
4824 fprintf(outfile, "copy substring ");
4825 PCHARSV(cnptr, 0, -1, outfile);
4826 fprintf(outfile, " failed %d\n", rc);
4827 }
4828 else
4829 {
4830 fprintf(outfile, " C ");
4831 PCHARSV(copybuffer, 0, rc, outfile);
4832 fprintf(outfile, " (%d) ", rc);
4833 PCHARSV(cnptr, 0, -1, outfile);
4834 putc('\n', outfile);
4835 }
4836
4837 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4838 }
4839
4840 for (i = 0; i < 32; i++)
4841 {
4842 if ((getstrings & (1 << i)) != 0)
4843 {
4844 int rc;
4845 const char *substring;
4846 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4847 if (rc < 0)
4848 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4849 else
4850 {
4851 fprintf(outfile, "%2dG ", i);
4852 PCHARSV(substring, 0, rc, outfile);
4853 fprintf(outfile, " (%d)\n", rc);
4854 PCRE_FREE_SUBSTRING(substring);
4855 }
4856 }
4857 }
4858
4859 gnptr = getnames;
4860 for (;;)
4861 {
4862 int rc;
4863 const char *substring;
4864
4865 if (pcre_mode == PCRE16_MODE)
4866 {
4867 if (*(pcre_uint16 *)gnptr == 0) break;
4868 }
4869 else
4870 {
4871 if (*(pcre_uint8 *)gnptr == 0) break;
4872 }
4873
4874 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4875 gnptr, &substring);
4876 if (rc < 0)
4877 {
4878 fprintf(outfile, "get substring ");
4879 PCHARSV(gnptr, 0, -1, outfile);
4880 fprintf(outfile, " failed %d\n", rc);
4881 }
4882 else
4883 {
4884 fprintf(outfile, " G ");
4885 PCHARSV(substring, 0, rc, outfile);
4886 fprintf(outfile, " (%d) ", rc);
4887 PCHARSV(gnptr, 0, -1, outfile);
4888 PCRE_FREE_SUBSTRING(substring);
4889 putc('\n', outfile);
4890 }
4891
4892 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4893 }
4894
4895 if (getlist)
4896 {
4897 int rc;
4898 const char **stringlist;
4899 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4900 if (rc < 0)
4901 fprintf(outfile, "get substring list failed %d\n", rc);
4902 else
4903 {
4904 for (i = 0; i < count; i++)
4905 {
4906 fprintf(outfile, "%2dL ", i);
4907 PCHARSV(stringlist[i], 0, -1, outfile);
4908 putc('\n', outfile);
4909 }
4910 if (stringlist[i] != NULL)
4911 fprintf(outfile, "string list not terminated by NULL\n");
4912 PCRE_FREE_SUBSTRING_LIST(stringlist);
4913 }
4914 }
4915 }
4916
4917 /* There was a partial match */
4918
4919 else if (count == PCRE_ERROR_PARTIAL)
4920 {
4921 if (markptr == NULL) fprintf(outfile, "Partial match");
4922 else
4923 {
4924 fprintf(outfile, "Partial match, mark=");
4925 PCHARSV(markptr, 0, -1, outfile);
4926 }
4927 if (use_size_offsets > 1)
4928 {
4929 fprintf(outfile, ": ");
4930 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4931 outfile);
4932 }
4933 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4934 fprintf(outfile, "\n");
4935 break; /* Out of the /g loop */
4936 }
4937
4938 /* Failed to match. If this is a /g or /G loop and we previously set
4939 g_notempty after a null match, this is not necessarily the end. We want
4940 to advance the start offset, and continue. We won't be at the end of the
4941 string - that was checked before setting g_notempty.
4942
4943 Complication arises in the case when the newline convention is "any",
4944 "crlf", or "anycrlf". If the previous match was at the end of a line
4945 terminated by CRLF, an advance of one character just passes the \r,
4946 whereas we should prefer the longer newline sequence, as does the code in
4947 pcre_exec(). Fudge the offset value to achieve this. We check for a
4948 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4949 find the default.
4950
4951 Otherwise, in the case of UTF-8 matching, the advance must be one
4952 character, not one byte. */
4953
4954 else
4955 {
4956 if (g_notempty != 0)
4957 {
4958 int onechar = 1;
4959 unsigned int obits = REAL_PCRE_OPTIONS(re);
4960 use_offsets[0] = start_offset;
4961 if ((obits & PCRE_NEWLINE_BITS) == 0)
4962 {
4963 int d;
4964 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4965 /* Note that these values are always the ASCII ones, even in
4966 EBCDIC environments. CR = 13, NL = 10. */
4967 obits = (d == 13)? PCRE_NEWLINE_CR :
4968 (d == 10)? PCRE_NEWLINE_LF :
4969 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4970 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4971 (d == -1)? PCRE_NEWLINE_ANY : 0;
4972 }
4973 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4974 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4975 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4976 &&
4977 start_offset < len - 1 && (
4978 #ifdef SUPPORT_PCRE8
4979 (pcre_mode == PCRE8_MODE &&
4980 bptr[start_offset] == '\r' &&
4981 bptr[start_offset + 1] == '\n') ||
4982 #endif
4983 #ifdef SUPPORT_PCRE16
4984 (pcre_mode == PCRE16_MODE &&
4985 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
4986 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
4987 #endif
4988 #ifdef SUPPORT_PCRE32
4989 (pcre_mode == PCRE32_MODE &&
4990 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
4991 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
4992 #endif
4993 0))
4994 onechar++;
4995 else if (use_utf)
4996 {
4997 while (start_offset + onechar < len)
4998 {
4999 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5000 onechar++;
5001 }
5002 }
5003 use_offsets[1] = start_offset + onechar;
5004 }
5005 else
5006 {
5007 switch(count)
5008 {
5009 case PCRE_ERROR_NOMATCH:
5010 if (gmatched == 0)
5011 {
5012 if (markptr == NULL)
5013 {
5014 fprintf(outfile, "No match");
5015 }
5016 else
5017 {
5018 fprintf(outfile, "No match, mark = ");
5019 PCHARSV(markptr, 0, -1, outfile);
5020 }
5021 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5022 putc('\n', outfile);
5023 }
5024 break;
5025
5026 case PCRE_ERROR_BADUTF8:
5027 case PCRE_ERROR_SHORTUTF8:
5028 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5029 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5030 8 * CHAR_SIZE);
5031 if (use_size_offsets >= 2)
5032 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5033 use_offsets[1]);
5034 fprintf(outfile, "\n");
5035 break;
5036
5037 case PCRE_ERROR_BADUTF8_OFFSET:
5038 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5039 8 * CHAR_SIZE);
5040 break;
5041
5042 default:
5043 if (count < 0 &&
5044 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5045 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5046 else
5047 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5048 break;
5049 }
5050
5051 break; /* Out of the /g loop */
5052 }
5053 }
5054
5055 /* If not /g or /G we are done */
5056
5057 if (!do_g && !do_G) break;
5058
5059 /* If we have matched an empty string, first check to see if we are at
5060 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5061 Perl's /g options does. This turns out to be rather cunning. First we set
5062 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5063 same point. If this fails (picked up above) we advance to the next
5064 character. */
5065
5066 g_notempty = 0;
5067
5068 if (use_offsets[0] == use_offsets[1])
5069 {
5070 if (use_offsets[0] == len) break;
5071 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5072 }
5073
5074 /* For /g, update the start offset, leaving the rest alone */
5075
5076 if (do_g) start_offset = use_offsets[1];
5077
5078 /* For /G, update the pointer and length */
5079
5080 else
5081 {
5082 bptr += use_offsets[1] * CHAR_SIZE;
5083 len -= use_offsets[1];
5084 }
5085 } /* End of loop for /g and /G */
5086
5087 NEXT_DATA: continue;
5088 } /* End of loop for data lines */
5089
5090 CONTINUE:
5091
5092 #if !defined NOPOSIX
5093 if (posix || do_posix) regfree(&preg);
5094 #endif
5095
5096 if (re != NULL) new_free(re);
5097 if (extra != NULL)
5098 {
5099 PCRE_FREE_STUDY(extra);
5100 }
5101 if (locale_set)
5102 {
5103 new_free((void *)tables);
5104 setlocale(LC_CTYPE, "C");
5105 locale_set = 0;
5106 }
5107 if (jit_stack != NULL)
5108 {
5109 PCRE_JIT_STACK_FREE(jit_stack);
5110 jit_stack = NULL;
5111 }
5112 }
5113
5114 if (infile == stdin) fprintf(outfile, "\n");
5115
5116 EXIT:
5117
5118 if (infile != NULL && infile != stdin) fclose(infile);
5119 if (outfile != NULL && outfile != stdout) fclose(outfile);
5120
5121 free(buffer);
5122 free(dbuffer);
5123 free(pbuffer);
5124 free(offsets);
5125
5126 #ifdef SUPPORT_PCRE16
5127 if (buffer16 != NULL) free(buffer16);
5128 #endif
5129 #ifdef SUPPORT_PCRE32
5130 if (buffer32 != NULL) free(buffer32);
5131 #endif
5132
5133 #if !defined NODFA
5134 if (dfa_workspace != NULL)
5135 free(dfa_workspace);
5136 #endif
5137
5138 return yield;
5139 }
5140
5141 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5