/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1079 - (show annotations)
Tue Oct 16 15:55:04 2012 UTC (7 years ago) by chpe
File MIME type: text/plain
File size: 153615 byte(s)
pcre32: pcretest: Make pchar 32-bit clean

Use pcre_uint32 for characters.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
#if defined(WIN32) || defined(_WIN32)

/* ----- Windows ----- */

#  include <io.h>      /* Needed for _setmode() */
#  include <fcntl.h>   /* Needed for _O_BINARY */

/* Input is opened without "b"; output is opened with it. */

#  define INPUT_MODE   "r"
#  define OUTPUT_MODE  "wb"

/* Windows spells these with a leading underscore. Some environments already
define the plain names, so each one is only mapped when it is missing. */

#  if !defined(isatty)
#    define isatty _isatty
#  endif

#  if !defined(fileno)
#    define fileno _fileno
#  endif

/* User-supplied fix for Borland Builder 5 under Windows. */

#  if defined(__BORLANDC__)
#    define _setmode(handle, mode) setmode(handle, mode)
#  endif

#else

/* ----- Not Windows ----- */

#  include <sys/time.h>       /* These two headers are needed */
#  include <sys/resource.h>   /* for setrlimit(). */

#  ifndef NATIVE_ZOS
#    define INPUT_MODE   "rb"
#    define OUTPUT_MODE  "wb"
#  else                       /* z/OS uses non-binary I/O */
#    define INPUT_MODE   "r"
#    define OUTPUT_MODE  "w"
#  endif

#endif
123
/* PRIV() expands to its argument unchanged: no prefix is added to the name. */
#define PRIV(ident) ident
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136
/* When exactly one character width is supported, configure the internal
macros for that width. The three conditions are mutually exclusive; if two or
more widths are supported, none of the COMPILE_PCREn macros is defined here. */

#if defined(SUPPORT_PCRE32) && !defined(SUPPORT_PCRE8) && !defined(SUPPORT_PCRE16)
#  define COMPILE_PCRE32   /* 32-bit mode only */
#elif defined(SUPPORT_PCRE16) && !defined(SUPPORT_PCRE8) && !defined(SUPPORT_PCRE32)
#  define COMPILE_PCRE16   /* 16-bit mode only */
#elif defined(SUPPORT_PCRE8) && !defined(SUPPORT_PCRE16) && !defined(SUPPORT_PCRE32)
#  define COMPILE_PCRE8    /* 8-bit mode only */
#endif
149
150 #include "pcre_internal.h"
151
152 /* The pcre_printint() function, which prints the internal form of a compiled
153 regex, is held in a separate file so that (a) it can be compiled in either
154 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
155 when that is compiled in debug mode. */
156
/* One prototype per supported character width. Each function takes the
compiled pattern, the stream to write to, and a flag saying whether lengths
are to be printed. */

#if defined(SUPPORT_PCRE8)
extern void pcre_printint(pcre *compiled, FILE *out, BOOL show_lengths);
#endif

#if defined(SUPPORT_PCRE16)
extern void pcre16_printint(pcre *compiled, FILE *out, BOOL show_lengths);
#endif

#if defined(SUPPORT_PCRE32)
extern void pcre32_printint(pcre *compiled, FILE *out, BOOL show_lengths);
#endif
166
167 /* We need access to some of the data tables that PCRE uses. So as not to have
168 to keep two copies, we include the source files here, changing the names of the
169 external symbols to prevent clashes. */
170
171 #define PCRE_INCLUDED
172
173 #include "pcre_tables.c"
174 #include "pcre_ucd.c"
175
176 /* The definition of the macro PRINTABLE, which determines whether to print an
177 output character as-is or as a hex value when showing compiled patterns, is
178 the same as in the printint.src file. We use it here in cases when the locale
179 has not been explicitly changed, so as to get consistent output from systems
180 that differ in their output from isprint() even in the "C" locale. */
181
/* PRINTABLE(c) is the locale-independent test: true when c lies in the fixed
range treated as printable (64..254 for EBCDIC, otherwise 32..126). */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) is non-zero when c should be output as-is. If the locale has
been changed (locale_set is non-zero) the decision is left to isprint();
otherwise the fixed PRINTABLE() range is used.

isprint() is only defined for EOF and for values representable as unsigned
char, so anything outside 0..255 is reported as not printable instead of being
passed through. The unsigned cast makes negative values fail the range test as
well. Note that c is evaluated more than once. */

#define PRINTOK(c) \
  (locale_set ? ((unsigned long)(c) <= 255ul ? isprint((int)(c)) : 0) \
              : PRINTABLE(c))
189
/* POSIX support is disabled when the 8-bit library is not supported, that is,
in 16-bit and/or 32-bit only builds. */

#ifndef SUPPORT_PCRE8
#  ifndef NOPOSIX
#    define NOPOSIX
#  endif
#endif

/* This test program can also be compiled without support for testing the
POSIX interface by defining NOPOSIX by hand, though the standard Makefile does
not offer that. */

#ifndef NOPOSIX
#  include "pcreposix.h"
#endif

/* Originally for the benefit of a version that was imported into Exim,
pcretest can also be built without UTF-8/UTF-16 support (define NOUTF) and
without the interface to the DFA matcher (define NODFA). UTF support is cut
out automatically when PCRE itself is built without it. */

#if !defined(SUPPORT_UTF) && !defined(NOUTF)
#  define NOUTF
#endif
213
214 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
215 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
216 only from one place and is handled differently). I couldn't dream up any way of
217 using a single macro to do this in a generic way, because of the many different
218 argument requirements. We know that at least one of SUPPORT_PCRE8 and
219 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
220 use these in the definitions of generic macros.
221
222 **** Special note about the PCHARSxxx macros: the address of the string to be
223 printed is always given as two arguments: a base address followed by an offset.
224 The base address is cast to the correct data size for 8 or 16 bit data; the
225 offset is in units of this size. If the string were given as base+offset in one
226 argument, the casting might be incorrectly applied. */
227
#ifdef SUPPORT_PCRE8

/* 8-bit flavour of every wrapper macro. The pcre_xxx() functions are called
directly, with subject, pattern and name pointers cast to char pointers. Most
macros that yield a result assign it to their first argument; STRLEN8 and
PCRE_JIT_STACK_ALLOC8 are plain expressions. Arguments are pasted in without
extra parentheses, so the casts bind exactly as written. */

/* Printing: the base address is cast first and the offset added afterwards,
so the offset counts 8-bit units. */

#define PCHARS8(result, base, off, n, fp) \
  result = pchars((pcre_uint8 *)(base) + off, n, fp)

#define PCHARSV8(base, off, n, fp) \
  (void)pchars((pcre_uint8 *)(base) + off, n, fp)

/* Only the 8-bit name buffer is used; the 16-bit and 32-bit ones are accepted
so that all three widths share one argument list. */

#define READ_CAPTURE_NAME8(ptr, name8, name16, name32, code) \
  ptr = read_capture_name8(ptr, name8, code)

#define STRLEN8(str) ((int)strlen((char *)str))

#define SET_PCRE_CALLOUT8(fn) \
  pcre_callout = fn

#define PCRE_ASSIGN_JIT_STACK8(study, cb, arg) \
  pcre_assign_jit_stack(study, cb, arg)

#define PCRE_COMPILE8(code, pattern, opts, errptr, erroff, tbl) \
  code = pcre_compile((char *)pattern, opts, errptr, erroff, tbl)

#define PCRE_COPY_NAMED_SUBSTRING8(result, code, subject, ovec, nmatch, \
    name, dest, destsize) \
  result = pcre_copy_named_substring(code, (char *)subject, ovec, nmatch, \
    (char *)name, dest, destsize)

#define PCRE_COPY_SUBSTRING8(result, subject, ovec, nmatch, idx, dest, \
    destsize) \
  result = pcre_copy_substring((char *)subject, ovec, nmatch, idx, dest, \
    destsize)

#define PCRE_DFA_EXEC8(result, code, study, subject, length, start, opts, \
    ovec, ovecsize, ws, wssize) \
  result = pcre_dfa_exec(code, study, (char *)subject, length, start, opts, \
    ovec, ovecsize, ws, wssize)

#define PCRE_EXEC8(result, code, study, subject, length, start, opts, \
    ovec, ovecsize) \
  result = pcre_exec(code, study, (char *)subject, length, start, opts, \
    ovec, ovecsize)

#define PCRE_FREE_STUDY8(study) \
  pcre_free_study(study)

#define PCRE_FREE_SUBSTRING8(str) \
  pcre_free_substring(str)

#define PCRE_FREE_SUBSTRING_LIST8(list) \
  pcre_free_substring_list(list)

#define PCRE_GET_NAMED_SUBSTRING8(result, code, subject, ovec, nmatch, \
    name, out) \
  result = pcre_get_named_substring(code, (char *)subject, ovec, nmatch, \
    (char *)name, out)

/* The second argument is not used: the compiled pattern is taken from a
variable called "re" that must be visible where the macro is expanded. */

#define PCRE_GET_STRINGNUMBER8(num, rc_unused, name) \
  num = pcre_get_stringnumber(re, (char *)name)

#define PCRE_GET_SUBSTRING8(result, subject, ovec, nmatch, idx, out) \
  result = pcre_get_substring((char *)subject, ovec, nmatch, idx, out)

#define PCRE_GET_SUBSTRING_LIST8(result, subject, ovec, nmatch, list) \
  result = pcre_get_substring_list((const char *)subject, ovec, nmatch, list)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(result, code, study, tbl) \
  result = pcre_pattern_to_host_byte_order(code, study, tbl)

#define PCRE_PRINTINT8(code, fp, lengths) \
  pcre_printint(code, fp, lengths)

#define PCRE_STUDY8(study, code, opts, errptr) \
  study = pcre_study(code, opts, errptr)

#define PCRE_JIT_STACK_ALLOC8(initial, limit) \
  pcre_jit_stack_alloc(initial, limit)

#define PCRE_JIT_STACK_FREE8(stk) \
  pcre_jit_stack_free(stk)

#endif /* SUPPORT_PCRE8 */
307
308 /* -----------------------------------------------------------*/
309
#ifdef SUPPORT_PCRE16

/* 16-bit flavour of every wrapper macro. The pcre16_xxx() functions are
called with the generic pcre, pcre_extra and byte pointers cast to their
16-bit counterparts, and results are cast back to the generic types. Most
macros that yield a result assign it to their first argument; STRLEN16 and
PCRE_JIT_STACK_ALLOC16 are plain expressions. Arguments are pasted in without
extra parentheses, so the casts bind exactly as written. */

/* Printing: the base address is cast first and the offset added afterwards,
so the offset counts 16-bit units. */

#define PCHARS16(result, base, off, n, fp) \
  result = pchars16((PCRE_SPTR16)(base) + off, n, fp)

#define PCHARSV16(base, off, n, fp) \
  (void)pchars16((PCRE_SPTR16)(base) + off, n, fp)

/* Only the 16-bit name buffer is used; the 8-bit and 32-bit ones are accepted
so that all three widths share one argument list. */

#define READ_CAPTURE_NAME16(ptr, name8, name16, name32, code) \
  ptr = read_capture_name16(ptr, name16, code)

#define STRLEN16(str) ((int)strlen16((PCRE_SPTR16)str))

#define SET_PCRE_CALLOUT16(fn) \
  pcre16_callout = (int (*)(pcre16_callout_block *))fn

#define PCRE_ASSIGN_JIT_STACK16(study, cb, arg) \
  pcre16_assign_jit_stack((pcre16_extra *)study, \
    (pcre16_jit_callback)cb, arg)

#define PCRE_COMPILE16(code, pattern, opts, errptr, erroff, tbl) \
  code = (pcre *)pcre16_compile((PCRE_SPTR16)pattern, opts, errptr, erroff, \
    tbl)

/* The destination size is divided by 2 before being handed to the library:
a byte count becomes a count of 16-bit units. */

#define PCRE_COPY_NAMED_SUBSTRING16(result, code, subject, ovec, nmatch, \
    name, dest, destsize) \
  result = pcre16_copy_named_substring((pcre16 *)code, (PCRE_SPTR16)subject, \
    ovec, nmatch, (PCRE_SPTR16)name, (PCRE_UCHAR16 *)dest, destsize/2)

#define PCRE_COPY_SUBSTRING16(result, subject, ovec, nmatch, idx, dest, \
    destsize) \
  result = pcre16_copy_substring((PCRE_SPTR16)subject, ovec, nmatch, idx, \
    (PCRE_UCHAR16 *)dest, destsize/2)

#define PCRE_DFA_EXEC16(result, code, study, subject, length, start, opts, \
    ovec, ovecsize, ws, wssize) \
  result = pcre16_dfa_exec((pcre16 *)code, (pcre16_extra *)study, \
    (PCRE_SPTR16)subject, length, start, opts, ovec, ovecsize, \
    ws, wssize)

#define PCRE_EXEC16(result, code, study, subject, length, start, opts, \
    ovec, ovecsize) \
  result = pcre16_exec((pcre16 *)code, (pcre16_extra *)study, \
    (PCRE_SPTR16)subject, length, start, opts, ovec, ovecsize)

#define PCRE_FREE_STUDY16(study) \
  pcre16_free_study((pcre16_extra *)study)

#define PCRE_FREE_SUBSTRING16(str) \
  pcre16_free_substring((PCRE_SPTR16)str)

#define PCRE_FREE_SUBSTRING_LIST16(list) \
  pcre16_free_substring_list((PCRE_SPTR16 *)list)

#define PCRE_GET_NAMED_SUBSTRING16(result, code, subject, ovec, nmatch, \
    name, out) \
  result = pcre16_get_named_substring((pcre16 *)code, (PCRE_SPTR16)subject, \
    ovec, nmatch, (PCRE_SPTR16)name, (PCRE_SPTR16 *)(void*)out)

/* The second argument is not used: the compiled pattern is taken from a
variable called "re" that must be visible where the macro is expanded. It is
cast to pcre16 * in the same way as in every other wrapper in this group. */

#define PCRE_GET_STRINGNUMBER16(num, rc_unused, name) \
  num = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)name)

#define PCRE_GET_SUBSTRING16(result, subject, ovec, nmatch, idx, out) \
  result = pcre16_get_substring((PCRE_SPTR16)subject, ovec, nmatch, idx, \
    (PCRE_SPTR16 *)(void*)out)

#define PCRE_GET_SUBSTRING_LIST16(result, subject, ovec, nmatch, list) \
  result = pcre16_get_substring_list((PCRE_SPTR16)subject, ovec, nmatch, \
    (PCRE_SPTR16 **)(void*)list)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(result, code, study, tbl) \
  result = pcre16_pattern_to_host_byte_order((pcre16 *)code, \
    (pcre16_extra *)study, tbl)

#define PCRE_PRINTINT16(code, fp, lengths) \
  pcre16_printint(code, fp, lengths)

#define PCRE_STUDY16(study, code, opts, errptr) \
  study = (pcre_extra *)pcre16_study((pcre16 *)code, opts, errptr)

#define PCRE_JIT_STACK_ALLOC16(initial, limit) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(initial, limit)

#define PCRE_JIT_STACK_FREE16(stk) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stk)

#endif /* SUPPORT_PCRE16 */
396
397 /* -----------------------------------------------------------*/
398
#ifdef SUPPORT_PCRE32

/* 32-bit flavour of every wrapper macro. The pcre32_xxx() functions are
called with the generic pcre, pcre_extra and byte pointers cast to their
32-bit counterparts, and results are cast back to the generic types. Most
macros that yield a result assign it to their first argument; STRLEN32 and
PCRE_JIT_STACK_ALLOC32 are plain expressions. Arguments are pasted in without
extra parentheses, so the casts bind exactly as written. */

/* Printing: the base address is cast first and the offset added afterwards,
so the offset counts 32-bit units. */

#define PCHARS32(result, base, off, n, fp) \
  result = pchars32((PCRE_SPTR32)(base) + off, n, fp)

#define PCHARSV32(base, off, n, fp) \
  (void)pchars32((PCRE_SPTR32)(base) + off, n, fp)

/* Only the 32-bit name buffer is used; the 8-bit and 16-bit ones are accepted
so that all three widths share one argument list. */

#define READ_CAPTURE_NAME32(ptr, name8, name16, name32, code) \
  ptr = read_capture_name32(ptr, name32, code)

#define STRLEN32(str) ((int)strlen32((PCRE_SPTR32)str))

#define SET_PCRE_CALLOUT32(fn) \
  pcre32_callout = (int (*)(pcre32_callout_block *))fn

#define PCRE_ASSIGN_JIT_STACK32(study, cb, arg) \
  pcre32_assign_jit_stack((pcre32_extra *)study, \
    (pcre32_jit_callback)cb, arg)

#define PCRE_COMPILE32(code, pattern, opts, errptr, erroff, tbl) \
  code = (pcre *)pcre32_compile((PCRE_SPTR32)pattern, opts, errptr, erroff, \
    tbl)

/* The destination size is divided by 4 before being handed to the library:
a byte count becomes a count of 32-bit units. Dividing by 2, as the 16-bit
wrappers do, would report twice the real capacity of the buffer. */

#define PCRE_COPY_NAMED_SUBSTRING32(result, code, subject, ovec, nmatch, \
    name, dest, destsize) \
  result = pcre32_copy_named_substring((pcre32 *)code, (PCRE_SPTR32)subject, \
    ovec, nmatch, (PCRE_SPTR32)name, (PCRE_UCHAR32 *)dest, destsize/4)

#define PCRE_COPY_SUBSTRING32(result, subject, ovec, nmatch, idx, dest, \
    destsize) \
  result = pcre32_copy_substring((PCRE_SPTR32)subject, ovec, nmatch, idx, \
    (PCRE_UCHAR32 *)dest, destsize/4)

#define PCRE_DFA_EXEC32(result, code, study, subject, length, start, opts, \
    ovec, ovecsize, ws, wssize) \
  result = pcre32_dfa_exec((pcre32 *)code, (pcre32_extra *)study, \
    (PCRE_SPTR32)subject, length, start, opts, ovec, ovecsize, \
    ws, wssize)

#define PCRE_EXEC32(result, code, study, subject, length, start, opts, \
    ovec, ovecsize) \
  result = pcre32_exec((pcre32 *)code, (pcre32_extra *)study, \
    (PCRE_SPTR32)subject, length, start, opts, ovec, ovecsize)

#define PCRE_FREE_STUDY32(study) \
  pcre32_free_study((pcre32_extra *)study)

#define PCRE_FREE_SUBSTRING32(str) \
  pcre32_free_substring((PCRE_SPTR32)str)

#define PCRE_FREE_SUBSTRING_LIST32(list) \
  pcre32_free_substring_list((PCRE_SPTR32 *)list)

#define PCRE_GET_NAMED_SUBSTRING32(result, code, subject, ovec, nmatch, \
    name, out) \
  result = pcre32_get_named_substring((pcre32 *)code, (PCRE_SPTR32)subject, \
    ovec, nmatch, (PCRE_SPTR32)name, (PCRE_SPTR32 *)(void*)out)

/* The second argument is not used: the compiled pattern is taken from a
variable called "re" that must be visible where the macro is expanded. It is
cast to pcre32 * in the same way as in every other wrapper in this group. */

#define PCRE_GET_STRINGNUMBER32(num, rc_unused, name) \
  num = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)name)

#define PCRE_GET_SUBSTRING32(result, subject, ovec, nmatch, idx, out) \
  result = pcre32_get_substring((PCRE_SPTR32)subject, ovec, nmatch, idx, \
    (PCRE_SPTR32 *)(void*)out)

#define PCRE_GET_SUBSTRING_LIST32(result, subject, ovec, nmatch, list) \
  result = pcre32_get_substring_list((PCRE_SPTR32)subject, ovec, nmatch, \
    (PCRE_SPTR32 **)(void*)list)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(result, code, study, tbl) \
  result = pcre32_pattern_to_host_byte_order((pcre32 *)code, \
    (pcre32_extra *)study, tbl)

#define PCRE_PRINTINT32(code, fp, lengths) \
  pcre32_printint(code, fp, lengths)

#define PCRE_STUDY32(study, code, opts, errptr) \
  study = (pcre_extra *)pcre32_study((pcre32 *)code, opts, errptr)

#define PCRE_JIT_STACK_ALLOC32(initial, limit) \
  (pcre_jit_stack *)pcre32_jit_stack_alloc(initial, limit)

#define PCRE_JIT_STACK_FREE32(stk) \
  pcre32_jit_stack_free((pcre32_jit_stack *)stk)

#endif /* SUPPORT_PCRE32 */
485
486
487 /* ----- Both modes are supported; a runtime test is needed, except for
488 pcre_config(), and the JIT stack functions, when it doesn't matter which
489 version is called. ----- */
490
/* Selects the character width in use. The numeric values matter: the
multi-mode CHAR_SIZE macro computes the unit size as (1 << mode), giving 1, 2
and 4 for the 8-bit, 16-bit and 32-bit modes respectively. */

enum {
  PCRE8_MODE  = 0,
  PCRE16_MODE = 1,
  PCRE32_MODE = 2
};
496
#if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2

/* ----- Two or more widths are supported: choose at run time ----- */

/* NOTE(review): this branch is taken as soon as two widths are enabled, yet
every dispatcher below names the 8-, 16- and 32-bit macro. Each of those is
only defined inside its own #ifdef SUPPORT_PCREn section, so a build with
exactly two widths would refer to undefined macros wherever a dispatcher is
used - confirm whether such builds are expected to work. */

#define CHAR_SIZE (1 << pcre_mode)

/* Statement dispatcher shared by the macros below. It tests pcre_mode for
PCRE32_MODE, then PCRE16_MODE, and falls back to the 8-bit statement for any
other value. Like the statements it wraps, the expansion has no trailing
semicolon. */

#define PCRE_MODE_SELECT(stmt32, stmt16, stmt8) \
  if (pcre_mode == PCRE32_MODE) \
    stmt32; \
  else if (pcre_mode == PCRE16_MODE) \
    stmt16; \
  else \
    stmt8

#define PCHARS(result, base, off, n, fp) \
  PCRE_MODE_SELECT( \
    PCHARS32(result, base, off, n, fp), \
    PCHARS16(result, base, off, n, fp), \
    PCHARS8(result, base, off, n, fp))

#define PCHARSV(base, off, n, fp) \
  PCRE_MODE_SELECT( \
    PCHARSV32(base, off, n, fp), \
    PCHARSV16(base, off, n, fp), \
    PCHARSV8(base, off, n, fp))

#define READ_CAPTURE_NAME(ptr, name8, name16, name32, code) \
  PCRE_MODE_SELECT( \
    READ_CAPTURE_NAME32(ptr, name8, name16, name32, code), \
    READ_CAPTURE_NAME16(ptr, name8, name16, name32, code), \
    READ_CAPTURE_NAME8(ptr, name8, name16, name32, code))

#define SET_PCRE_CALLOUT(fn) \
  PCRE_MODE_SELECT( \
    SET_PCRE_CALLOUT32(fn), \
    SET_PCRE_CALLOUT16(fn), \
    SET_PCRE_CALLOUT8(fn))

/* An expression, not a statement, so it uses ?: rather than the dispatcher. */

#define STRLEN(str) (pcre_mode == PCRE32_MODE ? STRLEN32(str) : pcre_mode == PCRE16_MODE ? STRLEN16(str) : STRLEN8(str))

#define PCRE_ASSIGN_JIT_STACK(study, cb, arg) \
  PCRE_MODE_SELECT( \
    PCRE_ASSIGN_JIT_STACK32(study, cb, arg), \
    PCRE_ASSIGN_JIT_STACK16(study, cb, arg), \
    PCRE_ASSIGN_JIT_STACK8(study, cb, arg))

#define PCRE_COMPILE(code, pattern, opts, errptr, erroff, tbl) \
  PCRE_MODE_SELECT( \
    PCRE_COMPILE32(code, pattern, opts, errptr, erroff, tbl), \
    PCRE_COMPILE16(code, pattern, opts, errptr, erroff, tbl), \
    PCRE_COMPILE8(code, pattern, opts, errptr, erroff, tbl))

/* pcre_config() is used whatever the current mode. */

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(result, code, subject, ovec, nmatch, \
    name, dest, destsize) \
  PCRE_MODE_SELECT( \
    PCRE_COPY_NAMED_SUBSTRING32(result, code, subject, ovec, nmatch, \
      name, dest, destsize), \
    PCRE_COPY_NAMED_SUBSTRING16(result, code, subject, ovec, nmatch, \
      name, dest, destsize), \
    PCRE_COPY_NAMED_SUBSTRING8(result, code, subject, ovec, nmatch, \
      name, dest, destsize))

#define PCRE_COPY_SUBSTRING(result, subject, ovec, nmatch, idx, dest, \
    destsize) \
  PCRE_MODE_SELECT( \
    PCRE_COPY_SUBSTRING32(result, subject, ovec, nmatch, idx, dest, destsize), \
    PCRE_COPY_SUBSTRING16(result, subject, ovec, nmatch, idx, dest, destsize), \
    PCRE_COPY_SUBSTRING8(result, subject, ovec, nmatch, idx, dest, destsize))

#define PCRE_DFA_EXEC(result, code, study, subject, length, start, opts, \
    ovec, ovecsize, ws, wssize) \
  PCRE_MODE_SELECT( \
    PCRE_DFA_EXEC32(result, code, study, subject, length, start, opts, \
      ovec, ovecsize, ws, wssize), \
    PCRE_DFA_EXEC16(result, code, study, subject, length, start, opts, \
      ovec, ovecsize, ws, wssize), \
    PCRE_DFA_EXEC8(result, code, study, subject, length, start, opts, \
      ovec, ovecsize, ws, wssize))

#define PCRE_EXEC(result, code, study, subject, length, start, opts, \
    ovec, ovecsize) \
  PCRE_MODE_SELECT( \
    PCRE_EXEC32(result, code, study, subject, length, start, opts, \
      ovec, ovecsize), \
    PCRE_EXEC16(result, code, study, subject, length, start, opts, \
      ovec, ovecsize), \
    PCRE_EXEC8(result, code, study, subject, length, start, opts, \
      ovec, ovecsize))

#define PCRE_FREE_STUDY(study) \
  PCRE_MODE_SELECT( \
    PCRE_FREE_STUDY32(study), \
    PCRE_FREE_STUDY16(study), \
    PCRE_FREE_STUDY8(study))

#define PCRE_FREE_SUBSTRING(str) \
  PCRE_MODE_SELECT( \
    PCRE_FREE_SUBSTRING32(str), \
    PCRE_FREE_SUBSTRING16(str), \
    PCRE_FREE_SUBSTRING8(str))

#define PCRE_FREE_SUBSTRING_LIST(list) \
  PCRE_MODE_SELECT( \
    PCRE_FREE_SUBSTRING_LIST32(list), \
    PCRE_FREE_SUBSTRING_LIST16(list), \
    PCRE_FREE_SUBSTRING_LIST8(list))

#define PCRE_GET_NAMED_SUBSTRING(result, code, subject, ovec, nmatch, \
    name, out) \
  PCRE_MODE_SELECT( \
    PCRE_GET_NAMED_SUBSTRING32(result, code, subject, ovec, nmatch, \
      name, out), \
    PCRE_GET_NAMED_SUBSTRING16(result, code, subject, ovec, nmatch, \
      name, out), \
    PCRE_GET_NAMED_SUBSTRING8(result, code, subject, ovec, nmatch, \
      name, out))

#define PCRE_GET_STRINGNUMBER(num, rc, name) \
  PCRE_MODE_SELECT( \
    PCRE_GET_STRINGNUMBER32(num, rc, name), \
    PCRE_GET_STRINGNUMBER16(num, rc, name), \
    PCRE_GET_STRINGNUMBER8(num, rc, name))

#define PCRE_GET_SUBSTRING(result, subject, ovec, nmatch, idx, out) \
  PCRE_MODE_SELECT( \
    PCRE_GET_SUBSTRING32(result, subject, ovec, nmatch, idx, out), \
    PCRE_GET_SUBSTRING16(result, subject, ovec, nmatch, idx, out), \
    PCRE_GET_SUBSTRING8(result, subject, ovec, nmatch, idx, out))

#define PCRE_GET_SUBSTRING_LIST(result, subject, ovec, nmatch, list) \
  PCRE_MODE_SELECT( \
    PCRE_GET_SUBSTRING_LIST32(result, subject, ovec, nmatch, list), \
    PCRE_GET_SUBSTRING_LIST16(result, subject, ovec, nmatch, list), \
    PCRE_GET_SUBSTRING_LIST8(result, subject, ovec, nmatch, list))

/* Yields the allocated stack, so this one is an expression as well. */

#define PCRE_JIT_STACK_ALLOC(initial, limit) \
  (pcre_mode == PCRE32_MODE ? \
     PCRE_JIT_STACK_ALLOC32(initial, limit) \
    : pcre_mode == PCRE16_MODE ? \
      PCRE_JIT_STACK_ALLOC16(initial, limit) \
      : PCRE_JIT_STACK_ALLOC8(initial, limit))

#define PCRE_JIT_STACK_FREE(stk) \
  PCRE_MODE_SELECT( \
    PCRE_JIT_STACK_FREE32(stk), \
    PCRE_JIT_STACK_FREE16(stk), \
    PCRE_JIT_STACK_FREE8(stk))

#define PCRE_MAKETABLES \
  (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(result, code, study, tbl) \
  PCRE_MODE_SELECT( \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER32(result, code, study, tbl), \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(result, code, study, tbl), \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(result, code, study, tbl))

#define PCRE_PRINTINT(code, fp, lengths) \
  PCRE_MODE_SELECT( \
    PCRE_PRINTINT32(code, fp, lengths), \
    PCRE_PRINTINT16(code, fp, lengths), \
    PCRE_PRINTINT8(code, fp, lengths))

#define PCRE_STUDY(study, code, opts, errptr) \
  PCRE_MODE_SELECT( \
    PCRE_STUDY32(study, code, opts, errptr), \
    PCRE_STUDY16(study, code, opts, errptr), \
    PCRE_STUDY8(study, code, opts, errptr))

/* ----- Only 8-bit mode is supported: plain aliases ----- */

#elif defined SUPPORT_PCRE8
#define CHAR_SIZE                        1
#define PCHARS                           PCHARS8
#define PCHARSV                          PCHARSV8
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT8
#define STRLEN                           STRLEN8
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE                     PCRE_COMPILE8
#define PCRE_CONFIG                      pcre_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC8
#define PCRE_EXEC                        PCRE_EXEC8
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE8
#define PCRE_MAKETABLES                  pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT                    PCRE_PRINTINT8
#define PCRE_STUDY                       PCRE_STUDY8

/* ----- Only 16-bit mode is supported: plain aliases ----- */

#elif defined SUPPORT_PCRE16
#define CHAR_SIZE                        2
#define PCHARS                           PCHARS16
#define PCHARSV                          PCHARSV16
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT16
#define STRLEN                           STRLEN16
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE                     PCRE_COMPILE16
#define PCRE_CONFIG                      pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC16
#define PCRE_EXEC                        PCRE_EXEC16
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE16
#define PCRE_MAKETABLES                  pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT                    PCRE_PRINTINT16
#define PCRE_STUDY                       PCRE_STUDY16

/* ----- Only 32-bit mode is supported: plain aliases ----- */

#elif defined SUPPORT_PCRE32
#define CHAR_SIZE                        4
#define PCHARS                           PCHARS32
#define PCHARSV                          PCHARSV32
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME32
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT32
#define STRLEN                           STRLEN32
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK32
#define PCRE_COMPILE                     PCRE_COMPILE32
#define PCRE_CONFIG                      pcre32_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING32
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING32
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC32
#define PCRE_EXEC                        PCRE_EXEC32
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY32
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING32
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST32
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING32
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER32
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING32
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST32
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC32
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE32
#define PCRE_MAKETABLES                  pcre32_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER32
#define PCRE_PRINTINT                    PCRE_PRINTINT32
#define PCRE_STUDY                       PCRE_STUDY32

#endif
790
791 /* ----- End of mode-specific function call macros ----- */
792
793
794 /* Other parameters */
795
796 #ifndef CLOCKS_PER_SEC /* not already defined: take CLK_TCK if it exists, else assume 100 */
797 #ifdef CLK_TCK
798 #define CLOCKS_PER_SEC CLK_TCK
799 #else
800 #define CLOCKS_PER_SEC 100
801 #endif
802 #endif
803
/* DFA_WS_DIMENSION is defined only when DFA support has not been disabled by
NODFA. NOTE(review): presumably the dimension of the DFA workspace vector -
confirm at the point of use. */
804 #if !defined NODFA
805 #define DFA_WS_DIMENSION 1000
806 #endif
807
808 /* This is the default loop count for timing. */
809
810 #define LOOPREPEAT 500000
811
812 /* Static variables */
813
814 static FILE *outfile;
815 static int log_store = 0;
816 static int callout_count;
817 static int callout_extra;
818 static int callout_fail_count;
819 static int callout_fail_id;
820 static int debug_lengths;
821 static int first_callout;
822 static int jit_was_used; /* set to TRUE by jit_callback() */
823 static int locale_set = 0;
824 static int show_malloc;
825 static int use_utf; /* consulted by pchar(), pchars() and pchars16() when formatting output */
826 static size_t gotten_store;
827 static size_t first_gotten_store = 0;
828 static const unsigned char *last_callout_mark = NULL;
829
830 /* The buffers grow automatically if very long input lines are encountered. */
831
/* All three buffers are always buffer_size bytes long. When extend_inputline()
runs short of room it doubles buffer_size and replaces all three; the contents
of buffer and pbuffer are copied across, those of dbuffer are not. */
832 static int buffer_size = 50000;
833 static pcre_uint8 *buffer = NULL; /* input lines are read into this by extend_inputline() */
834 static pcre_uint8 *dbuffer = NULL;
835 static pcre_uint8 *pbuffer = NULL;
836
837 /* Further buffers are needed for translation to 16-bit and 32-bit character
838 strings. They will be obtained and extended as required. */
839
840 #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
841
842 /* We need the table of operator lengths that is used for 16/32-bit compiling, in
843 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
844 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
845 appropriately for the 16/32-bit world. Just as a safety check, make sure that
846 COMPILE_PCRE[16|32] is *not* set. */
847
848 #ifdef COMPILE_PCRE16
849 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
850 #endif
851
852 #ifdef COMPILE_PCRE32
853 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
854 #endif
855
/* Rescale LINK_SIZE: a value of 2 becomes 1, and a value of 3 or 4 becomes 2.
Any other value stops the compilation. */
856 #if LINK_SIZE == 2
857 #undef LINK_SIZE
858 #define LINK_SIZE 1
859 #elif LINK_SIZE == 3 || LINK_SIZE == 4
860 #undef LINK_SIZE
861 #define LINK_SIZE 2
862 #else
863 #error LINK_SIZE must be either 2, 3, or 4
864 #endif
865
/* IMM2_SIZE is unconditionally forced to 1. NOTE(review): presumably for the
same reason as the LINK_SIZE adjustment above - confirm against the header that
defines it. */
866 #undef IMM2_SIZE
867 #define IMM2_SIZE 1
868
869 #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
870
871 #ifdef SUPPORT_PCRE16
872 static int buffer16_size = 0; /* bytes currently allocated for buffer16; maintained by to16() */
873 static pcre_uint16 *buffer16 = NULL; /* to16() leaves its result here */
874 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
875 #endif /* SUPPORT_PCRE16 */
876
877 #ifdef SUPPORT_PCRE32
878 static int buffer32_size = 0; /* bytes currently allocated for buffer32; maintained by to32() */
879 static pcre_uint32 *buffer32 = NULL; /* to32() leaves its result here */
880 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
881 #endif /* SUPPORT_PCRE32 */
882
883 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
884 support, it can be changed by an option. If there is no 8-bit support, there
885 must be 16- or 32-bit support, so default to 16-bit mode when that is
available and to 32-bit mode otherwise. */
886
887 #if defined SUPPORT_PCRE8
888 static int pcre_mode = PCRE8_MODE;
889 #elif defined SUPPORT_PCRE16
890 static int pcre_mode = PCRE16_MODE;
891 #elif defined SUPPORT_PCRE32
892 static int pcre_mode = PCRE32_MODE;
893 #endif
894
895 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
896
897 static int jit_study_bits[] =
898 {
/* Seven entries, one for each n from 1 to 7, in that order: entry 0 is for
n = 1 and entry 6 is for n = 7. */
899 PCRE_STUDY_JIT_COMPILE, /* n = 1 */
900 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, /* n = 2 */
901 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, /* n = 3 */
902 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* n = 4 */
903 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* n = 5 */
904 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* n = 6 */
905 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
906 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE /* n = 7 */
907 };
908
/* The three JIT study option bits ORed together. */
909 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
910 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
911
912 /* Textual explanations for runtime error codes */
913
914 static const char *errtexts[] = {
/* NOTE(review): the entries appear to be indexed by the negated runtime error
code (entry 1 being NOMATCH), and a NULL entry marks a code whose message does
not come from this table - confirm at the point of use. */
915 NULL, /* 0 is no error */
916 NULL, /* NOMATCH is handled specially */
917 "NULL argument passed",
918 "bad option value",
919 "magic number missing",
920 "unknown opcode - pattern overwritten?",
921 "no more memory",
922 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
923 "match limit exceeded",
924 "callout error code",
925 NULL, /* BADUTF8/16 is handled specially */
926 NULL, /* BADUTF8/16 offset is handled specially */
927 NULL, /* PARTIAL is handled specially */
928 "not used - internal error",
929 "internal error - pattern overwritten?",
930 "bad count value",
931 "item unsupported for DFA matching",
932 "backreference condition or recursion test not supported for DFA matching",
933 "match limit not supported for DFA matching",
934 "workspace size exceeded in DFA matching",
935 "too much recursion for DFA matching",
936 "recursion limit exceeded",
937 "not used - internal error",
938 "invalid combination of newline options",
939 "bad offset value",
940 NULL, /* SHORTUTF8/16 is handled specially */
941 "nested recursion at the same subject position",
942 "JIT stack limit reached",
943 "pattern compiled in wrong mode: 8-bit/16-bit error",
944 "pattern compiled with other endianness",
945 "invalid data in workspace for DFA restart"
946 };
947
948
949 /*************************************************
950 * Alternate character tables *
951 *************************************************/
952
953 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
954 using the default tables of the library. However, the T option can be used to
955 select alternate sets of tables, for different kinds of testing. Note also that
956 the L (locale) option also adjusts the tables. */
957
958 /* This is the set of tables distributed as default with PCRE. It recognizes
959 only ASCII characters. */
960
961 static const pcre_uint8 tables0[] = {
962
963 /* This table is a lower casing table. */
964
965 0, 1, 2, 3, 4, 5, 6, 7,
966 8, 9, 10, 11, 12, 13, 14, 15,
967 16, 17, 18, 19, 20, 21, 22, 23,
968 24, 25, 26, 27, 28, 29, 30, 31,
969 32, 33, 34, 35, 36, 37, 38, 39,
970 40, 41, 42, 43, 44, 45, 46, 47,
971 48, 49, 50, 51, 52, 53, 54, 55,
972 56, 57, 58, 59, 60, 61, 62, 63,
973 64, 97, 98, 99,100,101,102,103,
974 104,105,106,107,108,109,110,111,
975 112,113,114,115,116,117,118,119,
976 120,121,122, 91, 92, 93, 94, 95,
977 96, 97, 98, 99,100,101,102,103,
978 104,105,106,107,108,109,110,111,
979 112,113,114,115,116,117,118,119,
980 120,121,122,123,124,125,126,127,
981 128,129,130,131,132,133,134,135,
982 136,137,138,139,140,141,142,143,
983 144,145,146,147,148,149,150,151,
984 152,153,154,155,156,157,158,159,
985 160,161,162,163,164,165,166,167,
986 168,169,170,171,172,173,174,175,
987 176,177,178,179,180,181,182,183,
988 184,185,186,187,188,189,190,191,
989 192,193,194,195,196,197,198,199,
990 200,201,202,203,204,205,206,207,
991 208,209,210,211,212,213,214,215,
992 216,217,218,219,220,221,222,223,
993 224,225,226,227,228,229,230,231,
994 232,233,234,235,236,237,238,239,
995 240,241,242,243,244,245,246,247,
996 248,249,250,251,252,253,254,255,
997
998 /* This table is a case flipping table. */
999
1000 0, 1, 2, 3, 4, 5, 6, 7,
1001 8, 9, 10, 11, 12, 13, 14, 15,
1002 16, 17, 18, 19, 20, 21, 22, 23,
1003 24, 25, 26, 27, 28, 29, 30, 31,
1004 32, 33, 34, 35, 36, 37, 38, 39,
1005 40, 41, 42, 43, 44, 45, 46, 47,
1006 48, 49, 50, 51, 52, 53, 54, 55,
1007 56, 57, 58, 59, 60, 61, 62, 63,
1008 64, 97, 98, 99,100,101,102,103,
1009 104,105,106,107,108,109,110,111,
1010 112,113,114,115,116,117,118,119,
1011 120,121,122, 91, 92, 93, 94, 95,
1012 96, 65, 66, 67, 68, 69, 70, 71,
1013 72, 73, 74, 75, 76, 77, 78, 79,
1014 80, 81, 82, 83, 84, 85, 86, 87,
1015 88, 89, 90,123,124,125,126,127,
1016 128,129,130,131,132,133,134,135,
1017 136,137,138,139,140,141,142,143,
1018 144,145,146,147,148,149,150,151,
1019 152,153,154,155,156,157,158,159,
1020 160,161,162,163,164,165,166,167,
1021 168,169,170,171,172,173,174,175,
1022 176,177,178,179,180,181,182,183,
1023 184,185,186,187,188,189,190,191,
1024 192,193,194,195,196,197,198,199,
1025 200,201,202,203,204,205,206,207,
1026 208,209,210,211,212,213,214,215,
1027 216,217,218,219,220,221,222,223,
1028 224,225,226,227,228,229,230,231,
1029 232,233,234,235,236,237,238,239,
1030 240,241,242,243,244,245,246,247,
1031 248,249,250,251,252,253,254,255,
1032
1033 /* This table contains bit maps for various character classes. Each map is 32
1034 bytes long and the bits run from the least significant end of each byte. The
1035 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1036 graph, print, punct, and cntrl. Other classes are built from combinations. */
1037
/* space: characters 9-13 and 32 */
1038 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1039 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1040 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1041 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1042
/* xdigit: 0-9, A-F, a-f */
1043 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1044 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1045 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1046 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1047
/* digit: 0-9 */
1048 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1049 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1050 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1051 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1052
/* upper: A-Z */
1053 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1054 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1055 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1056 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1057
/* lower: a-z */
1058 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1059 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1060 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1061 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1062
/* word: 0-9, A-Z, a-z and underscore */
1063 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1064 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1065 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1066 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1067
/* graph: characters 33-126 */
1068 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1069 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1070 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1071 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1072
/* print: characters 32-126 */
1073 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1074 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1075 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1076 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1077
/* punct */
1078 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1079 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1080 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1081 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1082
/* cntrl: characters 0-31 and 127 */
1083 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1084 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1085 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1086 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1087
1088 /* This table identifies various classes of character by individual bits:
1089 0x01 white space character
1090 0x02 letter
1091 0x04 decimal digit
1092 0x08 hexadecimal digit
1093 0x10 alphanumeric or '_'
1094 0x80 regular expression metacharacter or binary zero
1095 */
1096
1097 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1098 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1099 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1100 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1101 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1102 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1103 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1104 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1105 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1106 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1107 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1108 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1109 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1110 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1111 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1112 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1113 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1114 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1115 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1116 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1117 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1118 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1119 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1120 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1121 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1122 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1123 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1124 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1125 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1126 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1127 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1128 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1129
1130 /* This is a set of tables that came originally from a Windows user. It seems to
1131 be at least an approximation of ISO 8859. In particular, there are characters
1132 greater than 128 that are marked as spaces, letters, etc. */
1133
/* The data has the same size and arrangement as tables0: 256 bytes, 256 bytes,
ten 32-byte maps, then 256 bytes. NOTE(review): the section labels below are
inferred from that correspondence and from the values themselves - confirm. */
1134 static const pcre_uint8 tables1[] = {
/* Lower casing table (256 entries) */
1135 0,1,2,3,4,5,6,7,
1136 8,9,10,11,12,13,14,15,
1137 16,17,18,19,20,21,22,23,
1138 24,25,26,27,28,29,30,31,
1139 32,33,34,35,36,37,38,39,
1140 40,41,42,43,44,45,46,47,
1141 48,49,50,51,52,53,54,55,
1142 56,57,58,59,60,61,62,63,
1143 64,97,98,99,100,101,102,103,
1144 104,105,106,107,108,109,110,111,
1145 112,113,114,115,116,117,118,119,
1146 120,121,122,91,92,93,94,95,
1147 96,97,98,99,100,101,102,103,
1148 104,105,106,107,108,109,110,111,
1149 112,113,114,115,116,117,118,119,
1150 120,121,122,123,124,125,126,127,
1151 128,129,130,131,132,133,134,135,
1152 136,137,138,139,140,141,142,143,
1153 144,145,146,147,148,149,150,151,
1154 152,153,154,155,156,157,158,159,
1155 160,161,162,163,164,165,166,167,
1156 168,169,170,171,172,173,174,175,
1157 176,177,178,179,180,181,182,183,
1158 184,185,186,187,188,189,190,191,
1159 224,225,226,227,228,229,230,231,
1160 232,233,234,235,236,237,238,239,
1161 240,241,242,243,244,245,246,215,
1162 248,249,250,251,252,253,254,223,
1163 224,225,226,227,228,229,230,231,
1164 232,233,234,235,236,237,238,239,
1165 240,241,242,243,244,245,246,247,
1166 248,249,250,251,252,253,254,255,
/* Case flipping table (256 entries) */
1167 0,1,2,3,4,5,6,7,
1168 8,9,10,11,12,13,14,15,
1169 16,17,18,19,20,21,22,23,
1170 24,25,26,27,28,29,30,31,
1171 32,33,34,35,36,37,38,39,
1172 40,41,42,43,44,45,46,47,
1173 48,49,50,51,52,53,54,55,
1174 56,57,58,59,60,61,62,63,
1175 64,97,98,99,100,101,102,103,
1176 104,105,106,107,108,109,110,111,
1177 112,113,114,115,116,117,118,119,
1178 120,121,122,91,92,93,94,95,
1179 96,65,66,67,68,69,70,71,
1180 72,73,74,75,76,77,78,79,
1181 80,81,82,83,84,85,86,87,
1182 88,89,90,123,124,125,126,127,
1183 128,129,130,131,132,133,134,135,
1184 136,137,138,139,140,141,142,143,
1185 144,145,146,147,148,149,150,151,
1186 152,153,154,155,156,157,158,159,
1187 160,161,162,163,164,165,166,167,
1188 168,169,170,171,172,173,174,175,
1189 176,177,178,179,180,181,182,183,
1190 184,185,186,187,188,189,190,191,
1191 224,225,226,227,228,229,230,231,
1192 232,233,234,235,236,237,238,239,
1193 240,241,242,243,244,245,246,215,
1194 248,249,250,251,252,253,254,223,
1195 192,193,194,195,196,197,198,199,
1196 200,201,202,203,204,205,206,207,
1197 208,209,210,211,212,213,214,247,
1198 216,217,218,219,220,221,222,255,
/* Character class bit maps, 32 bytes each. First map: space */
1199 0,62,0,0,1,0,0,0,
1200 0,0,0,0,0,0,0,0,
1201 32,0,0,0,1,0,0,0,
1202 0,0,0,0,0,0,0,0,
/* xdigit */
1203 0,0,0,0,0,0,255,3,
1204 126,0,0,0,126,0,0,0,
1205 0,0,0,0,0,0,0,0,
1206 0,0,0,0,0,0,0,0,
/* digit */
1207 0,0,0,0,0,0,255,3,
1208 0,0,0,0,0,0,0,0,
1209 0,0,0,0,0,0,12,2,
1210 0,0,0,0,0,0,0,0,
/* upper */
1211 0,0,0,0,0,0,0,0,
1212 254,255,255,7,0,0,0,0,
1213 0,0,0,0,0,0,0,0,
1214 255,255,127,127,0,0,0,0,
/* lower */
1215 0,0,0,0,0,0,0,0,
1216 0,0,0,0,254,255,255,7,
1217 0,0,0,0,0,4,32,4,
1218 0,0,0,128,255,255,127,255,
/* word */
1219 0,0,0,0,0,0,255,3,
1220 254,255,255,135,254,255,255,7,
1221 0,0,0,0,0,4,44,6,
1222 255,255,127,255,255,255,127,255,
/* graph */
1223 0,0,0,0,254,255,255,255,
1224 255,255,255,255,255,255,255,127,
1225 0,0,0,0,254,255,255,255,
1226 255,255,255,255,255,255,255,255,
/* print */
1227 0,2,0,0,255,255,255,255,
1228 255,255,255,255,255,255,255,127,
1229 0,0,0,0,255,255,255,255,
1230 255,255,255,255,255,255,255,255,
/* punct */
1231 0,0,0,0,254,255,0,252,
1232 1,0,0,248,1,0,0,120,
1233 0,0,0,0,254,255,255,255,
1234 0,0,128,0,0,0,128,0,
/* cntrl */
1235 255,255,255,255,0,0,0,0,
1236 0,0,0,0,0,0,0,128,
1237 255,255,255,255,0,0,0,0,
1238 0,0,0,0,0,0,0,0,
/* Per-character type flags (256 entries) */
1239 128,0,0,0,0,0,0,0,
1240 0,1,1,0,1,1,0,0,
1241 0,0,0,0,0,0,0,0,
1242 0,0,0,0,0,0,0,0,
1243 1,0,0,0,128,0,0,0,
1244 128,128,128,128,0,0,128,0,
1245 28,28,28,28,28,28,28,28,
1246 28,28,0,0,0,0,0,128,
1247 0,26,26,26,26,26,26,18,
1248 18,18,18,18,18,18,18,18,
1249 18,18,18,18,18,18,18,18,
1250 18,18,18,128,128,0,128,16,
1251 0,26,26,26,26,26,26,18,
1252 18,18,18,18,18,18,18,18,
1253 18,18,18,18,18,18,18,18,
1254 18,18,18,128,128,0,0,0,
1255 0,0,0,0,0,1,0,0,
1256 0,0,0,0,0,0,0,0,
1257 0,0,0,0,0,0,0,0,
1258 0,0,0,0,0,0,0,0,
1259 1,0,0,0,0,0,0,0,
1260 0,0,18,0,0,0,0,0,
1261 0,0,20,20,0,18,0,0,
1262 0,20,18,0,0,0,0,0,
1263 18,18,18,18,18,18,18,18,
1264 18,18,18,18,18,18,18,18,
1265 18,18,18,18,18,18,18,0,
1266 18,18,18,18,18,18,18,18,
1267 18,18,18,18,18,18,18,18,
1268 18,18,18,18,18,18,18,18,
1269 18,18,18,18,18,18,18,0,
1270 18,18,18,18,18,18,18,18
1271 };
1272
1273
1274
1275
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for systems whose C library lacks strerror() (SunOS4 is the
usual example). The text is taken from the traditional sys_errlist[] table,
which has sys_nerr entries.

Argument:
  n          the error number to translate

Returns:     the table entry for n, or a fixed "unknown" text when n lies
             outside the table
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
#endif /* HAVE_STRERROR */
1295
1296
1297
1298 /*************************************************
1299 * Print newline configuration *
1300 *************************************************/
1301
1302 /*
1303 Arguments:
1304 rc the return code from PCRE_CONFIG_NEWLINE
1305 isc TRUE if called from "-C newline"
1306 Returns: nothing
1307 */
1308
1309 static void
1310 print_newline_config(int rc, BOOL isc)
1311 {
1312 const char *s = NULL;
1313 if (!isc) printf(" Newline sequence is ");
1314 switch(rc)
1315 {
1316 case CHAR_CR: s = "CR"; break;
1317 case CHAR_LF: s = "LF"; break;
1318 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1319 case -1: s = "ANY"; break;
1320 case -2: s = "ANYCRLF"; break;
1321
1322 default:
1323 printf("a non-standard value: 0x%04x\n", rc);
1324 return;
1325 }
1326
1327 printf("%s\n", s);
1328 }
1329
1330
1331
1332 /*************************************************
1333 * JIT memory callback *
1334 *************************************************/
1335
1336 static pcre_jit_stack* jit_callback(void *arg)
1337 {
1338 jit_was_used = TRUE;
1339 return (pcre_jit_stack *)arg;
1340 }
1341
1342
1343 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1344 /*************************************************
1345 * Convert UTF-8 string to value *
1346 *************************************************/
1347
1348 /* This function takes one or more bytes that represents a UTF-8 character,
1349 and returns the value of the character.
1350
1351 Argument:
1352 utf8bytes a pointer to the byte vector
1353 vptr a pointer to an int to receive the value
1354
1355 Returns: > 0 => the number of bytes consumed
1356 -6 to 0 => malformed UTF-8 character at offset = (-return)
1357 */
1358
1359 static int
1360 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1361 {
1362 int c = *utf8bytes++;
1363 int d = c;
1364 int i, j, s;
1365
1366 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1367 {
1368 if ((d & 0x80) == 0) break;
1369 d <<= 1;
1370 }
1371
1372 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1373 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1374
1375 /* i now has a value in the range 1-5 */
1376
1377 s = 6*i;
1378 d = (c & utf8_table3[i]) << s;
1379
1380 for (j = 0; j < i; j++)
1381 {
1382 c = *utf8bytes++;
1383 if ((c & 0xc0) != 0x80) return -(j+1);
1384 s -= 6;
1385 d |= (c & 0x3f) << s;
1386 }
1387
1388 /* Check that encoding was the correct unique one */
1389
1390 for (j = 0; j < utf8_table1_size; j++)
1391 if (d <= utf8_table1[j]) break;
1392 if (j != i) return -(i+1);
1393
1394 /* Valid value */
1395
1396 *vptr = d;
1397 return i+1;
1398 }
1399 #endif /* NOUTF || SUPPORT_PCRE16 */
1400
1401
1402
1403 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1404 /*************************************************
1405 * Convert character value to UTF-8 *
1406 *************************************************/
1407
1408 /* This function takes an integer value in the range 0 - 0x7fffffff
1409 and encodes it as a UTF-8 character in 0 to 6 bytes.
1410
1411 Arguments:
1412 cvalue the character value
1413 utf8bytes pointer to buffer for result - at least 6 bytes long
1414
1415 Returns: number of characters placed in the buffer
1416 */
1417
1418 static int
1419 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1420 {
1421 register int i, j;
1422 for (i = 0; i < utf8_table1_size; i++)
1423 if (cvalue <= utf8_table1[i]) break;
1424 utf8bytes += i;
1425 for (j = i; j > 0; j--)
1426 {
1427 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1428 cvalue >>= 6;
1429 }
1430 *utf8bytes = utf8_table2[i] | cvalue;
1431 return i + 1;
1432 }
1433 #endif
1434
1435
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is replaced by a larger block
when it holds fewer than 2*len + 2 bytes. That is always enough: a plain byte
string needs exactly two bytes per input byte, and a UTF-8 string never needs
more than that, because values up to 0xffff take at most 3 bytes in UTF-8 and
one 16-bit unit here, while higher values take 4 bytes in UTF-8 and two 16-bit
units here.

Surrogate values are not rejected. This is deliberate; it makes it possible to
construct UTF-16 strings that are invalid, for the purpose of testing that
they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;

if (buffer16_size < needed)
  {
  free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  int c = 0;
  int used;

  /* The input is UTF-8: decode one character at a time. */

  for (; len > 0; p += used, len -= used)
    {
    used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which only exists in UTF mode. */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: each byte simply becomes one 16-bit item. */
  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return (int)(out - buffer16);
}
#endif
1514
#ifdef SUPPORT_PCRE32
/*************************************************
*         Convert a string to 32-bit             *
*************************************************/

/* The result is always left in buffer32, which is replaced by a larger block
when it holds fewer than 4*len + 4 bytes. That is always enough: a plain byte
string needs exactly four bytes per input byte, and a UTF-8 string never
yields more characters than it has bytes.

When converting a data line, surrogate values are not rejected. This is
deliberate; it makes it possible to construct UTF-32 strings that are invalid,
for the purpose of testing that they are correctly faulted. In a UTF pattern,
however, a surrogate is an error. The range checks are made only in UTF mode.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-32)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 32-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if an ill-formed value is encountered (i.e. a surrogate)
*/

static int
to32(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint32 *out;
int needed = 4*len + 4;

if (buffer32_size < needed)
  {
  free(buffer32);
  buffer32_size = needed;
  buffer32 = (pcre_uint32 *)malloc(buffer32_size);
  if (buffer32 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
    exit(1);
    }
  }

out = buffer32;

if (utf || data)
  {
  int c = 0;
  int used;

  /* The input is UTF-8: decode one character at a time. */

  for (; len > 0; p += used, len -= used)
    {
    used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (utf && c > 0x10ffff) return -2;
    if (utf && !data && (c & 0xfffff800u) == 0xd800u) return -3;
    *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: each byte simply becomes one 32-bit item. */
  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return (int)(out - buffer32);
}
#endif
1592
1593 /*************************************************
1594 * Read or extend an input line *
1595 *************************************************/
1596
1597 /* Input lines are read into buffer, but both patterns and data lines can be
1598 continued over multiple input lines. In addition, if the buffer fills up, we
1599 want to automatically expand it so as to be able to handle extremely large
1600 lines that are needed for certain stress tests. When the input buffer is
1601 expanded, the other two buffers must also be expanded likewise, and the
1602 contents of pbuffer, which are a copy of the input for callouts, must be
1603 preserved (for when expansion happens for a data line). This is not the most
1604 optimal way of handling this, but hey, this is just a test program!
1605
1606 Arguments:
1607 f the file to read
1608 start where in buffer to start (this *must* be within buffer)
1609 prompt for stdin or readline()
1610
1611 Returns: pointer to the start of new data
1612 could be a copy of start, or could be moved
1613 NULL if no data read and EOF reached
1614 */
1615
1616 static pcre_uint8 *
1617 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1618 {
1619 pcre_uint8 *here = start;
1620
1621 for (;;)
1622 {
1623 size_t rlen = (size_t)(buffer_size - (here - buffer));
1624
1625 if (rlen > 1000)
1626 {
1627 int dlen;
1628
1629 /* If libreadline or libedit support is required, use readline() to read a
1630 line if the input is a terminal. Note that readline() removes the trailing
1631 newline, so we must put it back again, to be compatible with fgets(). */
1632
1633 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1634 if (isatty(fileno(f)))
1635 {
1636 size_t len;
1637 char *s = readline(prompt);
1638 if (s == NULL) return (here == start)? NULL : start;
1639 len = strlen(s);
1640 if (len > 0) add_history(s);
1641 if (len > rlen - 1) len = rlen - 1;
1642 memcpy(here, s, len);
1643 here[len] = '\n';
1644 here[len+1] = 0;
1645 free(s);
1646 }
1647 else
1648 #endif
1649
1650 /* Read the next line by normal means, prompting if the file is stdin. */
1651
1652 {
1653 if (f == stdin) printf("%s", prompt);
1654 if (fgets((char *)here, rlen, f) == NULL)
1655 return (here == start)? NULL : start;
1656 }
1657
1658 dlen = (int)strlen((char *)here);
1659 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1660 here += dlen;
1661 }
1662
1663 else
1664 {
1665 int new_buffer_size = 2*buffer_size;
1666 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1667 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1668 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1669
1670 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1671 {
1672 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1673 exit(1);
1674 }
1675
1676 memcpy(new_buffer, buffer, buffer_size);
1677 memcpy(new_pbuffer, pbuffer, buffer_size);
1678
1679 buffer_size = new_buffer_size;
1680
1681 start = new_buffer + (start - buffer);
1682 here = new_buffer + (here - buffer);
1683
1684 free(buffer);
1685 free(dbuffer);
1686 free(pbuffer);
1687
1688 buffer = new_buffer;
1689 dbuffer = new_dbuffer;
1690 pbuffer = new_pbuffer;
1691 }
1692 }
1693
1694 return NULL; /* Control never gets here */
1695 }
1696
1697
1698
1699 /*************************************************
1700 * Read number from string *
1701 *************************************************/
1702
1703 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1704 around with conditional compilation, just do the job by hand. It is only used
1705 for unpicking arguments, so just keep it simple.
1706
1707 Arguments:
1708 str string to be converted
1709 endptr where to put the end pointer
1710
1711 Returns: the unsigned long
1712 */
1713
1714 static int
1715 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1716 {
1717 int result = 0;
1718 while(*str != 0 && isspace(*str)) str++;
1719 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1720 *endptr = str;
1721 return(result);
1722 }
1723
1724
1725
1726 /*************************************************
1727 * Print one character *
1728 *************************************************/
1729
1730 /* Print a single character either literally, or as a hex escape. */
1731
1732 static int pchar(pcre_uint32 c, FILE *f)
1733 {
1734 if (PRINTOK(c))
1735 {
1736 if (f != NULL) fprintf(f, "%c", c);
1737 return 1;
1738 }
1739
1740 if (c < 0x100)
1741 {
1742 if (use_utf)
1743 {
1744 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1745 return 6;
1746 }
1747 else
1748 {
1749 if (f != NULL) fprintf(f, "\\x%02x", c);
1750 return 4;
1751 }
1752 }
1753
1754 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1755 return (c <= 0x000000ff)? 6 :
1756 (c <= 0x00000fff)? 7 :
1757 (c <= 0x0000ffff)? 8 :
1758 (c <= 0x000fffff)? 9 : 10;
1759 }
1760
1761
1762
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. A negative length
means that the string is zero-terminated. In UTF mode, a byte that does not
start a valid character lying wholly within the string is output as a single
byte value.

Arguments:
  p          the string
  length     its length in bytes, or negative for a zero-terminated string
  f          the file to write to, or NULL

Returns:     the sum of the widths reported by pchar()
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int c = 0;
int yield = 0;
int remaining = (length < 0)? (int)strlen((char *)p) : length;

while (remaining > 0)
  {
  int used = 0;

#if !defined NOUTF
  if (use_utf)
    {
    int rc = utf82ord(p, &c);
    if (rc > 0 && rc <= remaining) used = rc;  /* Mustn't run over the end */
    }
#endif

  /* Not UTF mode, or no usable character here: take one byte as it is. */

  if (used == 0)
    {
    c = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  yield += pchar(c, f);
  }

return yield;
}
#endif
1801
1802
1803
1804 #ifdef SUPPORT_PCRE16
1805 /*************************************************
1806 * Find length of 0-terminated 16-bit string *
1807 *************************************************/
1808
1809 static int strlen16(PCRE_SPTR16 p)
1810 {
1811 int len = 0;
1812 while (*p++ != 0) len++;
1813 return len;
1814 }
1815 #endif /* SUPPORT_PCRE16 */
1816
1817
1818
1819 #ifdef SUPPORT_PCRE32
1820 /*************************************************
1821 * Find length of 0-terminated 32-bit string *
1822 *************************************************/
1823
1824 static int strlen32(PCRE_SPTR32 p)
1825 {
1826 int len = 0;
1827 while (*p++ != 0) len++;
1828 return len;
1829 }
1830 #endif /* SUPPORT_PCRE32 */
1831
1832
1833
1834 #ifdef SUPPORT_PCRE16
1835 /*************************************************
1836 * Print 16-bit character string *
1837 *************************************************/
1838
1839 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1840 If handed a NULL file, just counts chars without printing. */
1841
1842 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1843 {
1844 int yield = 0;
1845
1846 if (length < 0)
1847 length = strlen16(p);
1848
1849 while (length-- > 0)
1850 {
1851 pcre_uint32 c = *p++ & 0xffff;
1852 #if !defined NOUTF
1853 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1854 {
1855 int d = *p & 0xffff;
1856 if (d >= 0xDC00 && d < 0xDFFF)
1857 {
1858 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1859 length--;
1860 p++;
1861 }
1862 }
1863 #endif
1864 yield += pchar(c, f);
1865 }
1866
1867 return yield;
1868 }
1869 #endif /* SUPPORT_PCRE16 */
1870
1871
1872
1873 #ifdef SUPPORT_PCRE32
1874 /*************************************************
1875 * Print 32-bit character string *
1876 *************************************************/
1877
1878 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
1879 If handed a NULL file, just counts chars without printing. */
1880
1881 static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
1882 {
1883 int yield = 0;
1884
1885 if (length < 0)
1886 length = strlen32(p);
1887
1888 while (length-- > 0)
1889 {
1890 pcre_uint32 c = *p++;
1891 yield += pchar(c, f);
1892 }
1893
1894 return yield;
1895 }
1896 #endif /* SUPPORT_PCRE32 */
1897
1898
1899
1900 #ifdef SUPPORT_PCRE8
1901 /*************************************************
1902 * Read a capture name (8-bit) and check it *
1903 *************************************************/
1904
1905 static pcre_uint8 *
1906 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1907 {
1908 pcre_uint8 *npp = *pp;
1909 while (isalnum(*p)) *npp++ = *p++;
1910 *npp++ = 0;
1911 *npp = 0;
1912 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1913 {
1914 fprintf(outfile, "no parentheses with name \"");
1915 PCHARSV(*pp, 0, -1, outfile);
1916 fprintf(outfile, "\"\n");
1917 }
1918
1919 *pp = npp;
1920 return p;
1921 }
1922 #endif /* SUPPORT_PCRE8 */
1923
1924
1925
1926 #ifdef SUPPORT_PCRE16
1927 /*************************************************
1928 * Read a capture name (16-bit) and check it *
1929 *************************************************/
1930
1931 /* Note that the text being read is 8-bit. */
1932
1933 static pcre_uint8 *
1934 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1935 {
1936 pcre_uint16 *npp = *pp;
1937 while (isalnum(*p)) *npp++ = *p++;
1938 *npp++ = 0;
1939 *npp = 0;
1940 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1941 {
1942 fprintf(outfile, "no parentheses with name \"");
1943 PCHARSV(*pp, 0, -1, outfile);
1944 fprintf(outfile, "\"\n");
1945 }
1946 *pp = npp;
1947 return p;
1948 }
1949 #endif /* SUPPORT_PCRE16 */
1950
1951
1952
1953 #ifdef SUPPORT_PCRE32
1954 /*************************************************
1955 * Read a capture name (32-bit) and check it *
1956 *************************************************/
1957
1958 /* Note that the text being read is 8-bit. */
1959
1960 static pcre_uint8 *
1961 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
1962 {
1963 pcre_uint32 *npp = *pp;
1964 while (isalnum(*p)) *npp++ = *p++;
1965 *npp++ = 0;
1966 *npp = 0;
1967 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
1968 {
1969 fprintf(outfile, "no parentheses with name \"");
1970 PCHARSV(*pp, 0, -1, outfile);
1971 fprintf(outfile, "\"\n");
1972 }
1973 *pp = npp;
1974 return p;
1975 }
1976 #endif /* SUPPORT_PCRE32 */
1977
1978
1979
1980 /*************************************************
1981 * Callout function *
1982 *************************************************/
1983
1984 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1985 the match. Yield zero unless more callouts than the fail count, or the callout
1986 data is not zero. */
1987
1988 static int callout(pcre_callout_block *cb)
1989 {
1990 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1991 int i, pre_start, post_start, subject_length;
1992
1993 if (callout_extra)
1994 {
1995 fprintf(f, "Callout %d: last capture = %d\n",
1996 cb->callout_number, cb->capture_last);
1997
1998 for (i = 0; i < cb->capture_top * 2; i += 2)
1999 {
2000 if (cb->offset_vector[i] < 0)
2001 fprintf(f, "%2d: <unset>\n", i/2);
2002 else
2003 {
2004 fprintf(f, "%2d: ", i/2);
2005 PCHARSV(cb->subject, cb->offset_vector[i],
2006 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2007 fprintf(f, "\n");
2008 }
2009 }
2010 }
2011
2012 /* Re-print the subject in canonical form, the first time or if giving full
2013 datails. On subsequent calls in the same match, we use pchars just to find the
2014 printed lengths of the substrings. */
2015
2016 if (f != NULL) fprintf(f, "--->");
2017
2018 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2019 PCHARS(post_start, cb->subject, cb->start_match,
2020 cb->current_position - cb->start_match, f);
2021
2022 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2023
2024 PCHARSV(cb->subject, cb->current_position,
2025 cb->subject_length - cb->current_position, f);
2026
2027 if (f != NULL) fprintf(f, "\n");
2028
2029 /* Always print appropriate indicators, with callout number if not already
2030 shown. For automatic callouts, show the pattern offset. */
2031
2032 if (cb->callout_number == 255)
2033 {
2034 fprintf(outfile, "%+3d ", cb->pattern_position);
2035 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2036 }
2037 else
2038 {
2039 if (callout_extra) fprintf(outfile, " ");
2040 else fprintf(outfile, "%3d ", cb->callout_number);
2041 }
2042
2043 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2044 fprintf(outfile, "^");
2045
2046 if (post_start > 0)
2047 {
2048 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2049 fprintf(outfile, "^");
2050 }
2051
2052 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2053 fprintf(outfile, " ");
2054
2055 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2056 pbuffer + cb->pattern_position);
2057
2058 fprintf(outfile, "\n");
2059 first_callout = 0;
2060
2061 if (cb->mark != last_callout_mark)
2062 {
2063 if (cb->mark == NULL)
2064 fprintf(outfile, "Latest Mark: <unset>\n");
2065 else
2066 {
2067 fprintf(outfile, "Latest Mark: ");
2068 PCHARSV(cb->mark, 0, -1, outfile);
2069 putc('\n', outfile);
2070 }
2071 last_callout_mark = cb->mark;
2072 }
2073
2074 if (cb->callout_data != NULL)
2075 {
2076 int callout_data = *((int *)(cb->callout_data));
2077 if (callout_data != 0)
2078 {
2079 fprintf(outfile, "Callout data = %d\n", callout_data);
2080 return callout_data;
2081 }
2082 }
2083
2084 return (cb->callout_number != callout_fail_id)? 0 :
2085 (++callout_count >= callout_fail_count)? 1 : 0;
2086 }
2087
2088
2089 /*************************************************
2090 * Local malloc functions *
2091 *************************************************/
2092
2093 /* Alternative malloc function, to test functionality and save the size of a
2094 compiled re, which is the first store request that pcre_compile() makes. The
2095 show_malloc variable is set only during matching. */
2096
2097 static void *new_malloc(size_t size)
2098 {
2099 void *block = malloc(size);
2100 gotten_store = size;
2101 if (first_gotten_store == 0) first_gotten_store = size;
2102 if (show_malloc)
2103 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2104 return block;
2105 }
2106
2107 static void new_free(void *block)
2108 {
2109 if (show_malloc)
2110 fprintf(outfile, "free %p\n", block);
2111 free(block);
2112 }
2113
2114 /* For recursion malloc/free, to test stacking calls */
2115
2116 static void *stack_malloc(size_t size)
2117 {
2118 void *block = malloc(size);
2119 if (show_malloc)
2120 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2121 return block;
2122 }
2123
2124 static void stack_free(void *block)
2125 {
2126 if (show_malloc)
2127 fprintf(outfile, "stack_free %p\n", block);
2128 free(block);
2129 }
2130
2131
2132 /*************************************************
2133 * Call pcre_fullinfo() *
2134 *************************************************/
2135
2136 /* Get one piece of information from the pcre_fullinfo() function. When only
2137 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2138 value, but the code is defensive.
2139
2140 Arguments:
2141 re compiled regex
2142 study study data
2143 option PCRE_INFO_xxx option
2144 ptr where to put the data
2145
2146 Returns: 0 when OK, < 0 on error
2147 */
2148
2149 static int
2150 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2151 {
2152 int rc;
2153
2154 if (pcre_mode == PCRE32_MODE)
2155 #ifdef SUPPORT_PCRE32
2156 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2157 #else
2158 rc = PCRE_ERROR_BADMODE;
2159 #endif
2160 else if (pcre_mode == PCRE16_MODE)
2161 #ifdef SUPPORT_PCRE16
2162 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2163 #else
2164 rc = PCRE_ERROR_BADMODE;
2165 #endif
2166 else
2167 #ifdef SUPPORT_PCRE8
2168 rc = pcre_fullinfo(re, study, option, ptr);
2169 #else
2170 rc = PCRE_ERROR_BADMODE;
2171 #endif
2172
2173 if (rc < 0)
2174 {
2175 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2176 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2177 if (rc == PCRE_ERROR_BADMODE)
2178 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2179 "%d-bit mode\n", 8 * CHAR_SIZE,
2180 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2181 }
2182
2183 return rc;
2184 }
2185
2186
2187
2188 /*************************************************
2189 * Swap byte functions *
2190 *************************************************/
2191
2192 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2193 value, respectively.
2194
2195 Arguments:
2196 value any number
2197
2198 Returns: the byte swapped value
2199 */
2200
2201 static pcre_uint32
2202 swap_uint32(pcre_uint32 value)
2203 {
2204 return ((value & 0x000000ff) << 24) |
2205 ((value & 0x0000ff00) << 8) |
2206 ((value & 0x00ff0000) >> 8) |
2207 (value >> 24);
2208 }
2209
2210 static pcre_uint16
2211 swap_uint16(pcre_uint16 value)
2212 {
2213 return (value >> 8) | (value << 8);
2214 }
2215
2216
2217
2218 /*************************************************
2219 * Flip bytes in a compiled pattern *
2220 *************************************************/
2221
2222 /* This function is called if the 'F' option was present on a pattern that is
2223 to be written to a file. We flip the bytes of all the integer fields in the
2224 regex data block and the study block. In 16-bit mode this also flips relevant
2225 bytes in the pattern itself. This is to make it possible to test PCRE's
2226 ability to reload byte-flipped patterns, e.g. those compiled on a different
2227 architecture. */
2228
2229 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2230 static void
2231 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2232 {
2233 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2234 #ifdef SUPPORT_PCRE16
2235 int op;
2236 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2237 int length = re->name_count * re->name_entry_size;
2238 #ifdef SUPPORT_UTF
2239 BOOL utf = (re->options & PCRE_UTF16) != 0;
2240 BOOL utf16_char = FALSE;
2241 #endif /* SUPPORT_UTF */
2242 #endif /* SUPPORT_PCRE16 */
2243
2244 /* Always flip the bytes in the main data block and study blocks. */
2245
2246 re->magic_number = REVERSED_MAGIC_NUMBER;
2247 re->size = swap_uint32(re->size);
2248 re->options = swap_uint32(re->options);
2249 re->flags = swap_uint16(re->flags);
2250 re->top_bracket = swap_uint16(re->top_bracket);
2251 re->top_backref = swap_uint16(re->top_backref);
2252 re->first_char = swap_uint16(re->first_char);
2253 re->req_char = swap_uint16(re->req_char);
2254 re->name_table_offset = swap_uint16(re->name_table_offset);
2255 re->name_entry_size = swap_uint16(re->name_entry_size);
2256 re->name_count = swap_uint16(re->name_count);
2257
2258 if (extra != NULL)
2259 {
2260 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2261 rsd->size = swap_uint32(rsd->size);
2262 rsd->flags = swap_uint32(rsd->flags);
2263 rsd->minlength = swap_uint32(rsd->minlength);
2264 }
2265
2266 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2267 in the name table, if present, and then in the pattern itself. */
2268
2269 #ifdef SUPPORT_PCRE16
2270 if (pcre_mode != PCRE16_MODE) return;
2271
2272 while(TRUE)
2273 {
2274 /* Swap previous characters. */
2275 while (length-- > 0)
2276 {
2277 *ptr = swap_uint16(*ptr);
2278 ptr++;
2279 }
2280 #ifdef SUPPORT_UTF
2281 if (utf16_char)
2282 {
2283 if ((ptr[-1] & 0xfc00) == 0xd800)
2284 {
2285 /* We know that there is only one extra character in UTF-16. */
2286 *ptr = swap_uint16(*ptr);
2287 ptr++;
2288 }
2289 }
2290 utf16_char = FALSE;
2291 #endif /* SUPPORT_UTF */
2292
2293 /* Get next opcode. */
2294
2295 length = 0;
2296 op = *ptr;
2297 *ptr++ = swap_uint16(op);
2298
2299 switch (op)
2300 {
2301 case OP_END:
2302 return;
2303
2304 #ifdef SUPPORT_UTF
2305 case OP_CHAR:
2306 case OP_CHARI:
2307 case OP_NOT:
2308 case OP_NOTI:
2309 case OP_STAR:
2310 case OP_MINSTAR:
2311 case OP_PLUS:
2312 case OP_MINPLUS:
2313 case OP_QUERY:
2314 case OP_MINQUERY:
2315 case OP_UPTO:
2316 case OP_MINUPTO:
2317 case OP_EXACT:
2318 case OP_POSSTAR:
2319 case OP_POSPLUS:
2320 case OP_POSQUERY:
2321 case OP_POSUPTO:
2322 case OP_STARI:
2323 case OP_MINSTARI:
2324 case OP_PLUSI:
2325 case OP_MINPLUSI:
2326 case OP_QUERYI:
2327 case OP_MINQUERYI:
2328 case OP_UPTOI:
2329 case OP_MINUPTOI:
2330 case OP_EXACTI:
2331 case OP_POSSTARI:
2332 case OP_POSPLUSI:
2333 case OP_POSQUERYI:
2334 case OP_POSUPTOI:
2335 case OP_NOTSTAR:
2336 case OP_NOTMINSTAR:
2337 case OP_NOTPLUS:
2338 case OP_NOTMINPLUS:
2339 case OP_NOTQUERY:
2340 case OP_NOTMINQUERY:
2341 case OP_NOTUPTO:
2342 case OP_NOTMINUPTO:
2343 case OP_NOTEXACT:
2344 case OP_NOTPOSSTAR:
2345 case OP_NOTPOSPLUS:
2346 case OP_NOTPOSQUERY:
2347 case OP_NOTPOSUPTO:
2348 case OP_NOTSTARI:
2349 case OP_NOTMINSTARI:
2350 case OP_NOTPLUSI:
2351 case OP_NOTMINPLUSI:
2352 case OP_NOTQUERYI:
2353 case OP_NOTMINQUERYI:
2354 case OP_NOTUPTOI:
2355 case OP_NOTMINUPTOI:
2356 case OP_NOTEXACTI:
2357 case OP_NOTPOSSTARI:
2358 case OP_NOTPOSPLUSI:
2359 case OP_NOTPOSQUERYI:
2360 case OP_NOTPOSUPTOI:
2361 if (utf) utf16_char = TRUE;
2362 #endif
2363 /* Fall through. */
2364
2365 default:
2366 length = OP_lengths16[op] - 1;
2367 break;
2368
2369 case OP_CLASS:
2370 case OP_NCLASS:
2371 /* Skip the character bit map. */
2372 ptr += 32/sizeof(pcre_uint16);
2373 length = 0;
2374 break;
2375
2376 case OP_XCLASS:
2377 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2378 if (LINK_SIZE > 1)
2379 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2380 - (1 + LINK_SIZE + 1));
2381 else
2382 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2383
2384 /* Reverse the size of the XCLASS instance. */
2385 *ptr = swap_uint16(*ptr);
2386 ptr++;
2387 if (LINK_SIZE > 1)
2388 {
2389 *ptr = swap_uint16(*ptr);
2390 ptr++;
2391 }
2392
2393 op = *ptr;
2394 *ptr = swap_uint16(op);
2395 ptr++;
2396 if ((op & XCL_MAP) != 0)
2397 {
2398 /* Skip the character bit map. */
2399 ptr += 32/sizeof(pcre_uint16);
2400 length -= 32/sizeof(pcre_uint16);
2401 }
2402 break;
2403 }
2404 }
2405 /* Control should never reach here in 16 bit mode. */
2406 #endif /* SUPPORT_PCRE16 */
2407 }
2408 #endif /* SUPPORT_PCRE[8|16] */
2409
2410
2411
2412 #if defined SUPPORT_PCRE32
2413 static void
2414 regexflip_32(pcre *ere, pcre_extra *extra)
2415 {
2416 real_pcre32 *re = (real_pcre32 *)ere;
2417 int op;
2418 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2419 int length = re->name_count * re->name_entry_size;
2420 #ifdef SUPPORT_UTF
2421 BOOL utf = (re->options & PCRE_UTF32) != 0;
2422 #endif /* SUPPORT_UTF */
2423
2424 /* Always flip the bytes in the main data block and study blocks. */
2425
2426 re->magic_number = REVERSED_MAGIC_NUMBER;
2427 re->size = swap_uint32(re->size);
2428 re->options = swap_uint32(re->options);
2429 re->flags = swap_uint16(re->flags);
2430 re->top_bracket = swap_uint16(re->top_bracket);
2431 re->top_backref = swap_uint16(re->top_backref);
2432 re->first_char = swap_uint32(re->first_char);
2433 re->req_char = swap_uint32(re->req_char);
2434 re->name_table_offset = swap_uint16(re->name_table_offset);
2435 re->name_entry_size = swap_uint16(re->name_entry_size);
2436 re->name_count = swap_uint16(re->name_count);
2437
2438 if (extra != NULL)
2439 {
2440 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2441 rsd->size = swap_uint32(rsd->size);
2442 rsd->flags = swap_uint32(rsd->flags);
2443 rsd->minlength = swap_uint32(rsd->minlength);
2444 }
2445
2446 /* In 32-bit mode we must swap bytes
2447 in the name table, if present, and then in the pattern itself. */
2448
2449 while(TRUE)
2450 {
2451 /* Swap previous characters. */
2452 while (length-- > 0)
2453 {
2454 *ptr = swap_uint32(*ptr);
2455 ptr++;
2456 }
2457
2458 /* Get next opcode. */
2459
2460 length = 0;
2461 op = *ptr;
2462 *ptr++ = swap_uint32(op);
2463
2464 switch (op)
2465 {
2466 case OP_END:
2467 return;
2468
2469 default:
2470 length = OP_lengths32[op] - 1;
2471 break;
2472
2473 case OP_CLASS:
2474 case OP_NCLASS:
2475 /* Skip the character bit map. */
2476 ptr += 32/sizeof(pcre_uint32);
2477 length = 0;
2478 break;
2479
2480 case OP_XCLASS:
2481 /* LINK_SIZE can only be 1 in 32-bit mode. */
2482 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2483
2484 /* Reverse the size of the XCLASS instance. */
2485 *ptr = swap_uint32(*ptr);
2486 ptr++;
2487
2488 op = *ptr;
2489 *ptr = swap_uint32(op);
2490 ptr++;
2491 if ((op & XCL_MAP) != 0)
2492 {
2493 /* Skip the character bit map. */
2494 ptr += 32/sizeof(pcre_uint32);
2495 length -= 32/sizeof(pcre_uint32);
2496 }
2497 break;
2498 }
2499 }
2500 /* Control should never reach here in 32 bit mode. */
2501 }
2502
2503 #endif /* SUPPORT_PCRE32 */
2504
2505
2506
2507 static void
2508 regexflip(pcre *ere, pcre_extra *extra)
2509 {
2510 #if defined SUPPORT_PCRE32
2511 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2512 regexflip_32(ere, extra);
2513 #endif
2514 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2515 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2516 regexflip8_or_16(ere, extra);
2517 #endif
2518 }
2519
2520
2521
2522 /*************************************************
2523 * Check match or recursion limit *
2524 *************************************************/
2525
2526 static int
2527 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2528 int start_offset, int options, int *use_offsets, int use_size_offsets,
2529 int flag, unsigned long int *limit, int errnumber, const char *msg)
2530 {
2531 int count;
2532 int min = 0;
2533 int mid = 64;
2534 int max = -1;
2535
2536 extra->flags |= flag;
2537
2538 for (;;)
2539 {
2540 *limit = mid;
2541
2542 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2543 use_offsets, use_size_offsets);
2544
2545 if (count == errnumber)
2546 {
2547 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2548 min = mid;
2549 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2550 }
2551
2552 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2553 count == PCRE_ERROR_PARTIAL)
2554 {
2555 if (mid == min + 1)
2556 {
2557 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2558 break;
2559 }
2560 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2561 max = mid;
2562 mid = (min + mid)/2;
2563 }
2564 else break; /* Some other error */
2565 }
2566
2567 extra->flags &= ~flag;
2568 return count;
2569 }
2570
2571
2572
2573 /*************************************************
2574 * Case-independent strncmp() function *
2575 *************************************************/
2576
2577 /*
2578 Arguments:
2579 s first string
2580 t second string
2581 n number of characters to compare
2582
2583 Returns: < 0, = 0, or > 0, according to the comparison
2584 */
2585
2586 static int
2587 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2588 {
2589 while (n--)
2590 {
2591 int c = tolower(*s++) - tolower(*t++);
2592 if (c) return c;
2593 }
2594 return 0;
2595 }
2596
2597
2598
2599 /*************************************************
2600 * Check newline indicator *
2601 *************************************************/
2602
2603 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2604 a message and return 0 if there is no match.
2605
2606 Arguments:
2607 p points after the leading '<'
2608 f file for error message
2609
2610 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2611 */
2612
2613 static int
2614 check_newline(pcre_uint8 *p, FILE *f)
2615 {
2616 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2617 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2618 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2619 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2620 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2621 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2622 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2623 fprintf(f, "Unknown newline type at: <%s\n", p);
2624 return 0;
2625 }
2626
2627
2628
2629 /*************************************************
2630 * Usage function *
2631 *************************************************/
2632
/* Write the command-line help text to stdout. Which lines appear depends on
the features selected at build time. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE32
fputs(" -32 use the 32-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n"
      " linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " pcre32 32 bit library support enabled [0, 1]\n"
      " utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -s++ ditto, verifying when JIT was actually used\n"
      " -s+n force each pattern to be studied, using JIT if available,\n"
      " where 1 <= n <= 7 selects JIT options\n"
      " -s++n ditto, verifying when JIT was actually used\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2687
2688
2689
2690 /*************************************************
2691 * Main Program *
2692 *************************************************/
2693
2694 /* Read lines from named file or stdin and write to named file or stdout; lines
2695 consist of a regular expression, in delimiters and optionally followed by
2696 options, followed by a set of test data, terminated by an empty line. */
2697
2698 int main(int argc, char **argv)
2699 {
2700 FILE *infile = stdin;
2701 const char *version;
2702 int options = 0;
2703 int study_options = 0;
2704 int default_find_match_limit = FALSE;
2705 int op = 1;
2706 int timeit = 0;
2707 int timeitm = 0;
2708 int showinfo = 0;
2709 int showstore = 0;
2710 int force_study = -1;
2711 int force_study_options = 0;
2712 int quiet = 0;
2713 int size_offsets = 45;
2714 int size_offsets_max;
2715 int *offsets = NULL;
2716 int debug = 0;
2717 int done = 0;
2718 int all_use_dfa = 0;
2719 int verify_jit = 0;
2720 int yield = 0;
2721 int stack_size;
2722
2723 #if !defined NOPOSIX
2724 int posix = 0;
2725 #endif
2726 #if !defined NODFA
2727 int *dfa_workspace = NULL;
2728 #endif
2729
2730 pcre_jit_stack *jit_stack = NULL;
2731
2732 /* These vectors store, end-to-end, a list of zero-terminated captured
2733 substring names, each list itself being terminated by an empty name. Assume
2734 that 1024 is plenty long enough for the few names we'll be testing. It is
2735 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2736 for the actual memory, to ensure alignment. */
2737
2738 pcre_uint32 copynames[1024];
2739 pcre_uint32 getnames[1024];
2740
2741 #ifdef SUPPORT_PCRE32
2742 pcre_uint32 *cn32ptr;
2743 pcre_uint32 *gn32ptr;
2744 #endif
2745
2746 #ifdef SUPPORT_PCRE16
2747 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2748 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2749 pcre_uint16 *cn16ptr;
2750 pcre_uint16 *gn16ptr;
2751 #endif
2752
2753 #ifdef SUPPORT_PCRE8
2754 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2755 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2756 pcre_uint8 *cn8ptr;
2757 pcre_uint8 *gn8ptr;
2758 #endif
2759
2760 /* Get buffers from malloc() so that valgrind will check their misuse when
2761 debugging. They grow automatically when very long lines are read. The 16-
2762 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2763
2764 buffer = (pcre_uint8 *)malloc(buffer_size);
2765 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2766 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2767
2768 /* The outfile variable is static so that new_malloc can use it. */
2769
2770 outfile = stdout;
2771
2772 /* The following _setmode() stuff is some Windows magic that tells its runtime
2773 library to translate CRLF into a single LF character. At least, that's what
2774 I've been told: never having used Windows I take this all on trust. Originally
2775 it set 0x8000, but then I was advised that _O_BINARY was better. */
2776
2777 #if defined(_WIN32) || defined(WIN32)
2778 _setmode( _fileno( stdout ), _O_BINARY );
2779 #endif
2780
2781 /* Get the version number: both pcre_version() and pcre16_version() give the
2782 same answer. We just need to ensure that we call one that is available. */
2783
2784 #if defined SUPPORT_PCRE8
2785 version = pcre_version();
2786 #elif defined SUPPORT_PCRE16
2787 version = pcre16_version();
2788 #elif defined SUPPORT_PCRE32
2789 version = pcre32_version();
2790 #endif
2791
2792 /* Scan options */
2793
2794 while (argc > 1 && argv[op][0] == '-')
2795 {
2796 pcre_uint8 *endptr;
2797 char *arg = argv[op];
2798
2799 if (strcmp(arg, "-m") == 0) showstore = 1;
2800 else if (strcmp(arg, "-s") == 0) force_study = 0;
2801
2802 else if (strncmp(arg, "-s+", 3) == 0)
2803 {
2804 arg += 3;
2805 if (*arg == '+') { arg++; verify_jit = TRUE; }
2806 force_study = 1;
2807 if (*arg == 0)
2808 force_study_options = jit_study_bits[6];
2809 else if (*arg >= '1' && *arg <= '7')
2810 force_study_options = jit_study_bits[*arg - '1'];
2811 else goto BAD_ARG;
2812 }
2813 else if (strcmp(arg, "-16") == 0)
2814 {
2815 #ifdef SUPPORT_PCRE16
2816 pcre_mode = PCRE16_MODE;
2817 #else
2818 printf("** This version of PCRE was built without 16-bit support\n");
2819 exit(1);
2820 #endif
2821 }
2822 else if (strcmp(arg, "-32") == 0)
2823 {
2824 #ifdef SUPPORT_PCRE32
2825 pcre_mode = PCRE32_MODE;
2826 #else
2827 printf("** This version of PCRE was built without 32-bit support\n");
2828 exit(1);
2829 #endif
2830 }
2831 else if (strcmp(arg, "-q") == 0) quiet = 1;
2832 else if (strcmp(arg, "-b") == 0) debug = 1;
2833 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2834 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2835 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2836 #if !defined NODFA
2837 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2838 #endif
2839 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2840 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2841 *endptr == 0))
2842 {
2843 op++;
2844 argc--;
2845 }
2846 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2847 {
2848 int both = arg[2] == 0;
2849 int temp;
2850 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2851 *endptr == 0))
2852 {
2853 timeitm = temp;
2854 op++;
2855 argc--;
2856 }
2857 else timeitm = LOOPREPEAT;
2858 if (both) timeit = timeitm;
2859 }
2860 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2861 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2862 *endptr == 0))
2863 {
2864 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2865 printf("PCRE: -S not supported on this OS\n");
2866 exit(1);
2867 #else
2868 int rc;
2869 struct rlimit rlim;
2870 getrlimit(RLIMIT_STACK, &rlim);
2871 rlim.rlim_cur = stack_size * 1024 * 1024;
2872 rc = setrlimit(RLIMIT_STACK, &rlim);
2873 if (rc != 0)
2874 {
2875 printf("PCRE: setrlimit() failed with error %d\n", rc);
2876 exit(1);
2877 }
2878 op++;
2879 argc--;
2880 #endif
2881 }
2882 #if !defined NOPOSIX
2883 else if (strcmp(arg, "-p") == 0) posix = 1;
2884 #endif
2885 else if (strcmp(arg, "-C") == 0)
2886 {
2887 int rc;
2888 unsigned long int lrc;
2889
2890 if (argc > 2)
2891 {
2892 if (strcmp(argv[op + 1], "linksize") == 0)
2893 {
2894 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2895 printf("%d\n", rc);
2896 yield = rc;
2897 }
2898 else if (strcmp(argv[op + 1], "pcre8") == 0)
2899 {
2900 #ifdef SUPPORT_PCRE8
2901 printf("1\n");
2902 yield = 1;
2903 #else
2904 printf("0\n");
2905 yield = 0;
2906 #endif
2907 }
2908 else if (strcmp(argv[op + 1], "pcre16") == 0)
2909 {
2910 #ifdef SUPPORT_PCRE16
2911 printf("1\n");
2912 yield = 1;
2913 #else
2914 printf("0\n");
2915 yield = 0;
2916 #endif
2917 }
2918 else if (strcmp(argv[op + 1], "pcre32") == 0)
2919 {
2920 #ifdef SUPPORT_PCRE32
2921 printf("1\n");
2922 yield = 1;
2923 #else
2924 printf("0\n");
2925 yield = 0;
2926 #endif
2927 goto EXIT;
2928 }
2929 if (strcmp(argv[op + 1], "utf") == 0)
2930 {
2931 #ifdef SUPPORT_PCRE8
2932 if (pcre_mode == PCRE8_MODE)
2933 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2934 #endif
2935 #ifdef SUPPORT_PCRE16
2936 if (pcre_mode == PCRE16_MODE)
2937 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2938 #endif
2939 #ifdef SUPPORT_PCRE32
2940 if (pcre_mode == PCRE32_MODE)
2941 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2942 #endif
2943 printf("%d\n", rc);
2944 yield = rc;
2945 goto EXIT;
2946 }
2947 else if (strcmp(argv[op + 1], "ucp") == 0)
2948 {
2949 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2950 printf("%d\n", rc);
2951 yield = rc;
2952 }
2953 else if (strcmp(argv[op + 1], "jit") == 0)
2954 {
2955 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2956 printf("%d\n", rc);
2957 yield = rc;
2958 }
2959 else if (strcmp(argv[op + 1], "newline") == 0)
2960 {
2961 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2962 print_newline_config(rc, TRUE);
2963 }
2964 else if (strcmp(argv[op + 1], "ebcdic") == 0)
2965 {
2966 #ifdef EBCDIC
2967 printf("1\n");
2968 yield = 1;
2969 #else
2970 printf("0\n");
2971 #endif
2972 }
2973 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2974 {
2975 #ifdef EBCDIC
2976 printf("0x%02x\n", CHAR_LF);
2977 #else
2978 printf("0\n");
2979 #endif
2980 }
2981 else
2982 {
2983 printf("Unknown -C option: %s\n", argv[op + 1]);
2984 }
2985 goto EXIT;
2986 }
2987
2988 /* No argument for -C: output all configuration information. */
2989
2990 printf("PCRE version %s\n", version);
2991 printf("Compiled with\n");
2992
2993 #ifdef EBCDIC
2994 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2995 #endif
2996
2997 /* At least one of SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 will be
2998 set. If more than one is set, either all their UTFs are supported or none are. */
2999
3000 #ifdef SUPPORT_PCRE8
3001 printf(" 8-bit support\n");
3002 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3003 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3004 #endif
3005 #ifdef SUPPORT_PCRE16
3006 printf(" 16-bit support\n");
3007 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3008 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3009 #endif
3010 #ifdef SUPPORT_PCRE32
3011 printf(" 32-bit support\n");
3012 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3013 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3014 #endif
3015
3016 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3017 printf(" %sUnicode properties support\n", rc? "" : "No ");
3018 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3019 if (rc)
3020 {
3021 const char *arch;
3022 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3023 printf(" Just-in-time compiler support: %s\n", arch);
3024 }
3025 else
3026 printf(" No just-in-time compiler support\n");
3027 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3028 print_newline_config(rc, FALSE);
3029 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3030 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3031 "all Unicode newlines");
3032 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3033 printf(" Internal link size = %d\n", rc);
3034 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3035 printf(" POSIX malloc threshold = %d\n", rc);
3036 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3037 printf(" Default match limit = %ld\n", lrc);
3038 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3039 printf(" Default recursion depth limit = %ld\n", lrc);
3040 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3041 printf(" Match recursion uses %s", rc? "stack" : "heap");
3042 if (showstore)
3043 {
3044 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3045 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3046 }
3047 printf("\n");
3048 goto EXIT;
3049 }
3050 else if (strcmp(arg, "-help") == 0 ||
3051 strcmp(arg, "--help") == 0)
3052 {
3053 usage();
3054 goto EXIT;
3055 }
3056 else
3057 {
3058 BAD_ARG:
3059 printf("** Unknown or malformed option %s\n", arg);
3060 usage();
3061 yield = 1;
3062 goto EXIT;
3063 }
3064 op++;
3065 argc--;
3066 }
3067
3068 /* Get the store for the offsets vector, and remember what it was */
3069
3070 size_offsets_max = size_offsets;
3071 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3072 if (offsets == NULL)
3073 {
3074 printf("** Failed to get %d bytes of memory for offsets vector\n",
3075 (int)(size_offsets_max * sizeof(int)));
3076 yield = 1;
3077 goto EXIT;
3078 }
3079
3080 /* Sort out the input and output files */
3081
3082 if (argc > 1)
3083 {
3084 infile = fopen(argv[op], INPUT_MODE);
3085 if (infile == NULL)
3086 {
3087 printf("** Failed to open %s\n", argv[op]);
3088 yield = 1;
3089 goto EXIT;
3090 }
3091 }
3092
3093 if (argc > 2)
3094 {
3095 outfile = fopen(argv[op+1], OUTPUT_MODE);
3096 if (outfile == NULL)
3097 {
3098 printf("** Failed to open %s\n", argv[op+1]);
3099 yield = 1;
3100 goto EXIT;
3101 }
3102 }
3103
3104 /* Set alternative malloc function */
3105
3106 #ifdef SUPPORT_PCRE8
3107 pcre_malloc = new_malloc;
3108 pcre_free = new_free;
3109 pcre_stack_malloc = stack_malloc;
3110 pcre_stack_free = stack_free;
3111 #endif
3112
3113 #ifdef SUPPORT_PCRE16
3114 pcre16_malloc = new_malloc;
3115 pcre16_free = new_free;
3116 pcre16_stack_malloc = stack_malloc;
3117 pcre16_stack_free = stack_free;
3118 #endif
3119
3120 #ifdef SUPPORT_PCRE32
3121 pcre32_malloc = new_malloc;
3122 pcre32_free = new_free;
3123 pcre32_stack_malloc = stack_malloc;
3124 pcre32_stack_free = stack_free;
3125 #endif
3126
3127 /* Heading line unless quiet, then prompt for first regex if stdin */
3128
3129 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3130
3131 /* Main loop */
3132
3133 while (!done)
3134 {
3135 pcre *re = NULL;
3136 pcre_extra *extra = NULL;
3137
3138 #if !defined NOPOSIX /* There are still compilers that require no indent */
3139 regex_t preg;
3140 int do_posix = 0;
3141 #endif
3142
3143 const char *error;
3144 pcre_uint8 *markptr;
3145 pcre_uint8 *p, *pp, *ppp;
3146 pcre_uint8 *to_file = NULL;
3147 const pcre_uint8 *tables = NULL;
3148 unsigned long int get_options;
3149 unsigned long int true_size, true_study_size = 0;
3150 size_t size, regex_gotten_store;
3151 int do_allcaps = 0;
3152 int do_mark = 0;
3153 int do_study = 0;
3154 int no_force_study = 0;
3155 int do_debug = debug;
3156 int do_G = 0;
3157 int do_g = 0;
3158 int do_showinfo = showinfo;
3159 int do_showrest = 0;
3160 int do_showcaprest = 0;
3161 int do_flip = 0;
3162 int erroroffset, len, delimiter, poffset;
3163
3164 #if !defined NODFA
3165 int dfa_matched = 0;
3166 #endif
3167
3168 use_utf = 0;
3169 debug_lengths = 1;
3170
3171 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3172 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3173 fflush(outfile);
3174
3175 p = buffer;
3176 while (isspace(*p)) p++;
3177 if (*p == 0) continue;
3178
3179 /* See if the pattern is to be loaded pre-compiled from a file. */
3180
3181 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3182 {
3183 pcre_uint32 magic;
3184 pcre_uint8 sbuf[8];
3185 FILE *f;
3186
3187 p++;
3188 if (*p == '!')
3189 {
3190 do_debug = TRUE;
3191 do_showinfo = TRUE;
3192 p++;
3193 }
3194
3195 pp = p + (int)strlen((char *)p);
3196 while (isspace(pp[-1])) pp--;
3197 *pp = 0;
3198
3199 f = fopen((char *)p, "rb");
3200 if (f == NULL)
3201 {
3202 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3203 continue;
3204 }
3205
3206 first_gotten_store = 0;
3207 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3208
3209 true_size =
3210 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3211 true_study_size =
3212 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3213
3214 re = (pcre *)new_malloc(true_size);
3215 if (re == NULL)
3216 {
3217 printf("** Failed to get %d bytes of memory for pcre object\n",
3218 (int)true_size);
3219 yield = 1;
3220 goto EXIT;
3221 }
3222 regex_gotten_store = first_gotten_store;
3223
3224 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3225
3226 magic = REAL_PCRE_MAGIC(re);
3227 if (magic != MAGIC_NUMBER)
3228 {
3229 if (swap_uint32(magic) == MAGIC_NUMBER)
3230 {
3231 do_flip = 1;
3232 }
3233 else
3234 {
3235 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3236 new_free(re);
3237 fclose(f);
3238 continue;
3239 }
3240 }
3241
3242 /* We hide the byte-invert info for little and big endian tests. */
3243 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3244 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3245
3246 /* Now see if there is any following study data. */
3247
3248 if (true_study_size != 0)
3249 {
3250 pcre_study_data *psd;
3251
3252 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3253 extra->flags = PCRE_EXTRA_STUDY_DATA;
3254
3255 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3256 extra->study_data = psd;
3257
3258 if (fread(psd, 1, true_study_size, f) != true_study_size)
3259 {
3260 FAIL_READ:
3261 fprintf(outfile, "Failed to read data from %s\n", p);
3262 if (extra != NULL)
3263 {
3264 PCRE_FREE_STUDY(extra);
3265 }
3266 new_free(re);
3267 fclose(f);
3268 continue;
3269 }
3270 fprintf(outfile, "Study data loaded from %s\n", p);
3271 do_study = 1; /* To get the data output if requested */
3272 }
3273 else fprintf(outfile, "No study data\n");
3274
3275 /* Flip the necessary bytes. */
3276 if (do_flip)
3277 {
3278 int rc;
3279 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3280 if (rc == PCRE_ERROR_BADMODE)
3281 {
3282 /* Simulate the result of the function call below. */
3283 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3284 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3285 PCRE_INFO_OPTIONS);
3286 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3287 "%d-bit mode\n", 8 * CHAR_SIZE,
3288 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3289 new_free(re);
3290 fclose(f);
3291 continue;
3292 }
3293 }
3294
3295 /* Need to know if UTF-8 for printing data strings. */
3296
3297 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3298 {
3299 new_free(re);
3300 fclose(f);
3301 continue;
3302 }
3303 use_utf = (get_options & PCRE_UTF8) != 0;
3304
3305 fclose(f);
3306 goto SHOW_INFO;
3307 }
3308
3309 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3310 the pattern; if it isn't complete, read more. */
3311
3312 delimiter = *p++;
3313
3314 if (isalnum(delimiter) || delimiter == '\\')
3315 {
3316 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3317 goto SKIP_DATA;
3318 }
3319
3320 pp = p;
3321 poffset = (int)(p - buffer);
3322
3323 for(;;)
3324 {
3325 while (*pp != 0)
3326 {
3327 if (*pp == '\\' && pp[1] != 0) pp++;
3328 else if (*pp == delimiter) break;
3329 pp++;
3330 }
3331 if (*pp != 0) break;
3332 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3333 {
3334 fprintf(outfile, "** Unexpected EOF\n");
3335 done = 1;
3336 goto CONTINUE;
3337 }
3338 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3339 }
3340
3341 /* The buffer may have moved while being extended; reset the start of data
3342 pointer to the correct relative point in the buffer. */
3343
3344 p = buffer + poffset;
3345
3346 /* If the first character after the closing delimiter is backslash, make
3347 the pattern end with backslash. This is purely to provide a way
3348 of testing for the error message when a pattern ends with backslash. */
3349
3350 if (pp[1] == '\\') *pp++ = '\\';
3351
3352 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3353 for callouts. */
3354
3355 *pp++ = 0;
3356 strcpy((char *)pbuffer, (char *)p);
3357
3358 /* Look for options after final delimiter */
3359
3360 options = 0;
3361 study_options = force_study_options;
3362 log_store = showstore; /* default from command line */
3363
3364 while (*pp != 0)
3365 {
3366 switch (*pp++)
3367 {
3368 case 'f': options |= PCRE_FIRSTLINE; break;
3369 case 'g': do_g = 1; break;
3370 case 'i': options |= PCRE_CASELESS; break;
3371 case 'm': options |= PCRE_MULTILINE; break;
3372 case 's': options |= PCRE_DOTALL; break;
3373 case 'x': options |= PCRE_EXTENDED; break;
3374
3375 case '+':
3376 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3377 break;
3378
3379 case '=': do_allcaps = 1; break;
3380 case 'A': options |= PCRE_ANCHORED; break;
3381 case 'B': do_debug = 1; break;
3382 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3383 case 'D': do_debug = do_showinfo = 1; break;
3384 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3385 case 'F': do_flip = 1; break;
3386 case 'G': do_G = 1; break;
3387 case 'I': do_showinfo = 1; break;
3388 case 'J': options |= PCRE_DUPNAMES; break;
3389 case 'K': do_mark = 1; break;
3390 case 'M': log_store = 1; break;
3391 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3392
3393 #if !defined NOPOSIX
3394 case 'P': do_posix = 1; break;
3395 #endif
3396
3397 case 'S':
3398 do_study = 1;
3399 for (;;)
3400 {
3401 switch (*pp++)
3402 {
3403 case 'S':
3404 do_study = 0;
3405 no_force_study = 1;
3406 break;
3407
3408 case '!':
3409 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3410 break;
3411
3412 case '+':
3413 if (*pp == '+')
3414 {
3415 verify_jit = TRUE;
3416 pp++;
3417 }
3418 if (*pp >= '1' && *pp <= '7')
3419 study_options |= jit_study_bits[*pp++ - '1'];
3420 else
3421 study_options |= jit_study_bits[6];
3422 break;
3423
3424 case '-':
3425 study_options &= ~PCRE_STUDY_ALLJIT;
3426 break;
3427
3428 default:
3429 pp--;
3430 goto ENDLOOP;
3431 }
3432 }
3433 ENDLOOP:
3434 break;
3435
3436 case 'U': options |= PCRE_UNGREEDY; break;
3437 case 'W': options |= PCRE_UCP; break;
3438 case 'X': options |= PCRE_EXTRA; break;
3439 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3440 case 'Z': debug_lengths = 0; break;
3441 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3442 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3443
3444 case 'T':
3445 switch (*pp++)
3446 {
3447 case '0': tables = tables0; break;
3448 case '1': tables = tables1; break;
3449
3450 case '\r':
3451 case '\n':
3452 case ' ':
3453 case 0:
3454 fprintf(outfile, "** Missing table number after /T\n");
3455 goto SKIP_DATA;
3456
3457 default:
3458 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3459 goto SKIP_DATA;
3460 }
3461 break;
3462
3463 case 'L':
3464 ppp = pp;
3465 /* The '\r' test here is so that it works on Windows. */
3466 /* The 0 (NUL) test is just in case this is an unterminated line. */
3467 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3468 *ppp = 0;
3469 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3470 {
3471 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3472 goto SKIP_DATA;
3473 }
3474 locale_set = 1;
3475 tables = PCRE_MAKETABLES;
3476 pp = ppp;
3477 break;
3478
3479 case '>':
3480 to_file = pp;
3481 while (*pp != 0) pp++;
3482 while (isspace(pp[-1])) pp--;
3483 *pp = 0;
3484 break;
3485
3486 case '<':
3487 {
3488 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3489 {
3490 options |= PCRE_JAVASCRIPT_COMPAT;
3491 pp += 3;
3492 }
3493 else
3494 {
3495 int x = check_newline(pp, outfile);
3496 if (x == 0) goto SKIP_DATA;
3497 options |= x;
3498 while (*pp++ != '>');
3499 }
3500 }
3501 break;
3502
3503 case '\r': /* So that it works in Windows */
3504 case '\n':
3505 case ' ':
3506 break;
3507
3508 default:
3509 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3510 goto SKIP_DATA;
3511 }
3512 }
3513
3514 /* Handle compiling via the POSIX interface, which doesn't support the
3515 timing, showing, or debugging options, nor the ability to pass over
3516 local character tables. Neither does it have 16-bit or 32-bit support. */
3517
3518 #if !defined NOPOSIX
3519 if (posix || do_posix)
3520 {
3521 int rc;
3522 int cflags = 0;
3523
3524 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3525 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3526 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3527 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3528 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3529 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3530 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3531
3532 first_gotten_store = 0;
3533 rc = regcomp(&preg, (char *)p, cflags);
3534
3535 /* Compilation failed; go back for another re, skipping to blank line
3536 if non-interactive. */
3537
3538 if (rc != 0)
3539 {
3540 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3541 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3542 goto SKIP_DATA;
3543 }
3544 }
3545
3546 /* Handle compiling via the native interface */
3547
3548 else
3549 #endif /* !defined NOPOSIX */
3550
3551 {
3552 /* In 16- or 32-bit mode, convert the input. */
3553
3554 #ifdef SUPPORT_PCRE16
3555 if (pcre_mode == PCRE16_MODE)
3556 {
3557 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3558 {
3559 case -1:
3560 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3561 "converted to UTF-16\n");
3562 goto SKIP_DATA;
3563
3564 case -2:
3565 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3566 "cannot be converted to UTF-16\n");
3567 goto SKIP_DATA;
3568
3569 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3570 fprintf(outfile, "**Failed: character value greater than 0xffff "
3571 "cannot be converted to 16-bit in non-UTF mode\n");
3572 goto SKIP_DATA;
3573
3574 default:
3575 break;
3576 }
3577 p = (pcre_uint8 *)buffer16;
3578 }
3579 #endif
3580
3581 #ifdef SUPPORT_PCRE32
3582 if (pcre_mode == PCRE32_MODE)
3583 {
3584 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3585 {
3586 case -1:
3587 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3588 "converted to UTF-32\n");
3589 goto SKIP_DATA;
3590
3591 case -2:
3592 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3593 "cannot be converted to UTF-32\n");
3594 goto SKIP_DATA;
3595
3596 case -3:
3597 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3598 goto SKIP_DATA;
3599
3600 default:
3601 break;
3602 }
3603 p = (pcre_uint8 *)buffer32;
3604 }
3605 #endif
3606
3607 /* Compile many times when timing */
3608
3609 if (timeit > 0)
3610 {
3611 register int i;
3612 clock_t time_taken;
3613 clock_t start_time = clock();
3614 for (i = 0; i < timeit; i++)
3615 {
3616 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3617 if (re != NULL) free(re);
3618 }
3619 time_taken = clock() - start_time;
3620 fprintf(outfile, "Compile time %.4f milliseconds\n",
3621 (((double)time_taken * 1000.0) / (double)timeit) /
3622 (double)CLOCKS_PER_SEC);
3623 }
3624
3625 first_gotten_store = 0;
3626 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3627
3628 /* Compilation failed; go back for another re, skipping to blank line
3629 if non-interactive. */
3630
3631 if (re == NULL)
3632 {
3633 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3634 SKIP_DATA:
3635 if (infile != stdin)
3636 {
3637 for (;;)
3638 {
3639 if (extend_inputline(infile, buffer, NULL) == NULL)
3640 {
3641 done = 1;
3642 goto CONTINUE;
3643 }
3644 len = (int)strlen((char *)buffer);
3645 while (len > 0 && isspace(buffer[len-1])) len--;
3646 if (len == 0) break;
3647 }
3648 fprintf(outfile, "\n");
3649 }
3650 goto CONTINUE;
3651 }
3652
3653 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3654 within the regex; check for this so that we know how to process the data
3655 lines. */
3656
3657 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3658 goto SKIP_DATA;
3659 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3660
3661 /* Extract the size for possible writing before possibly flipping it,
3662 and remember the store that was got. */
3663
3664 true_size = REAL_PCRE_SIZE(re);
3665 regex_gotten_store = first_gotten_store;
3666
3667 /* Output code size information if requested */
3668
3669 if (log_store)
3670 {
3671 int name_count, name_entry_size, real_pcre_size;
3672
3673 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3674 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3675 #ifdef SUPPORT_PCRE8
3676 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3677 real_pcre_size = sizeof(real_pcre);
3678 #endif
3679 #ifdef SUPPORT_PCRE16
3680 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3681 real_pcre_size = sizeof(real_pcre16);
3682 #endif
3683 #ifdef SUPPORT_PCRE32
3684 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3685 real_pcre_size = sizeof(real_pcre32);
3686 #endif
3687 fprintf(outfile, "Memory allocation (code space): %d\n",
3688 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3689 }
3690
3691 /* If -s or /S was present, study the regex to generate additional info to
3692 help with the matching, unless the pattern has the SS option, which
3693 suppresses the effect of /S (used for a few test patterns where studying is
3694 never sensible). */
3695
3696 if (do_study || (force_study >= 0 && !no_force_study))
3697 {
3698 if (timeit > 0)
3699 {
3700 register int i;
3701 clock_t time_taken;
3702 clock_t start_time = clock();
3703 for (i = 0; i < timeit; i++)
3704 {
3705 PCRE_STUDY(extra, re, study_options, &error);
3706 }
3707 time_taken = clock() - start_time;
3708 if (extra != NULL)
3709 {
3710 PCRE_FREE_STUDY(extra);
3711 }
3712 fprintf(outfile, " Study time %.4f milliseconds\n",
3713 (((double)time_taken * 1000.0) / (double)timeit) /
3714 (double)CLOCKS_PER_SEC);
3715 }
3716 PCRE_STUDY(extra, re, study_options, &error);
3717 if (error != NULL)
3718 fprintf(outfile, "Failed to study: %s\n", error);
3719 else if (extra != NULL)
3720 {
3721 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3722 if (log_store)
3723 {
3724 size_t jitsize;
3725 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3726 jitsize != 0)
3727 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3728 }
3729 }
3730 }
3731
3732 /* If /K was present, we set up for handling MARK data. */
3733
3734 if (do_mark)
3735 {
3736 if (extra == NULL)
3737 {
3738 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3739 extra->flags = 0;
3740 }
3741 extra->mark = &markptr;
3742 extra->flags |= PCRE_EXTRA_MARK;
3743 }
3744
3745 /* Extract and display information from the compiled data if required. */
3746
3747 SHOW_INFO:
3748
3749 if (do_debug)
3750 {
3751 fprintf(outfile, "------------------------------------------------------------------\n");
3752 PCRE_PRINTINT(re, outfile, debug_lengths);
3753 }
3754
3755 /* We already have the options in get_options (see above) */
3756
3757 if (do_showinfo)
3758 {
3759 unsigned long int all_options;
3760 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3761 hascrorlf, maxlookbehind;
3762 int nameentrysize, namecount;
3763 const pcre_uint8 *nametable;
3764
3765 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3766 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3767 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3768 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3769 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3770 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3771 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3772 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3773 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3774 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3775 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3776 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3777 != 0)
3778 goto SKIP_DATA;
3779
3780 if (size != regex_gotten_store) fprintf(outfile,
3781 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3782 (int)size, (int)regex_gotten_store);
3783
3784 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3785 if (backrefmax > 0)
3786 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3787
3788 if (namecount > 0)
3789 {
3790 fprintf(outfile, "Named capturing subpatterns:\n");
3791 while (namecount-- > 0)
3792 {
3793 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
3794 int length = (int)STRLEN(nametable + imm2_size);
3795 fprintf(outfile, " ");
3796 PCHARSV(nametable, imm2_size, length, outfile);
3797 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3798 #ifdef SUPPORT_PCRE32
3799 if (pcre_mode == PCRE32_MODE)
3800 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
3801 #endif
3802 #ifdef SUPPORT_PCRE16
3803 if (pcre_mode == PCRE16_MODE)
3804 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
3805 #endif
3806 #ifdef SUPPORT_PCRE8
3807 if (pcre_mode == PCRE8_MODE)
3808 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
3809 #endif
3810 nametable += nameentrysize * CHAR_SIZE;
3811 }
3812 }
3813
3814 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3815 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3816
3817 all_options = REAL_PCRE_OPTIONS(re);
3818 if (do_flip) all_options = swap_uint32(all_options);
3819
3820 if (get_options == 0) fprintf(outfile, "No options\n");
3821 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3822 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3823 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3824 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3825 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3826 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3827 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3828 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3829 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3830 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3831 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3832 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3833 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3834 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3835 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3836 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3837 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3838 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3839
3840 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3841
3842 switch (get_options & PCRE_NEWLINE_BITS)
3843 {
3844 case PCRE_NEWLINE_CR:
3845 fprintf(outfile, "Forced newline sequence: CR\n");
3846 break;
3847
3848 case PCRE_NEWLINE_LF:
3849 fprintf(outfile, "Forced newline sequence: LF\n");
3850 break;
3851
3852 case PCRE_NEWLINE_CRLF:
3853 fprintf(outfile, "Forced newline sequence: CRLF\n");
3854 break;
3855
3856 case PCRE_NEWLINE_ANYCRLF:
3857 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3858 break;
3859
3860 case PCRE_NEWLINE_ANY:
3861 fprintf(outfile, "Forced newline sequence: ANY\n");
3862 break;
3863
3864 default:
3865 break;
3866 }
3867
3868 if (first_char == -1)
3869 {
3870 fprintf(outfile, "First char at start or follows newline\n");
3871 }
3872 else if (first_char < 0)
3873 {
3874 fprintf(outfile, "No first char\n");
3875 }
3876 else
3877 {
3878 const char *caseless =
3879 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
3880 "" : " (caseless)";
3881
3882 if (PRINTOK(first_char))
3883 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3884 else
3885 {
3886 fprintf(outfile, "First char = ");
3887 pchar(first_char, outfile);
3888 fprintf(outfile, "%s\n", caseless);
3889 }
3890 }
3891
3892 if (need_char < 0)
3893 {
3894 fprintf(outfile, "No need char\n");
3895 }
3896 else
3897 {
3898 const char *caseless =
3899 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
3900 "" : " (caseless)";
3901
3902 if (PRINTOK(need_char))
3903 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3904 else
3905 {
3906 fprintf(outfile, "Need char = ");
3907 pchar(need_char, outfile);
3908 fprintf(outfile, "%s\n", caseless);
3909 }
3910 }
3911
3912 if (maxlookbehind > 0)
3913 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3914
3915 /* Don't output study size; at present it is in any case a fixed
3916 value, but it varies, depending on the computer architecture, and
3917 so messes up the test suite. (And with the /F option, it might be
3918 flipped.) If study was forced by an external -s, don't show this
3919 information unless -i or -d was also present. This means that, except
3920 when auto-callouts are involved, the output from runs with and without
3921 -s should be identical. */
3922
3923 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3924 {
3925 if (extra == NULL)
3926 fprintf(outfile, "Study returned NULL\n");
3927 else
3928 {
3929 pcre_uint8 *start_bits = NULL;
3930 int minlength;
3931
3932 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3933 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3934
3935 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3936 {
3937 if (start_bits == NULL)
3938 fprintf(outfile, "No set of starting bytes\n");
3939 else
3940 {
3941 int i;
3942 int c = 24;
3943 fprintf(outfile, "Starting byte set: ");
3944 for (i = 0; i < 256; i++)
3945 {
3946 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3947 {
3948 if (c > 75)
3949 {
3950 fprintf(outfile, "\n ");
3951 c = 2;
3952 }
3953 if (PRINTOK(i) && i != ' ')
3954 {
3955 fprintf(outfile, "%c ", i);
3956 c += 2;
3957 }
3958 else
3959 {
3960 fprintf(outfile, "\\x%02x ", i);
3961 c += 5;
3962 }
3963 }
3964 }
3965 fprintf(outfile, "\n");
3966 }
3967 }
3968 }
3969
3970 /* Show this only if the JIT was set by /S, not by -s. */
3971
3972 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3973 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3974 {
3975 int jit;
3976 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3977 {
3978 if (jit)
3979 fprintf(outfile, "JIT study was successful\n");
3980 else
3981 #ifdef SUPPORT_JIT
3982 fprintf(outfile, "JIT study was not successful\n");
3983 #else
3984 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3985 #endif
3986 }
3987 }
3988 }
3989 }
3990
3991 /* If the '>' option was present, we write out the regex to a file, and
3992 that is all. The first 8 bytes of the file are the regex length and then
3993 the study length, in big-endian order. */
3994
3995 if (to_file != NULL)
3996 {
3997 FILE *f = fopen((char *)to_file, "wb");
3998 if (f == NULL)
3999 {
4000 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4001 }
4002 else
4003 {
4004 pcre_uint8 sbuf[8];
4005
4006 if (do_flip) regexflip(re, extra);
4007 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4008 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4009 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4010 sbuf[3] = (pcre_uint8)((true_size) & 255);
4011 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4012 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4013 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4014 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4015
4016 if (fwrite(sbuf, 1, 8, f) < 8 ||
4017 fwrite(re, 1, true_size, f) < true_size)
4018 {
4019 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4020 }
4021 else
4022 {
4023 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4024
4025 /* If there is study data, write it. */
4026
4027 if (extra != NULL)
4028 {
4029 if (fwrite(extra->study_data, 1, true_study_size, f) <
4030 true_study_size)
4031 {
4032 fprintf(outfile, "Write error on %s: %s\n", to_file,
4033 strerror(errno));
4034 }
4035 else fprintf(outfile, "Study data written to %s\n", to_file);
4036 }
4037 }
4038 fclose(f);
4039 }
4040
4041 new_free(re);
4042 if (extra != NULL)
4043 {
4044 PCRE_FREE_STUDY(extra);
4045 }
4046 if (locale_set)
4047 {
4048 new_free((void *)tables);
4049 setlocale(LC_CTYPE, "C");
4050 locale_set = 0;
4051 }
4052 continue; /* With next regex */
4053 }
4054 } /* End of non-POSIX compile */
4055
4056 /* Read data lines and test them */
4057
4058 for (;;)
4059 {
4060 pcre_uint8 *q;
4061 pcre_uint8 *bptr;
4062 int *use_offsets = offsets;
4063 int use_size_offsets = size_offsets;
4064 int callout_data = 0;
4065 int callout_data_set = 0;
4066 int count, c;
4067 int copystrings = 0;
4068 int find_match_limit = default_find_match_limit;
4069 int getstrings = 0;
4070 int getlist = 0;
4071 int gmatched = 0;
4072 int start_offset = 0;
4073 int start_offset_sign = 1;
4074 int g_notempty = 0;
4075 int use_dfa = 0;
4076
4077 *copynames = 0;
4078 *getnames = 0;
4079
4080 #ifdef SUPPORT_PCRE32
4081 cn32ptr = copynames;
4082 gn32ptr = getnames;
4083 #endif
4084 #ifdef SUPPORT_PCRE16
4085 cn16ptr = copynames16;
4086 gn16ptr = getnames16;
4087 #endif
4088 #ifdef SUPPORT_PCRE8
4089 cn8ptr = copynames8;
4090 gn8ptr = getnames8;
4091 #endif
4092
4093 SET_PCRE_CALLOUT(callout);
4094 first_callout = 1;
4095 last_callout_mark = NULL;
4096 callout_extra = 0;
4097 callout_count = 0;
4098 callout_fail_count = 999999;
4099 callout_fail_id = -1;
4100 show_malloc = 0;
4101 options = 0;
4102
4103 if (extra != NULL) extra->flags &=
4104 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4105
4106 len = 0;
4107 for (;;)
4108 {
4109 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4110 {
4111 if (len > 0) /* Reached EOF without hitting a newline */
4112 {
4113 fprintf(outfile, "\n");
4114 break;
4115 }
4116 done = 1;
4117 goto CONTINUE;
4118 }
4119 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4120 len = (int)strlen((char *)buffer);
4121 if (buffer[len-1] == '\n') break;
4122 }
4123
4124 while (len > 0 && isspace(buffer[len-1])) len--;
4125 buffer[len] = 0;
4126 if (len == 0) break;
4127
4128 p = buffer;
4129 while (isspace(*p)) p++;
4130
4131 bptr = q = dbuffer;
4132 while ((c = *p++) != 0)
4133 {
4134 int i = 0;
4135 int n = 0;
4136
4137 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4138 In non-UTF mode, allow the value of the byte to fall through to later,
4139 where values greater than 127 are turned into UTF-8 when running in
4140 16-bit or 32-bit mode. */
4141
4142 if (c != '\\')
4143 {
4144 if (use_utf)
4145 {
4146 *q++ = c;
4147 continue;
4148 }
4149 }
4150
4151 /* Handle backslash escapes */
4152
4153 else switch ((c = *p++))
4154 {
4155 case 'a': c = 7; break;
4156 case 'b': c = '\b'; break;
4157 case 'e': c = 27; break;
4158 case 'f': c = '\f'; break;
4159 case 'n': c = '\n'; break;
4160 case 'r': c = '\r'; break;
4161 case 't': c = '\t'; break;
4162 case 'v': c = '\v'; break;
4163
4164 case '0': case '1': case '2': case '3':
4165 case '4': case '5': case '6': case '7':
4166 c -= '0';
4167 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4168 c = c * 8 + *p++ - '0';
4169 break;
4170
4171 case 'x':
4172 if (*p == '{')
4173 {
4174 pcre_uint8 *pt = p;
4175 c = 0;
4176
4177 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4178 when isxdigit() is a macro that refers to its argument more than
4179 once. This is banned by the C Standard, but apparently happens in at
4180 least one MacOS environment. */
4181
4182 for (pt++; isxdigit(*pt); pt++)
4183 {
4184 if (++i == 9)
4185 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4186 "using only the first eight.\n");
4187 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4188 }
4189 if (*pt == '}')
4190 {
4191 p = pt + 1;
4192 break;
4193 }
4194 /* Not correct form for \x{...}; fall through */
4195 }
4196
4197 /* \x without {} always defines just one byte in 8-bit mode. This
4198 allows UTF-8 characters to be constructed byte by byte, and also allows
4199 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4200 Otherwise, pass it down to later code so that it can be turned into
4201 UTF-8 when running in 16/32-bit mode. */
4202
4203 c = 0;
4204 while (i++ < 2 && isxdigit(*p))
4205 {
4206 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4207 p++;
4208 }
4209 if (use_utf)
4210 {
4211 *q++ = c;
4212 continue;
4213 }
4214 break;
4215
4216 case 0: /* \ followed by EOF allows for an empty line */
4217 p--;
4218 continue;
4219
4220 case '>':
4221 if (*p == '-')
4222 {
4223 start_offset_sign = -1;
4224 p++;
4225 }
4226 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4227 start_offset *= start_offset_sign;
4228 continue;
4229
4230 case 'A': /* Option setting */
4231 options |= PCRE_ANCHORED;
4232 continue;
4233
4234 case 'B':
4235 options |= PCRE_NOTBOL;
4236 continue;
4237
4238 case 'C':
4239 if (isdigit(*p)) /* Set copy string */
4240 {
4241 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4242 copystrings |= 1 << n;
4243 }
4244 else if (isalnum(*p))
4245 {
4246 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4247 }
4248 else if (*p == '+')
4249 {
4250 callout_extra = 1;
4251 p++;
4252 }
4253 else if (*p == '-')
4254 {
4255 SET_PCRE_CALLOUT(NULL);
4256 p++;
4257 }
4258 else if (*p == '!')
4259 {
4260 callout_fail_id = 0;
4261 p++;
4262 while(isdigit(*p))
4263 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4264 callout_fail_count = 0;
4265 if (*p == '!')
4266 {
4267 p++;
4268 while(isdigit(*p))
4269 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4270 }
4271 }
4272 else if (*p == '*')
4273 {
4274 int sign = 1;
4275 callout_data = 0;
4276 if (*(++p) == '-') { sign = -1; p++; }
4277 while(isdigit(*p))
4278 callout_data = callout_data * 10 + *p++ - '0';
4279 callout_data *= sign;
4280 callout_data_set = 1;
4281 }
4282 continue;
4283
4284 #if !defined NODFA
4285 case 'D':
4286 #if !defined NOPOSIX
4287 if (posix || do_posix)
4288 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4289 else
4290 #endif
4291 use_dfa = 1;
4292 continue;
4293 #endif
4294
4295 #if !defined NODFA
4296 case 'F':
4297 options |= PCRE_DFA_SHORTEST;
4298 continue;
4299 #endif
4300
4301 case 'G':
4302 if (isdigit(*p))
4303 {
4304 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4305 getstrings |= 1 << n;
4306 }
4307 else if (isalnum(*p))
4308 {
4309 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4310 }
4311 continue;
4312
4313 case 'J':
4314 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4315 if (extra != NULL
4316 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4317 && extra->executable_jit != NULL)
4318 {
4319 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4320 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4321 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4322 }
4323 continue;
4324
4325 case 'L':
4326 getlist = 1;
4327 continue;
4328
4329 case 'M':
4330 find_match_limit = 1;
4331 continue;
4332
4333 case 'N':
4334 if ((options & PCRE_NOTEMPTY) != 0)
4335 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4336 else
4337 options |= PCRE_NOTEMPTY;
4338 continue;
4339
4340 case 'O':
4341 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4342 if (n > size_offsets_max)
4343 {
4344 size_offsets_max = n;
4345 free(offsets);
4346 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4347 if (offsets == NULL)
4348 {
4349 printf("** Failed to get %d bytes of memory for offsets vector\n",
4350 (int)(size_offsets_max * sizeof(int)));
4351 yield = 1;
4352 goto EXIT;
4353 }
4354 }
4355 use_size_offsets = n;
4356 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4357 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4358 continue;
4359
4360 case 'P':
4361 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4362 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4363 continue;
4364
4365 case 'Q':
4366 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4367 if (extra == NULL)
4368 {
4369 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4370 extra->flags = 0;
4371 }
4372 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4373 extra->match_limit_recursion = n;
4374 continue;
4375
4376 case 'q':
4377 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4378 if (extra == NULL)
4379 {
4380 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4381 extra->flags = 0;
4382 }
4383 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4384 extra->match_limit = n;
4385 continue;
4386
4387 #if !defined NODFA
4388 case 'R':
4389 options |= PCRE_DFA_RESTART;
4390 continue;
4391 #endif
4392
4393 case 'S':
4394 show_malloc = 1;
4395 continue;
4396
4397 case 'Y':
4398 options |= PCRE_NO_START_OPTIMIZE;
4399 continue;
4400
4401 case 'Z':
4402 options |= PCRE_NOTEOL;
4403 continue;
4404
4405 case '?':
4406 options |= PCRE_NO_UTF8_CHECK;
4407 continue;
4408
4409 case '<':
4410 {
4411 int x = check_newline(p, outfile);
4412 if (x == 0) goto NEXT_DATA;
4413 options |= x;
4414 while (*p++ != '>');
4415 }
4416 continue;
4417 }
4418
4419 /* We now have a character value in c that may be greater than 255. In
4420 16-bit or 32-bit mode, we always convert characters to UTF-8 so that
4421 values greater than 255 can be passed to non-UTF 16-bit or 32-bit strings.
4422 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4423 than 127 in UTF mode must have come from \x{...} or octal constructs
4424 because values from \x.. get this far only in non-UTF mode. */
4425
4426 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
4427 if (pcre_mode != PCRE8_MODE || use_utf)
4428 {
4429 pcre_uint8 buff8[8];
4430 int ii, utn;
4431 utn = ord2utf8(c, buff8);
4432 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
4433 }
4434 else
4435 #endif
4436 {
4437 if (c > 255)
4438 {
4439 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4440 "and UTF-8 mode is not enabled.\n", c);
4441 fprintf(outfile, "** Truncation will probably give the wrong "
4442 "result.\n");
4443 }
4444 *q++ = c;
4445 }
4446 }
4447
4448 /* Reached end of subject string */
4449
4450 *q = 0;
4451 len = (int)(q - dbuffer);
4452
4453 /* Move the data to the end of the buffer so that a read over the end of
4454 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
4455 we are using the POSIX interface, we must include the terminating zero. */
4456
4457 #if !defined NOPOSIX
4458 if (posix || do_posix)
4459 {
4460 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
4461 bptr += buffer_size - len - 1;
4462 }
4463 else
4464 #endif
4465 {
4466 memmove(bptr + buffer_size - len, bptr, len);
4467 bptr += buffer_size - len;
4468 }
4469
4470 if ((all_use_dfa || use_dfa) && find_match_limit)
4471 {
4472 printf("**Match limit not relevant for DFA matching: ignored\n");
4473 find_match_limit = 0;
4474 }
4475
4476 /* Handle matching via the POSIX interface, which does not
4477 support timing or playing with the match limit or callout data. */
4478
4479 #if !defined NOPOSIX
4480 if (posix || do_posix)
4481 {
4482 int rc;
4483 int eflags = 0;
4484 regmatch_t *pmatch = NULL;
4485 if (use_size_offsets > 0)
4486 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4487 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4488 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4489 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4490
4491 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4492
4493 if (rc != 0)
4494 {
4495 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4496 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4497 }
4498 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4499 {
4500 fprintf(outfile, "Matched with REG_NOSUB\n");
4501 }
4502 else
4503 {
4504 size_t i;
4505 for (i = 0; i < (size_t)use_size_offsets; i++)
4506 {
4507 if (pmatch[i].rm_so >= 0)
4508 {
4509 fprintf(outfile, "%2d: ", (int)i);
4510 PCHARSV(dbuffer, pmatch[i].rm_so,
4511 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4512 fprintf(outfile, "\n");
4513 if (do_showcaprest || (i == 0 && do_showrest))
4514 {
4515 fprintf(outfile, "%2d+ ", (int)i);
4516 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4517 outfile);
4518 fprintf(outfile, "\n");
4519 }
4520 }
4521 }
4522 }
4523 free(pmatch);
4524 goto NEXT_DATA;
4525 }
4526
4527 #endif /* !defined NOPOSIX */
4528
4529 /* Handle matching via the native interface - repeats for /g and /G */
4530
4531 #ifdef SUPPORT_PCRE16
4532 if (pcre_mode == PCRE16_MODE)
4533 {
4534 len = to16(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF8, len);
4535 switch(len)
4536 {
4537 case -1:
4538 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4539 "converted to UTF-16\n");
4540 goto NEXT_DATA;
4541
4542 case -2:
4543 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4544 "cannot be converted to UTF-16\n");
4545 goto NEXT_DATA;
4546
4547 case -3:
4548 fprintf(outfile, "**Failed: character value greater than 0xffff "
4549 "cannot be converted to 16-bit in non-UTF mode\n");
4550 goto NEXT_DATA;
4551
4552 default:
4553 break;
4554 }
4555 bptr = (pcre_uint8 *)buffer16;
4556 }
4557 #endif
4558
4559 #ifdef SUPPORT_PCRE32
4560 if (pcre_mode == PCRE32_MODE)
4561 {
4562 len = to32(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF32, len);
4563 switch(len)
4564 {
4565 case -1:
4566 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4567 "converted to UTF-32\n");
4568 goto NEXT_DATA;
4569
4570 case -2:
4571 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4572 "cannot be converted to UTF-32\n");
4573 goto NEXT_DATA;
4574
4575 case -3:
4576 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4577 goto NEXT_DATA;
4578
4579 default:
4580 break;
4581 }
4582 bptr = (pcre_uint8 *)buffer32;
4583 }
4584 #endif
4585
4586 /* Ensure that there is a JIT callback if we want to verify that JIT was
4587 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4588
4589 if (verify_jit && jit_stack == NULL && extra != NULL)
4590 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4591
4592 for (;; gmatched++) /* Loop for /g or /G */
4593 {
4594 markptr = NULL;
4595 jit_was_used = FALSE;
4596
4597 if (timeitm > 0)
4598 {
4599 register int i;
4600 clock_t time_taken;
4601 clock_t start_time = clock();
4602
4603 #if !defined NODFA
4604 if (all_use_dfa || use_dfa)
4605 {
4606 if ((options & PCRE_DFA_RESTART) != 0)
4607 {
4608 fprintf(outfile, "Timing DFA restarts is not supported\n");
4609 break;
4610 }
4611 if (dfa_workspace == NULL)
4612 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4613 for (i = 0; i < timeitm; i++)
4614 {
4615 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4616 (options | g_notempty), use_offsets, use_size_offsets,
4617 dfa_workspace, DFA_WS_DIMENSION);
4618 }
4619 }
4620 else
4621 #endif
4622
4623 for (i = 0; i < timeitm; i++)
4624 {
4625 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4626 (options | g_notempty), use_offsets, use_size_offsets);
4627 }
4628 time_taken = clock() - start_time;
4629 fprintf(outfile, "Execute time %.4f milliseconds\n",
4630 (((double)time_taken * 1000.0) / (double)timeitm) /
4631 (double)CLOCKS_PER_SEC);
4632 }
4633
4634 /* If find_match_limit is set, we want to do repeated matches with
4635 varying limits in order to find the minimum value for the match limit and
4636 for the recursion limit. The match limits are relevant only to the normal
4637 running of pcre_exec(), so disable the JIT optimization. This makes it
4638 possible to run the same set of tests with and without JIT externally
4639 requested. */
4640
4641 if (find_match_limit)
4642 {
4643 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4644 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4645 extra->flags = 0;
4646
4647 (void)check_match_limit(re, extra, bptr, len, start_offset,
4648 options|g_notempty, use_offsets, use_size_offsets,
4649 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4650 PCRE_ERROR_MATCHLIMIT, "match()");
4651
4652 count = check_match_limit(re, extra, bptr, len, start_offset,
4653 options|g_notempty, use_offsets, use_size_offsets,
4654 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4655 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4656 }
4657
4658 /* If callout_data is set, use the interface with additional data */
4659
4660 else if (callout_data_set)
4661 {
4662 if (extra == NULL)
4663 {
4664 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4665 extra->flags = 0;
4666 }
4667 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4668 extra->callout_data = &callout_data;
4669 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4670 options | g_notempty, use_offsets, use_size_offsets);
4671 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4672 }
4673
4674 /* The normal case is just to do the match once, with the default
4675 value of match_limit. */
4676
4677 #if !defined NODFA
4678 else if (all_use_dfa || use_dfa)
4679 {
4680 if (dfa_workspace == NULL)
4681 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4682 if (dfa_matched++ == 0)
4683 dfa_workspace[0] = -1; /* To catch bad restart */
4684 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4685 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4686 DFA_WS_DIMENSION);
4687 if (count == 0)
4688 {
4689 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4690 count = use_size_offsets/2;
4691 }
4692 }
4693 #endif
4694
4695 else
4696 {
4697 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4698 options | g_notempty, use_offsets, use_size_offsets);
4699 if (count == 0)
4700 {
4701 fprintf(outfile, "Matched, but too many substrings\n");
4702 count = use_size_offsets/3;
4703 }
4704 }
4705
4706 /* Matched */
4707
4708 if (count >= 0)
4709 {
4710 int i, maxcount;
4711 void *cnptr, *gnptr;
4712
4713 #if !defined NODFA
4714 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4715 #endif
4716 maxcount = use_size_offsets/3;
4717
4718 /* This is a check against a lunatic return value. */
4719
4720 if (count > maxcount)
4721 {
4722 fprintf(outfile,
4723 "** PCRE error: returned count %d is too big for offset size %d\n",
4724 count, use_size_offsets);
4725 count = use_size_offsets/3;
4726 if (do_g || do_G)
4727 {
4728 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4729 do_g = do_G = FALSE; /* Break g/G loop */
4730 }
4731 }
4732
4733 /* do_allcaps requests showing of all captures in the pattern, to check
4734 unset ones at the end. */
4735
4736 if (do_allcaps)
4737 {
4738 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4739 goto SKIP_DATA;
4740 count++; /* Allow for full match */
4741 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4742 }
4743
4744 /* Output the captured substrings */
4745
4746 for (i = 0; i < count * 2; i += 2)
4747 {
4748 if (use_offsets[i] < 0)
4749 {
4750 if (use_offsets[i] != -1)
4751 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4752 use_offsets[i], i);
4753 if (use_offsets[i+1] != -1)
4754 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4755 use_offsets[i+1], i+1);
4756 fprintf(outfile, "%2d: <unset>\n", i/2);
4757 }
4758 else
4759 {
4760 fprintf(outfile, "%2d: ", i/2);
4761 PCHARSV(bptr, use_offsets[i],
4762 use_offsets[i+1] - use_offsets[i], outfile);
4763 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4764 fprintf(outfile, "\n");
4765 if (do_showcaprest || (i == 0 && do_showrest))
4766 {
4767 fprintf(outfile, "%2d+ ", i/2);
4768 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4769 outfile);
4770 fprintf(outfile, "\n");
4771 }
4772 }
4773 }
4774
4775 if (markptr != NULL)
4776 {
4777 fprintf(outfile, "MK: ");
4778 PCHARSV(markptr, 0, -1, outfile);
4779 fprintf(outfile, "\n");
4780 }
4781
4782 for (i = 0; i < 32; i++)
4783 {
4784 if ((copystrings & (1 << i)) != 0)
4785 {
4786 int rc;
4787 char copybuffer[256];
4788 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4789 copybuffer, sizeof(copybuffer));
4790 if (rc < 0)
4791 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4792 else
4793 {
4794 fprintf(outfile, "%2dC ", i);
4795 PCHARSV(copybuffer, 0, rc, outfile);
4796 fprintf(outfile, " (%d)\n", rc);
4797 }
4798 }
4799 }
4800
4801 cnptr = copynames;
4802 for (;;)
4803 {
4804 int rc;
4805 char copybuffer[256];
4806
4807 if (pcre_mode == PCRE16_MODE)
4808 {
4809 if (*(pcre_uint16 *)cnptr == 0) break;
4810 }
4811 else
4812 {
4813 if (*(pcre_uint8 *)cnptr == 0) break;
4814 }
4815
4816 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4817 cnptr, copybuffer, sizeof(copybuffer));
4818
4819 if (rc < 0)
4820 {
4821 fprintf(outfile, "copy substring ");
4822 PCHARSV(cnptr, 0, -1, outfile);
4823 fprintf(outfile, " failed %d\n", rc);
4824 }
4825 else
4826 {
4827 fprintf(outfile, " C ");
4828 PCHARSV(copybuffer, 0, rc, outfile);
4829 fprintf(outfile, " (%d) ", rc);
4830 PCHARSV(cnptr, 0, -1, outfile);
4831 putc('\n', outfile);
4832 }
4833
4834 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4835 }
4836
4837 for (i = 0; i < 32; i++)
4838 {
4839 if ((getstrings & (1 << i)) != 0)
4840 {
4841 int rc;
4842 const char *substring;
4843 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4844 if (rc < 0)
4845 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4846 else
4847 {
4848 fprintf(outfile, "%2dG ", i);
4849 PCHARSV(substring, 0, rc, outfile);
4850 fprintf(outfile, " (%d)\n", rc);
4851 PCRE_FREE_SUBSTRING(substring);
4852 }
4853 }
4854 }
4855
4856 gnptr = getnames;
4857 for (;;)
4858 {
4859 int rc;
4860 const char *substring;
4861
4862 if (pcre_mode == PCRE16_MODE)
4863 {
4864 if (*(pcre_uint16 *)gnptr == 0) break;
4865 }
4866 else
4867 {
4868 if (*(pcre_uint8 *)gnptr == 0) break;
4869 }
4870
4871 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4872 gnptr, &substring);
4873 if (rc < 0)
4874 {
4875 fprintf(outfile, "get substring ");
4876 PCHARSV(gnptr, 0, -1, outfile);
4877 fprintf(outfile, " failed %d\n", rc);
4878 }
4879 else
4880 {
4881 fprintf(outfile, " G ");
4882 PCHARSV(substring, 0, rc, outfile);
4883 fprintf(outfile, " (%d) ", rc);
4884 PCHARSV(gnptr, 0, -1, outfile);
4885 PCRE_FREE_SUBSTRING(substring);
4886 putc('\n', outfile);
4887 }
4888
4889 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4890 }
4891
4892 if (getlist)
4893 {
4894 int rc;
4895 const char **stringlist;
4896 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4897 if (rc < 0)
4898 fprintf(outfile, "get substring list failed %d\n", rc);
4899 else
4900 {
4901 for (i = 0; i < count; i++)
4902 {
4903 fprintf(outfile, "%2dL ", i);
4904 PCHARSV(stringlist[i], 0, -1, outfile);
4905 putc('\n', outfile);
4906 }
4907 if (stringlist[i] != NULL)
4908 fprintf(outfile, "string list not terminated by NULL\n");
4909 PCRE_FREE_SUBSTRING_LIST(stringlist);
4910 }
4911 }
4912 }
4913
4914 /* There was a partial match */
4915
4916 else if (count == PCRE_ERROR_PARTIAL)
4917 {
4918 if (markptr == NULL) fprintf(outfile, "Partial match");
4919 else
4920 {
4921 fprintf(outfile, "Partial match, mark=");
4922 PCHARSV(markptr, 0, -1, outfile);
4923 }
4924 if (use_size_offsets > 1)
4925 {
4926 fprintf(outfile, ": ");
4927 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4928 outfile);
4929 }
4930 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4931 fprintf(outfile, "\n");
4932 break; /* Out of the /g loop */
4933 }
4934
4935 /* Failed to match. If this is a /g or /G loop and we previously set
4936 g_notempty after a null match, this is not necessarily the end. We want
4937 to advance the start offset, and continue. We won't be at the end of the
4938 string - that was checked before setting g_notempty.
4939
4940 Complication arises in the case when the newline convention is "any",
4941 "crlf", or "anycrlf". If the previous match was at the end of a line
4942 terminated by CRLF, an advance of one character just passes the \r,
4943 whereas we should prefer the longer newline sequence, as does the code in
4944 pcre_exec(). Fudge the offset value to achieve this. We check for a
4945 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4946 find the default.
4947
4948 Otherwise, in the case of UTF-8 matching, the advance must be one
4949 character, not one byte. */
4950
4951 else
4952 {
4953 if (g_notempty != 0)
4954 {
4955 int onechar = 1;
4956 unsigned int obits = REAL_PCRE_OPTIONS(re);
4957 use_offsets[0] = start_offset;
4958 if ((obits & PCRE_NEWLINE_BITS) == 0)
4959 {
4960 int d;
4961 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4962 /* Note that these values are always the ASCII ones, even in
4963 EBCDIC environments. CR = 13, NL = 10. */
4964 obits = (d == 13)? PCRE_NEWLINE_CR :
4965 (d == 10)? PCRE_NEWLINE_LF :
4966 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4967 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4968 (d == -1)? PCRE_NEWLINE_ANY : 0;
4969 }
4970 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4971 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4972 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4973 &&
4974 start_offset < len - 1 && (
4975 #ifdef SUPPORT_PCRE8
4976 (pcre_mode == PCRE8_MODE &&
4977 bptr[start_offset] == '\r' &&
4978 bptr[start_offset + 1] == '\n') ||
4979 #endif
4980 #ifdef SUPPORT_PCRE16
4981 (pcre_mode == PCRE16_MODE &&
4982 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
4983 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
4984 #endif
4985 #ifdef SUPPORT_PCRE32
4986 (pcre_mode == PCRE32_MODE &&
4987 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
4988 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
4989 #endif
4990 0))
4991 onechar++;
4992 else if (use_utf)
4993 {
4994 while (start_offset + onechar < len)
4995 {
4996 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4997 onechar++;
4998 }
4999 }
5000 use_offsets[1] = start_offset + onechar;
5001 }
5002 else
5003 {
5004 switch(count)
5005 {
5006 case PCRE_ERROR_NOMATCH:
5007 if (gmatched == 0)
5008 {
5009 if (markptr == NULL)
5010 {
5011 fprintf(outfile, "No match");
5012 }
5013 else
5014 {
5015 fprintf(outfile, "No match, mark = ");
5016 PCHARSV(markptr, 0, -1, outfile);
5017 }
5018 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5019 putc('\n', outfile);
5020 }
5021 break;
5022
5023 case PCRE_ERROR_BADUTF8:
5024 case PCRE_ERROR_SHORTUTF8:
5025 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5026 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5027 8 * CHAR_SIZE);
5028 if (use_size_offsets >= 2)
5029 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5030 use_offsets[1]);
5031 fprintf(outfile, "\n");
5032 break;
5033
5034 case PCRE_ERROR_BADUTF8_OFFSET:
5035 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5036 8 * CHAR_SIZE);
5037 break;
5038
5039 default:
5040 if (count < 0 &&
5041 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5042 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5043 else
5044 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5045 break;
5046 }
5047
5048 break; /* Out of the /g loop */
5049 }
5050 }
5051
5052 /* If not /g or /G we are done */
5053
5054 if (!do_g && !do_G) break;
5055
5056 /* If we have matched an empty string, first check to see if we are at
5057 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5058 Perl's /g option does. This turns out to be rather cunning. First we set
5059 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5060 same point. If this fails (picked up above) we advance to the next
5061 character. */
5062
5063 g_notempty = 0;
5064
5065 if (use_offsets[0] == use_offsets[1])
5066 {
5067 if (use_offsets[0] == len) break;
5068 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5069 }
5070
5071 /* For /g, update the start offset, leaving the rest alone */
5072
5073 if (do_g) start_offset = use_offsets[1];
5074
5075 /* For /G, update the pointer and length */
5076
5077 else
5078 {
5079 bptr += use_offsets[1] * CHAR_SIZE;
5080 len -= use_offsets[1];
5081 }
5082 } /* End of loop for /g and /G */
5083
5084 NEXT_DATA: continue;
5085 } /* End of loop for data lines */
5086
5087 CONTINUE:
5088
5089 #if !defined NOPOSIX
5090 if (posix || do_posix) regfree(&preg);
5091 #endif
5092
5093 if (re != NULL) new_free(re);
5094 if (extra != NULL)
5095 {
5096 PCRE_FREE_STUDY(extra);
5097 }
5098 if (locale_set)
5099 {
5100 new_free((void *)tables);
5101 setlocale(LC_CTYPE, "C");
5102 locale_set = 0;
5103 }
5104 if (jit_stack != NULL)
5105 {
5106 PCRE_JIT_STACK_FREE(jit_stack);
5107 jit_stack = NULL;
5108 }
5109 }
5110
5111 if (infile == stdin) fprintf(outfile, "\n");
5112
5113 EXIT:
5114
5115 if (infile != NULL && infile != stdin) fclose(infile);
5116 if (outfile != NULL && outfile != stdout) fclose(outfile);
5117
5118 free(buffer);
5119 free(dbuffer);
5120 free(pbuffer);
5121 free(offsets);
5122
5123 #ifdef SUPPORT_PCRE16
5124 if (buffer16 != NULL) free(buffer16);
5125 #endif
5126 #ifdef SUPPORT_PCRE32
5127 if (buffer32 != NULL) free(buffer32);
5128 #endif
5129
5130 #if !defined NODFA
5131 if (dfa_workspace != NULL)
5132 free(dfa_workspace);
5133 #endif
5134
5135 return yield;
5136 }
5137
5138 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5