/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1085 - (show annotations)
Tue Oct 16 15:55:32 2012 UTC (7 years, 1 month ago) by chpe
File MIME type: text/plain
File size: 153766 byte(s)
pcre32: pcretest: Fix pchar for 32-bit
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136
137 #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138 /* Configure internal macros to 32 bit mode. */
139 #define COMPILE_PCRE32
140 #endif
141 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142 /* Configure internal macros to 16 bit mode. */
143 #define COMPILE_PCRE16
144 #endif
145 #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146 /* Configure internal macros to 8 bit mode. */
147 #define COMPILE_PCRE8
148 #endif
149
150 #include "pcre_internal.h"
151
152 /* The pcre_printint() function, which prints the internal form of a compiled
153 regex, is held in a separate file so that (a) it can be compiled in either
154 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
155 when that is compiled in debug mode. */
156
157 #ifdef SUPPORT_PCRE8
158 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159 #endif
160 #ifdef SUPPORT_PCRE16
161 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162 #endif
163 #ifdef SUPPORT_PCRE32
164 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165 #endif
166
167 /* We need access to some of the data tables that PCRE uses. So as not to have
168 to keep two copies, we include the source files here, changing the names of the
169 external symbols to prevent clashes. */
170
171 #define PCRE_INCLUDED
172
173 #include "pcre_tables.c"
174 #include "pcre_ucd.c"
175
176 /* The definition of the macro PRINTABLE, which determines whether to print an
177 output character as-is or as a hex value when showing compiled patterns, is
178 the same as in the printint.src file. We use it here in cases when the locale
179 has not been explicitly changed, so as to get consistent output from systems
180 that differ in their output from isprint() even in the "C" locale. */
181
182 #ifdef EBCDIC
183 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
184 #else
185 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
186 #endif
187
188 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189
190 /* Posix support is disabled in 16 or 32 bit only mode. */
191 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192 #define NOPOSIX
193 #endif
194
195 /* It is possible to compile this test program without including support for
196 testing the POSIX interface, though this is not available via the standard
197 Makefile. */
198
199 #if !defined NOPOSIX
200 #include "pcreposix.h"
201 #endif
202
203 /* It is also possible, originally for the benefit of a version that was
204 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206 automatically cut out the UTF support if PCRE is built without it. */
207
208 #ifndef SUPPORT_UTF
209 #ifndef NOUTF
210 #define NOUTF
211 #endif
212 #endif
213
214 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
215 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
216 only from one place and is handled differently). I couldn't dream up any way of
217 using a single macro to do this in a generic way, because of the many different
218 argument requirements. We know that at least one of SUPPORT_PCRE8 and
219 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
220 use these in the definitions of generic macros.
221
222 **** Special note about the PCHARSxxx macros: the address of the string to be
223 printed is always given as two arguments: a base address followed by an offset.
224 The base address is cast to the correct data size for 8 or 16 bit data; the
225 offset is in units of this size. If the string were given as base+offset in one
226 argument, the casting might be incorrectly applied. */
227
228 #ifdef SUPPORT_PCRE8
229
230 #define PCHARS8(lv, p, offset, len, f) \
231 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
232
233 #define PCHARSV8(p, offset, len, f) \
234 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235
236 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237 p = read_capture_name8(p, cn8, re)
238
239 #define STRLEN8(p) ((int)strlen((char *)p))
240
241 #define SET_PCRE_CALLOUT8(callout) \
242 pcre_callout = callout
243
244 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245 pcre_assign_jit_stack(extra, callback, userdata)
246
247 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248 re = pcre_compile((char *)pat, options, error, erroffset, tables)
249
250 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251 namesptr, cbuffer, size) \
252 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253 (char *)namesptr, cbuffer, size)
254
255 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257
258 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259 offsets, size_offsets, workspace, size_workspace) \
260 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261 offsets, size_offsets, workspace, size_workspace)
262
263 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264 offsets, size_offsets) \
265 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266 offsets, size_offsets)
267
268 #define PCRE_FREE_STUDY8(extra) \
269 pcre_free_study(extra)
270
271 #define PCRE_FREE_SUBSTRING8(substring) \
272 pcre_free_substring(substring)
273
274 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275 pcre_free_substring_list(listptr)
276
277 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278 getnamesptr, subsptr) \
279 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280 (char *)getnamesptr, subsptr)
281
282 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
283 n = pcre_get_stringnumber(re, (char *)ptr) /* Stores the 8-bit lookup result for name ptr in n. NOTE(review): the rc parameter is unused; re is taken from the scope where the macro is expanded - confirm callers have a suitable re. */
284
285 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287
288 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290
291 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293
294 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295 pcre_printint(re, outfile, debug_lengths)
296
297 #define PCRE_STUDY8(extra, re, options, error) \
298 extra = pcre_study(re, options, error)
299
300 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301 pcre_jit_stack_alloc(startsize, maxsize)
302
303 #define PCRE_JIT_STACK_FREE8(stack) \
304 pcre_jit_stack_free(stack)
305
306 #endif /* SUPPORT_PCRE8 */
307
308 /* -----------------------------------------------------------*/
309
310 #ifdef SUPPORT_PCRE16
311
312 #define PCHARS16(lv, p, offset, len, f) \
313 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
314
315 #define PCHARSV16(p, offset, len, f) \
316 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
317
318 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319 p = read_capture_name16(p, cn16, re)
320
321 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322
323 #define SET_PCRE_CALLOUT16(callout) \
324 pcre16_callout = (int (*)(pcre16_callout_block *))callout
325
326 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327 pcre16_assign_jit_stack((pcre16_extra *)extra, \
328 (pcre16_jit_callback)callback, userdata)
329
330 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332 tables)
333
334 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335 namesptr, cbuffer, size) \
336 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338
339 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341 (PCRE_UCHAR16 *)cbuffer, size/2)
342
343 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344 offsets, size_offsets, workspace, size_workspace) \
345 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347 workspace, size_workspace)
348
349 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350 offsets, size_offsets) \
351 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352 len, start_offset, options, offsets, size_offsets)
353
354 #define PCRE_FREE_STUDY16(extra) \
355 pcre16_free_study((pcre16_extra *)extra)
356
357 #define PCRE_FREE_SUBSTRING16(substring) \
358 pcre16_free_substring((PCRE_SPTR16)substring)
359
360 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362
363 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364 getnamesptr, subsptr) \
365 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367
368 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
369 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr) /* Stores the 16-bit lookup result for name ptr in n. NOTE(review): the rc parameter is unused; re is taken from the expansion site and, unlike the sibling 16-bit macros, is not cast to (pcre16 *) - confirm. */
370
371 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373 (PCRE_SPTR16 *)(void*)subsptr)
374
375 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377 (PCRE_SPTR16 **)(void*)listptr)
378
379 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381 tables)
382
383 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384 pcre16_printint(re, outfile, debug_lengths)
385
386 #define PCRE_STUDY16(extra, re, options, error) \
387 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388
389 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391
392 #define PCRE_JIT_STACK_FREE16(stack) \
393 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394
395 #endif /* SUPPORT_PCRE16 */
396
397 /* -----------------------------------------------------------*/
398
399 #ifdef SUPPORT_PCRE32
400
401 #define PCHARS32(lv, p, offset, len, f) \
402 lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)
403
404 #define PCHARSV32(p, offset, len, f) \
405 (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)
406
407 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408 p = read_capture_name32(p, cn32, re)
409
410 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411
412 #define SET_PCRE_CALLOUT32(callout) \
413 pcre32_callout = (int (*)(pcre32_callout_block *))callout
414
415 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416 pcre32_assign_jit_stack((pcre32_extra *)extra, \
417 (pcre32_jit_callback)callback, userdata)
418
419 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421 tables)
422
423 #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
424 namesptr, cbuffer, size) \
425 rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
426 count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/4) /* Copies a named capture into cbuffer and stores the result in rc. size is a byte count (the 8-bit macro passes it unchanged, the 16-bit one halves it), so it is divided by 4 to give the capacity in 32-bit units. */
427
428 #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
429 rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
430 (PCRE_UCHAR32 *)cbuffer, size/4) /* Copies capture number i into cbuffer and stores the result in rc. size is a byte count (the 8-bit macro passes it unchanged, the 16-bit one halves it), so it is divided by 4 to give the capacity in 32-bit units. */
431
432 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433 offsets, size_offsets, workspace, size_workspace) \
434 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436 workspace, size_workspace)
437
438 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439 offsets, size_offsets) \
440 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441 len, start_offset, options, offsets, size_offsets)
442
443 #define PCRE_FREE_STUDY32(extra) \
444 pcre32_free_study((pcre32_extra *)extra)
445
446 #define PCRE_FREE_SUBSTRING32(substring) \
447 pcre32_free_substring((PCRE_SPTR32)substring)
448
449 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451
452 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453 getnamesptr, subsptr) \
454 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456
457 #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
458 n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr) /* Stores the 32-bit lookup result for name ptr in n. NOTE(review): the rc parameter is unused; re is taken from the expansion site and, unlike the sibling 32-bit macros, is not cast to (pcre32 *) - confirm. */
459
460 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462 (PCRE_SPTR32 *)(void*)subsptr)
463
464 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466 (PCRE_SPTR32 **)(void*)listptr)
467
468 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470 tables)
471
472 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473 pcre32_printint(re, outfile, debug_lengths)
474
475 #define PCRE_STUDY32(extra, re, options, error) \
476 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477
478 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480
481 #define PCRE_JIT_STACK_FREE32(stack) \
482 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483
484 #endif /* SUPPORT_PCRE32 */
485
486
487 /* ----- Two or more modes are supported; a runtime test is needed, except for
488 pcre_config(), and the JIT stack functions, when it doesn't matter which
489 version is called. ----- */
490
491 enum { /* Library-width selectors that the generic macros compare pcre_mode against. The numeric values matter: the multi-mode CHAR_SIZE is (1 << pcre_mode). */
492 PCRE8_MODE, /* 0 -> CHAR_SIZE 1 */
493 PCRE16_MODE, /* 1 -> CHAR_SIZE 2 */
494 PCRE32_MODE /* 2 -> CHAR_SIZE 4 */
495 };
496
497 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498
499 #define CHAR_SIZE (1 << pcre_mode)
500
501 #define PCHARS(lv, p, offset, len, f) \
502 if (pcre_mode == PCRE32_MODE) \
503 PCHARS32(lv, p, offset, len, f); \
504 else if (pcre_mode == PCRE16_MODE) \
505 PCHARS16(lv, p, offset, len, f); \
506 else \
507 PCHARS8(lv, p, offset, len, f)
508
509 #define PCHARSV(p, offset, len, f) \
510 if (pcre_mode == PCRE32_MODE) \
511 PCHARSV32(p, offset, len, f); \
512 else if (pcre_mode == PCRE16_MODE) \
513 PCHARSV16(p, offset, len, f); \
514 else \
515 PCHARSV8(p, offset, len, f)
516
517 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518 if (pcre_mode == PCRE32_MODE) \
519 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520 else if (pcre_mode == PCRE16_MODE) \
521 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522 else \
523 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524
525 #define SET_PCRE_CALLOUT(callout) \
526 if (pcre_mode == PCRE32_MODE) \
527 SET_PCRE_CALLOUT32(callout); \
528 else if (pcre_mode == PCRE16_MODE) \
529 SET_PCRE_CALLOUT16(callout); \
530 else \
531 SET_PCRE_CALLOUT8(callout)
532
533 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534
535 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536 if (pcre_mode == PCRE32_MODE) \
537 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538 else if (pcre_mode == PCRE16_MODE) \
539 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540 else \
541 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542
543 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544 if (pcre_mode == PCRE32_MODE) \
545 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546 else if (pcre_mode == PCRE16_MODE) \
547 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548 else \
549 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
550
551 #define PCRE_CONFIG pcre_config
552
553 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554 namesptr, cbuffer, size) \
555 if (pcre_mode == PCRE32_MODE) \
556 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557 namesptr, cbuffer, size); \
558 else if (pcre_mode == PCRE16_MODE) \
559 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560 namesptr, cbuffer, size); \
561 else \
562 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
563 namesptr, cbuffer, size)
564
565 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566 if (pcre_mode == PCRE32_MODE) \
567 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568 else if (pcre_mode == PCRE16_MODE) \
569 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570 else \
571 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572
573 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574 offsets, size_offsets, workspace, size_workspace) \
575 if (pcre_mode == PCRE32_MODE) \
576 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577 offsets, size_offsets, workspace, size_workspace); \
578 else if (pcre_mode == PCRE16_MODE) \
579 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580 offsets, size_offsets, workspace, size_workspace); \
581 else \
582 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets, workspace, size_workspace)
584
585 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets) \
587 if (pcre_mode == PCRE32_MODE) \
588 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets); \
590 else if (pcre_mode == PCRE16_MODE) \
591 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets); \
593 else \
594 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
595 offsets, size_offsets)
596
597 #define PCRE_FREE_STUDY(extra) \
598 if (pcre_mode == PCRE32_MODE) \
599 PCRE_FREE_STUDY32(extra); \
600 else if (pcre_mode == PCRE16_MODE) \
601 PCRE_FREE_STUDY16(extra); \
602 else \
603 PCRE_FREE_STUDY8(extra)
604
605 #define PCRE_FREE_SUBSTRING(substring) \
606 if (pcre_mode == PCRE32_MODE) \
607 PCRE_FREE_SUBSTRING32(substring); \
608 else if (pcre_mode == PCRE16_MODE) \
609 PCRE_FREE_SUBSTRING16(substring); \
610 else \
611 PCRE_FREE_SUBSTRING8(substring)
612
613 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614 if (pcre_mode == PCRE32_MODE) \
615 PCRE_FREE_SUBSTRING_LIST32(listptr); \
616 else if (pcre_mode == PCRE16_MODE) \
617 PCRE_FREE_SUBSTRING_LIST16(listptr); \
618 else \
619 PCRE_FREE_SUBSTRING_LIST8(listptr)
620
621 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622 getnamesptr, subsptr) \
623 if (pcre_mode == PCRE32_MODE) \
624 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625 getnamesptr, subsptr); \
626 else if (pcre_mode == PCRE16_MODE) \
627 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628 getnamesptr, subsptr); \
629 else \
630 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
631 getnamesptr, subsptr)
632
633 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634 if (pcre_mode == PCRE32_MODE) \
635 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636 else if (pcre_mode == PCRE16_MODE) \
637 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638 else \
639 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640
641 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642 if (pcre_mode == PCRE32_MODE) \
643 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644 else if (pcre_mode == PCRE16_MODE) \
645 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646 else \
647 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648
649 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650 if (pcre_mode == PCRE32_MODE) \
651 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652 else if (pcre_mode == PCRE16_MODE) \
653 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654 else \
655 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656
657 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658 (pcre_mode == PCRE32_MODE ? \
659 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660 : pcre_mode == PCRE16_MODE ? \
661 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663
664 #define PCRE_JIT_STACK_FREE(stack) \
665 if (pcre_mode == PCRE32_MODE) \
666 PCRE_JIT_STACK_FREE32(stack); \
667 else if (pcre_mode == PCRE16_MODE) \
668 PCRE_JIT_STACK_FREE16(stack); \
669 else \
670 PCRE_JIT_STACK_FREE8(stack)
671
672 #define PCRE_MAKETABLES \
673 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674
675 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676 if (pcre_mode == PCRE32_MODE) \
677 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678 else if (pcre_mode == PCRE16_MODE) \
679 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680 else \
681 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682
683 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684 if (pcre_mode == PCRE32_MODE) \
685 PCRE_PRINTINT32(re, outfile, debug_lengths); \
686 else if (pcre_mode == PCRE16_MODE) \
687 PCRE_PRINTINT16(re, outfile, debug_lengths); \
688 else \
689 PCRE_PRINTINT8(re, outfile, debug_lengths)
690
691 #define PCRE_STUDY(extra, re, options, error) \
692 if (pcre_mode == PCRE32_MODE) \
693 PCRE_STUDY32(extra, re, options, error); \
694 else if (pcre_mode == PCRE16_MODE) \
695 PCRE_STUDY16(extra, re, options, error); \
696 else \
697 PCRE_STUDY8(extra, re, options, error)
698
699 /* ----- Only 8-bit mode is supported ----- */
700
701 #elif defined SUPPORT_PCRE8
702 #define CHAR_SIZE 1
703 #define PCHARS PCHARS8
704 #define PCHARSV PCHARSV8
705 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
706 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
707 #define STRLEN STRLEN8
708 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
709 #define PCRE_COMPILE PCRE_COMPILE8
710 #define PCRE_CONFIG pcre_config
711 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
712 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
713 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
714 #define PCRE_EXEC PCRE_EXEC8
715 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
716 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
717 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
718 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
719 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
720 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
721 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
722 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
723 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
724 #define PCRE_MAKETABLES pcre_maketables()
725 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
726 #define PCRE_PRINTINT PCRE_PRINTINT8
727 #define PCRE_STUDY PCRE_STUDY8
728
729 /* ----- Only 16-bit mode is supported ----- */
730
731 #elif defined SUPPORT_PCRE16
732 #define CHAR_SIZE 2
733 #define PCHARS PCHARS16
734 #define PCHARSV PCHARSV16
735 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
736 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
737 #define STRLEN STRLEN16
738 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
739 #define PCRE_COMPILE PCRE_COMPILE16
740 #define PCRE_CONFIG pcre16_config
741 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
742 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
743 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
744 #define PCRE_EXEC PCRE_EXEC16
745 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
746 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
747 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
748 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
749 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
750 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
751 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
752 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
753 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
754 #define PCRE_MAKETABLES pcre16_maketables()
755 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756 #define PCRE_PRINTINT PCRE_PRINTINT16
757 #define PCRE_STUDY PCRE_STUDY16
758
759 /* ----- Only 32-bit mode is supported ----- */
760
761 #elif defined SUPPORT_PCRE32
762 #define CHAR_SIZE 4
763 #define PCHARS PCHARS32
764 #define PCHARSV PCHARSV32
765 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
766 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
767 #define STRLEN STRLEN32
768 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
769 #define PCRE_COMPILE PCRE_COMPILE32
770 #define PCRE_CONFIG pcre32_config
771 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
773 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
774 #define PCRE_EXEC PCRE_EXEC32
775 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
776 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
777 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
778 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
779 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
780 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
781 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
782 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
783 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
784 #define PCRE_MAKETABLES pcre32_maketables()
785 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786 #define PCRE_PRINTINT PCRE_PRINTINT32
787 #define PCRE_STUDY PCRE_STUDY32
788
789 #endif
790
791 /* ----- End of mode-specific function call macros ----- */
792
793
794 /* Other parameters */
795
/* Systems whose headers lack CLOCKS_PER_SEC get CLK_TCK if that exists, and
otherwise a fallback of 100. */

#if !defined(CLOCKS_PER_SEC) && defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#elif !defined(CLOCKS_PER_SEC)
#define CLOCKS_PER_SEC 100
#endif

/* Dimension used for DFA matching; not defined at all when NODFA is set. */

#ifndef NODFA
#define DFA_WS_DIMENSION 1000
#endif

/* Default number of repetitions of the loop used for timing. */

#define LOOPREPEAT 500000
811
812 /* Static variables */
813
/* File-scope state shared by the functions in this file. Most of these are
assigned outside this section; only the uses visible nearby are described. */

static FILE *outfile;
static int log_store = 0;
static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int debug_lengths;
static int first_callout;
static int jit_was_used;            /* set to TRUE by jit_callback() */
static int locale_set = 0;
static int show_malloc;
static int use_utf;                 /* consulted by pchar(), pchars() and pchars16() */
static size_t gotten_store;
static size_t first_gotten_store = 0;
static const unsigned char *last_callout_mark = NULL;

/* The buffers grow automatically if very long input lines are encountered.
buffer_size is the common size in bytes of buffer, dbuffer and pbuffer;
extend_inputline() doubles it and reallocates all three together. */

static int buffer_size = 50000;
static pcre_uint8 *buffer = NULL;   /* input lines are read into this one */
static pcre_uint8 *dbuffer = NULL;
static pcre_uint8 *pbuffer = NULL;  /* contents are preserved when the buffers grow */
836
/* Another buffer is needed for translation to 16-bit or 32-bit character
strings. It will be obtained and extended as required. */
839
#if defined(SUPPORT_PCRE8) && (defined(SUPPORT_PCRE16) || defined(SUPPORT_PCRE32))

/* The table of operator lengths used for 16/32-bit compiling is needed here so
that the bytes of a pattern can be swapped for save/reload testing. The data
comes from a macro, which is convenient, but that macro depends on LINK_SIZE
and IMM2_SIZE, so both have to be re-expressed in units of the wider code
element. As a safety check, COMPILE_PCRE[16|32] must *not* be set. */

#if defined(COMPILE_PCRE16)
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

#if defined(COMPILE_PCRE32)
#error COMPILE_PCRE32 must not be set when compiling pcretest.c
#endif

/* An immediate 2-byte value occupies a single wider code element. */

#undef IMM2_SIZE
#define IMM2_SIZE 1

/* A 2-byte link becomes one element; 3- and 4-byte links become two. */

#if LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#elif LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif

#endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
870
#ifdef SUPPORT_PCRE16
/* Conversion buffer filled by to16(). buffer16_size is the allocated size in
bytes, and stays 0 until to16() makes the first allocation. */
static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;
/* Table initialized from the OP_LENGTHS macro, held as 16-bit values. */
static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE16 */

#ifdef SUPPORT_PCRE32
/* Conversion buffer filled by to32(). buffer32_size is the allocated size in
bytes, and stays 0 until to32() makes the first allocation. */
static int buffer32_size = 0;
static pcre_uint32 *buffer32 = NULL;
/* Table initialized from the OP_LENGTHS macro, held as 32-bit values. */
static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE32 */
882
/* If we have 8-bit support, default to it; if there is also 16- or 32-bit
support, it can be changed by an option. If there is no 8-bit support, there
must be 16- or 32-bit support, so default to 16-bit mode when it is available
and to 32-bit mode otherwise. */

#if defined SUPPORT_PCRE8
static int pcre_mode = PCRE8_MODE;
#elif defined SUPPORT_PCRE16
static int pcre_mode = PCRE16_MODE;
#elif defined SUPPORT_PCRE32
static int pcre_mode = PCRE32_MODE;
#endif
894
/* JIT study options for -s+n and /S+n where '1' <= n <= '7'. There are seven
entries, presumably indexed by n - 1 (the indexing code is not in this
section). Reading n as a 3-bit number, the entry for n includes
PCRE_STUDY_JIT_COMPILE when bit 0 is set, PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
when bit 1 is set, and PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE when bit 2 is set. */

static int jit_study_bits[] =
{
PCRE_STUDY_JIT_COMPILE,                                        /* n = 1 */
PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,                           /* n = 2 */
PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,  /* n = 3 */
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,                           /* n = 4 */
PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,  /* n = 5 */
PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* n = 6 */
PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + /* n = 7 */
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
};

/* Mask containing all three JIT study option bits. */

#define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
911
/* Textual explanations for runtime error codes. The table has 31 entries
(indexes 0 to 30). It is presumably indexed by the negated error code returned
by the matching functions - confirm against the code that uses it. A NULL
entry marks a code that has no fixed text here, either because it is reported
by special-case code or because it is never returned. */

static const char *errtexts[] = {
NULL, /* 0 is no error */
NULL, /* NOMATCH is handled specially */
"NULL argument passed",
"bad option value",
"magic number missing",
"unknown opcode - pattern overwritten?",
"no more memory",
NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
"match limit exceeded",
"callout error code",
NULL, /* BADUTF8/16 is handled specially */
NULL, /* BADUTF8/16 offset is handled specially */
NULL, /* PARTIAL is handled specially */
"not used - internal error",
"internal error - pattern overwritten?",
"bad count value",
"item unsupported for DFA matching",
"backreference condition or recursion test not supported for DFA matching",
"match limit not supported for DFA matching",
"workspace size exceeded in DFA matching",
"too much recursion for DFA matching",
"recursion limit exceeded",
"not used - internal error",
"invalid combination of newline options",
"bad offset value",
NULL, /* SHORTUTF8/16 is handled specially */
"nested recursion at the same subject position",
"JIT stack limit reached",
"pattern compiled in wrong mode: 8-bit/16-bit error",
"pattern compiled with other endianness",
"invalid data in workspace for DFA restart"
};
947
948
949 /*************************************************
950 * Alternate character tables *
951 *************************************************/
952
953 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
954 using the default tables of the library. However, the T option can be used to
955 select alternate sets of tables, for different kinds of testing. Note also that
956 the L (locale) option also adjusts the tables. */
957
958 /* This is the set of tables distributed as default with PCRE. It recognizes
959 only ASCII characters. */
960
/* Layout: four consecutive tables in one array of 1088 bytes - a 256-byte
lower casing table, a 256-byte case flipping table, 320 bytes of character
class bit maps (ten maps of 32 bytes), and a 256-byte character type table. */

static const pcre_uint8 tables0[] = {

/* This table is a lower casing table. Only 65-90 ('A'-'Z') are changed, to
97-122; every other value maps to itself. */

0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* This table is a case flipping table. 'A'-'Z' and 'a'-'z' are exchanged;
every other value maps to itself. */

0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. The
maps appear below in that order. */

/* space */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* xdigit */
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* digit */
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* upper */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* lower */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* word */
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* graph */
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* print */
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* punct */
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* cntrl */
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
0x04 decimal digit
0x08 hexadecimal digit
0x10 alphanumeric or '_'
0x80 regular expression metacharacter or binary zero
*/

0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1129
/* This is a set of tables that came originally from a Windows user. It seems to
be at least an approximation of ISO 8859. In particular, there are characters
with values above 127 that are marked as spaces, letters, etc. */
1133
/* Same layout as tables0: a 256-byte lower casing table, a 256-byte case
flipping table, ten 32-byte character class bit maps (in the order space,
xdigit, digit, upper, lower, word, graph, print, punct, cntrl), and a 256-byte
character type table. All values are written in decimal here. */

static const pcre_uint8 tables1[] = {

/* Lower casing table. Besides 'A'-'Z', the values 192-222 (except 215) are
mapped to the value 32 higher. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps, 32 bytes each, least significant bit first. */

/* space */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,

/* xdigit */
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,

/* digit */
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,

/* upper */
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,

/* lower */
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,

/* word */
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,

/* graph */
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,

/* print */
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,

/* punct */
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,

/* cntrl */
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Character type table, one byte per character value, eight values per row.
The first 16 rows hold the same values as the first 128 entries of the
corresponding table in tables0, whose bits are: 1 white space, 2 letter,
4 decimal digit, 8 hexadecimal digit, 16 alphanumeric or '_', 128 regular
expression metacharacter or binary zero. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1272
1273
1274
1275
#if !defined(HAVE_STRERROR)
/*************************************************
*     Fallback strerror() for old C libraries    *
*************************************************/

/* A few elderly systems (SunOS4 is one example) have no strerror() in their
libraries. This substitute relies on the sys_errlist[] message table and its
length sys_nerr, which are declared here as externals. Out-of-range numbers
yield a fixed "unknown" text instead of indexing outside the table. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1295
1296
1297
1298 /*************************************************
1299 * Print newline configuration *
1300 *************************************************/
1301
1302 /*
1303 Arguments:
1304 rc the return code from PCRE_CONFIG_NEWLINE
1305 isc TRUE if called from "-C newline"
1306 Returns: nothing
1307 */
1308
1309 static void
1310 print_newline_config(int rc, BOOL isc)
1311 {
1312 const char *s = NULL;
1313 if (!isc) printf(" Newline sequence is ");
1314 switch(rc)
1315 {
1316 case CHAR_CR: s = "CR"; break;
1317 case CHAR_LF: s = "LF"; break;
1318 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1319 case -1: s = "ANY"; break;
1320 case -2: s = "ANYCRLF"; break;
1321
1322 default:
1323 printf("a non-standard value: 0x%04x\n", rc);
1324 return;
1325 }
1326
1327 printf("%s\n", s);
1328 }
1329
1330
1331
1332 /*************************************************
1333 * JIT memory callback *
1334 *************************************************/
1335
1336 static pcre_jit_stack* jit_callback(void *arg)
1337 {
1338 jit_was_used = TRUE;
1339 return (pcre_jit_stack *)arg;
1340 }
1341
1342
1343 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1344 /*************************************************
1345 * Convert UTF-8 string to value *
1346 *************************************************/
1347
1348 /* This function takes one or more bytes that represents a UTF-8 character,
1349 and returns the value of the character.
1350
1351 Argument:
1352 utf8bytes a pointer to the byte vector
1353 vptr a pointer to an int to receive the value
1354
1355 Returns: > 0 => the number of bytes consumed
1356 -6 to 0 => malformed UTF-8 character at offset = (-return)
1357 */
1358
1359 static int
1360 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1361 {
1362 int c = *utf8bytes++;
1363 int d = c;
1364 int i, j, s;
1365
1366 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1367 {
1368 if ((d & 0x80) == 0) break;
1369 d <<= 1;
1370 }
1371
1372 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1373 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1374
1375 /* i now has a value in the range 1-5 */
1376
1377 s = 6*i;
1378 d = (c & utf8_table3[i]) << s;
1379
1380 for (j = 0; j < i; j++)
1381 {
1382 c = *utf8bytes++;
1383 if ((c & 0xc0) != 0x80) return -(j+1);
1384 s -= 6;
1385 d |= (c & 0x3f) << s;
1386 }
1387
1388 /* Check that encoding was the correct unique one */
1389
1390 for (j = 0; j < utf8_table1_size; j++)
1391 if (d <= utf8_table1[j]) break;
1392 if (j != i) return -(i+1);
1393
1394 /* Valid value */
1395
1396 *vptr = d;
1397 return i+1;
1398 }
1399 #endif /* NOUTF || SUPPORT_PCRE16 */
1400
1401
1402
1403 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1404 /*************************************************
1405 * Convert character value to UTF-8 *
1406 *************************************************/
1407
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes.

Arguments:
cvalue the character value
utf8bytes pointer to buffer for result - at least 6 bytes long

Returns: number of bytes placed in the buffer
*/
1417
1418 static int
1419 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1420 {
1421 register int i, j;
1422 for (i = 0; i < utf8_table1_size; i++)
1423 if (cvalue <= utf8_table1[i]) break;
1424 utf8bytes += i;
1425 for (j = i; j > 0; j--)
1426 {
1427 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1428 cvalue >>= 6;
1429 }
1430 *utf8bytes = utf8_table2[i] | cvalue;
1431 return i + 1;
1432 }
1433 #endif
1434
1435
1436 #ifdef SUPPORT_PCRE16
1437 /*************************************************
1438 * Convert a string to 16-bit *
1439 *************************************************/
1440
/* In non-UTF mode, the space needed for a 16-bit string is exactly double the
8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
double, because values up to 0xffff use 1 to 3 bytes in UTF-8 and 2 bytes in
UTF-16, while higher values use 4 bytes in both UTF-8 and UTF-16. The
result is always left in buffer16.
1446
1447 Note that this function does not object to surrogate values. This is
1448 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1449 for the purpose of testing that they are correctly faulted.
1450
1451 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1452 in UTF-8 so that values greater than 255 can be handled.
1453
1454 Arguments:
1455 data TRUE if converting a data line; FALSE for a regex
1456 p points to a byte string
1457 utf true if UTF-8 (to be converted to UTF-16)
1458 len number of bytes in the string (excluding trailing zero)
1459
1460 Returns: number of 16-bit data items used (excluding trailing zero)
1461 OR -1 if a UTF-8 string is malformed
1462 OR -2 if a value > 0x10ffff is encountered
1463 OR -3 if a value > 0xffff is encountered when not in UTF mode
1464 */
1465
1466 static int
1467 to16(int data, pcre_uint8 *p, int utf, int len)
1468 {
1469 pcre_uint16 *pp;
1470
1471 if (buffer16_size < 2*len + 2)
1472 {
1473 if (buffer16 != NULL) free(buffer16);
1474 buffer16_size = 2*len + 2;
1475 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1476 if (buffer16 == NULL)
1477 {
1478 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1479 exit(1);
1480 }
1481 }
1482
1483 pp = buffer16;
1484
1485 if (!utf && !data)
1486 {
1487 while (len-- > 0) *pp++ = *p++;
1488 }
1489
1490 else
1491 {
1492 int c = 0;
1493 while (len > 0)
1494 {
1495 int chlen = utf82ord(p, &c);
1496 if (chlen <= 0) return -1;
1497 if (c > 0x10ffff) return -2;
1498 p += chlen;
1499 len -= chlen;
1500 if (c < 0x10000) *pp++ = c; else
1501 {
1502 if (!utf) return -3;
1503 c -= 0x10000;
1504 *pp++ = 0xD800 | (c >> 10);
1505 *pp++ = 0xDC00 | (c & 0x3ff);
1506 }
1507 }
1508 }
1509
1510 *pp = 0;
1511 return pp - buffer16;
1512 }
1513 #endif
1514
1515 #ifdef SUPPORT_PCRE32
1516 /*************************************************
1517 * Convert a string to 32-bit *
1518 *************************************************/
1519
/* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
times, because every character occupies at least one byte in UTF-8 and exactly
4 bytes in UTF-32. The
result is always left in buffer32.
1525
Note that for data lines this function does not object to surrogate values.
This is deliberate; it makes it possible to construct UTF-32 strings that are
invalid, for the purpose of testing that they are correctly faulted. Surrogates
in a UTF pattern, however, are rejected with -3.
1529
1530 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1531 in UTF-8 so that values greater than 255 can be handled.
1532
1533 Arguments:
1534 data TRUE if converting a data line; FALSE for a regex
1535 p points to a byte string
1536 utf true if UTF-8 (to be converted to UTF-32)
1537 len number of bytes in the string (excluding trailing zero)
1538
1539 Returns: number of 32-bit data items used (excluding trailing zero)
1540 OR -1 if a UTF-8 string is malformed
1541 OR -2 if a value > 0x10ffff is encountered
1542 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1543 */
1544
1545 static int
1546 to32(int data, pcre_uint8 *p, int utf, int len)
1547 {
1548 pcre_uint32 *pp;
1549
1550 if (buffer32_size < 4*len + 4)
1551 {
1552 if (buffer32 != NULL) free(buffer32);
1553 buffer32_size = 4*len + 4;
1554 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1555 if (buffer32 == NULL)
1556 {
1557 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1558 exit(1);
1559 }
1560 }
1561
1562 pp = buffer32;
1563
1564 if (!utf && !data)
1565 {
1566 while (len-- > 0) *pp++ = *p++;
1567 }
1568
1569 else
1570 {
1571 int c = 0;
1572 while (len > 0)
1573 {
1574 int chlen = utf82ord(p, &c);
1575 if (chlen <= 0) return -1;
1576 if (utf)
1577 {
1578 if (c > 0x10ffff) return -2;
1579 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1580 }
1581
1582 p += chlen;
1583 len -= chlen;
1584 *pp++ = c;
1585 }
1586 }
1587
1588 *pp = 0;
1589 return pp - buffer32;
1590 }
1591 #endif
1592
1593 /*************************************************
1594 * Read or extend an input line *
1595 *************************************************/
1596
1597 /* Input lines are read into buffer, but both patterns and data lines can be
1598 continued over multiple input lines. In addition, if the buffer fills up, we
1599 want to automatically expand it so as to be able to handle extremely large
1600 lines that are needed for certain stress tests. When the input buffer is
1601 expanded, the other two buffers must also be expanded likewise, and the
1602 contents of pbuffer, which are a copy of the input for callouts, must be
1603 preserved (for when expansion happens for a data line). This is not the most
1604 optimal way of handling this, but hey, this is just a test program!
1605
1606 Arguments:
1607 f the file to read
1608 start where in buffer to start (this *must* be within buffer)
1609 prompt for stdin or readline()
1610
1611 Returns: pointer to the start of new data
1612 could be a copy of start, or could be moved
1613 NULL if no data read and EOF reached
1614 */
1615
1616 static pcre_uint8 *
1617 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1618 {
1619 pcre_uint8 *here = start;
1620
1621 for (;;)
1622 {
1623 size_t rlen = (size_t)(buffer_size - (here - buffer));
1624
1625 if (rlen > 1000)
1626 {
1627 int dlen;
1628
1629 /* If libreadline or libedit support is required, use readline() to read a
1630 line if the input is a terminal. Note that readline() removes the trailing
1631 newline, so we must put it back again, to be compatible with fgets(). */
1632
1633 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1634 if (isatty(fileno(f)))
1635 {
1636 size_t len;
1637 char *s = readline(prompt);
1638 if (s == NULL) return (here == start)? NULL : start;
1639 len = strlen(s);
1640 if (len > 0) add_history(s);
1641 if (len > rlen - 1) len = rlen - 1;
1642 memcpy(here, s, len);
1643 here[len] = '\n';
1644 here[len+1] = 0;
1645 free(s);
1646 }
1647 else
1648 #endif
1649
1650 /* Read the next line by normal means, prompting if the file is stdin. */
1651
1652 {
1653 if (f == stdin) printf("%s", prompt);
1654 if (fgets((char *)here, rlen, f) == NULL)
1655 return (here == start)? NULL : start;
1656 }
1657
1658 dlen = (int)strlen((char *)here);
1659 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1660 here += dlen;
1661 }
1662
1663 else
1664 {
1665 int new_buffer_size = 2*buffer_size;
1666 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1667 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1668 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1669
1670 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1671 {
1672 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1673 exit(1);
1674 }
1675
1676 memcpy(new_buffer, buffer, buffer_size);
1677 memcpy(new_pbuffer, pbuffer, buffer_size);
1678
1679 buffer_size = new_buffer_size;
1680
1681 start = new_buffer + (start - buffer);
1682 here = new_buffer + (here - buffer);
1683
1684 free(buffer);
1685 free(dbuffer);
1686 free(pbuffer);
1687
1688 buffer = new_buffer;
1689 dbuffer = new_dbuffer;
1690 pbuffer = new_pbuffer;
1691 }
1692 }
1693
1694 return NULL; /* Control never gets here */
1695 }
1696
1697
1698
1699 /*************************************************
1700 * Read number from string *
1701 *************************************************/
1702
1703 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1704 around with conditional compilation, just do the job by hand. It is only used
1705 for unpicking arguments, so just keep it simple.
1706
1707 Arguments:
1708 str string to be converted
1709 endptr where to put the end pointer
1710
Returns: the value, as a non-negative int
1712 */
1713
1714 static int
1715 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1716 {
1717 int result = 0;
1718 while(*str != 0 && isspace(*str)) str++;
1719 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1720 *endptr = str;
1721 return(result);
1722 }
1723
1724
1725
1726 /*************************************************
1727 * Print one character *
1728 *************************************************/
1729
1730 /* Print a single character either literally, or as a hex escape. */
1731
1732 static int pchar(pcre_uint32 c, FILE *f)
1733 {
1734 int n;
1735 if (PRINTOK(c))
1736 {
1737 if (f != NULL) fprintf(f, "%c", c);
1738 return 1;
1739 }
1740
1741 if (c < 0x100)
1742 {
1743 if (use_utf)
1744 {
1745 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1746 return 6;
1747 }
1748 else
1749 {
1750 if (f != NULL) fprintf(f, "\\x%02x", c);
1751 return 4;
1752 }
1753 }
1754
1755 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1756 return n >= 0 ? n : 0;
1757 }
1758
1759
1760
1761 #ifdef SUPPORT_PCRE8
1762 /*************************************************
1763 * Print 8-bit character string *
1764 *************************************************/
1765
1766 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1767 If handed a NULL file, just counts chars without printing. */
1768
1769 static int pchars(pcre_uint8 *p, int length, FILE *f)
1770 {
1771 int c = 0;
1772 int yield = 0;
1773
1774 if (length < 0)
1775 length = strlen((char *)p);
1776
1777 while (length-- > 0)
1778 {
1779 #if !defined NOUTF
1780 if (use_utf)
1781 {
1782 int rc = utf82ord(p, &c);
1783 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1784 {
1785 length -= rc - 1;
1786 p += rc;
1787 yield += pchar(c, f);
1788 continue;
1789 }
1790 }
1791 #endif
1792 c = *p++;
1793 yield += pchar(c, f);
1794 }
1795
1796 return yield;
1797 }
1798 #endif
1799
1800
1801
1802 #ifdef SUPPORT_PCRE16
1803 /*************************************************
1804 * Find length of 0-terminated 16-bit string *
1805 *************************************************/
1806
1807 static int strlen16(PCRE_SPTR16 p)
1808 {
1809 int len = 0;
1810 while (*p++ != 0) len++;
1811 return len;
1812 }
1813 #endif /* SUPPORT_PCRE16 */
1814
1815
1816
1817 #ifdef SUPPORT_PCRE32
1818 /*************************************************
1819 * Find length of 0-terminated 32-bit string *
1820 *************************************************/
1821
1822 static int strlen32(PCRE_SPTR32 p)
1823 {
1824 int len = 0;
1825 while (*p++ != 0) len++;
1826 return len;
1827 }
1828 #endif /* SUPPORT_PCRE32 */
1829
1830
1831
1832 #ifdef SUPPORT_PCRE16
1833 /*************************************************
1834 * Print 16-bit character string *
1835 *************************************************/
1836
1837 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1838 If handed a NULL file, just counts chars without printing. */
1839
1840 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1841 {
1842 int yield = 0;
1843
1844 if (length < 0)
1845 length = strlen16(p);
1846
1847 while (length-- > 0)
1848 {
1849 pcre_uint32 c = *p++ & 0xffff;
1850 #if !defined NOUTF
1851 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1852 {
1853 int d = *p & 0xffff;
1854 if (d >= 0xDC00 && d < 0xDFFF)
1855 {
1856 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1857 length--;
1858 p++;
1859 }
1860 }
1861 #endif
1862 yield += pchar(c, f);
1863 }
1864
1865 return yield;
1866 }
1867 #endif /* SUPPORT_PCRE16 */
1868
1869
1870
1871 #ifdef SUPPORT_PCRE32
1872 /*************************************************
1873 * Print 32-bit character string *
1874 *************************************************/
1875
1876 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
1877 If handed a NULL file, just counts chars without printing. */
1878
1879 static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
1880 {
1881 int yield = 0;
1882
1883 if (length < 0)
1884 length = strlen32(p);
1885
1886 while (length-- > 0)
1887 {
1888 pcre_uint32 c = *p++;
1889 yield += pchar(c, f);
1890 }
1891
1892 return yield;
1893 }
1894 #endif /* SUPPORT_PCRE32 */
1895
1896
1897
1898 #ifdef SUPPORT_PCRE8
1899 /*************************************************
1900 * Read a capture name (8-bit) and check it *
1901 *************************************************/
1902
1903 static pcre_uint8 *
1904 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1905 {
1906 pcre_uint8 *npp = *pp;
1907 while (isalnum(*p)) *npp++ = *p++;
1908 *npp++ = 0;
1909 *npp = 0;
1910 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1911 {
1912 fprintf(outfile, "no parentheses with name \"");
1913 PCHARSV(*pp, 0, -1, outfile);
1914 fprintf(outfile, "\"\n");
1915 }
1916
1917 *pp = npp;
1918 return p;
1919 }
1920 #endif /* SUPPORT_PCRE8 */
1921
1922
1923
1924 #ifdef SUPPORT_PCRE16
1925 /*************************************************
1926 * Read a capture name (16-bit) and check it *
1927 *************************************************/
1928
1929 /* Note that the text being read is 8-bit. */
1930
1931 static pcre_uint8 *
1932 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1933 {
1934 pcre_uint16 *npp = *pp;
1935 while (isalnum(*p)) *npp++ = *p++;
1936 *npp++ = 0;
1937 *npp = 0;
1938 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1939 {
1940 fprintf(outfile, "no parentheses with name \"");
1941 PCHARSV(*pp, 0, -1, outfile);
1942 fprintf(outfile, "\"\n");
1943 }
1944 *pp = npp;
1945 return p;
1946 }
1947 #endif /* SUPPORT_PCRE16 */
1948
1949
1950
1951 #ifdef SUPPORT_PCRE32
1952 /*************************************************
1953 * Read a capture name (32-bit) and check it *
1954 *************************************************/
1955
1956 /* Note that the text being read is 8-bit. */
1957
1958 static pcre_uint8 *
1959 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
1960 {
1961 pcre_uint32 *npp = *pp;
1962 while (isalnum(*p)) *npp++ = *p++;
1963 *npp++ = 0;
1964 *npp = 0;
1965 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
1966 {
1967 fprintf(outfile, "no parentheses with name \"");
1968 PCHARSV(*pp, 0, -1, outfile);
1969 fprintf(outfile, "\"\n");
1970 }
1971 *pp = npp;
1972 return p;
1973 }
1974 #endif /* SUPPORT_PCRE32 */
1975
1976
1977
1978 /*************************************************
1979 * Callout function *
1980 *************************************************/
1981
1982 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1983 the match. Yield zero unless more callouts than the fail count, or the callout
1984 data is not zero. */
1985
1986 static int callout(pcre_callout_block *cb)
1987 {
1988 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1989 int i, pre_start, post_start, subject_length;
1990
1991 if (callout_extra)
1992 {
1993 fprintf(f, "Callout %d: last capture = %d\n",
1994 cb->callout_number, cb->capture_last);
1995
1996 for (i = 0; i < cb->capture_top * 2; i += 2)
1997 {
1998 if (cb->offset_vector[i] < 0)
1999 fprintf(f, "%2d: <unset>\n", i/2);
2000 else
2001 {
2002 fprintf(f, "%2d: ", i/2);
2003 PCHARSV(cb->subject, cb->offset_vector[i],
2004 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2005 fprintf(f, "\n");
2006 }
2007 }
2008 }
2009
2010 /* Re-print the subject in canonical form, the first time or if giving full
2011 datails. On subsequent calls in the same match, we use pchars just to find the
2012 printed lengths of the substrings. */
2013
2014 if (f != NULL) fprintf(f, "--->");
2015
2016 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2017 PCHARS(post_start, cb->subject, cb->start_match,
2018 cb->current_position - cb->start_match, f);
2019
2020 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2021
2022 PCHARSV(cb->subject, cb->current_position,
2023 cb->subject_length - cb->current_position, f);
2024
2025 if (f != NULL) fprintf(f, "\n");
2026
2027 /* Always print appropriate indicators, with callout number if not already
2028 shown. For automatic callouts, show the pattern offset. */
2029
2030 if (cb->callout_number == 255)
2031 {
2032 fprintf(outfile, "%+3d ", cb->pattern_position);
2033 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2034 }
2035 else
2036 {
2037 if (callout_extra) fprintf(outfile, " ");
2038 else fprintf(outfile, "%3d ", cb->callout_number);
2039 }
2040
2041 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2042 fprintf(outfile, "^");
2043
2044 if (post_start > 0)
2045 {
2046 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2047 fprintf(outfile, "^");
2048 }
2049
2050 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2051 fprintf(outfile, " ");
2052
2053 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2054 pbuffer + cb->pattern_position);
2055
2056 fprintf(outfile, "\n");
2057 first_callout = 0;
2058
2059 if (cb->mark != last_callout_mark)
2060 {
2061 if (cb->mark == NULL)
2062 fprintf(outfile, "Latest Mark: <unset>\n");
2063 else
2064 {
2065 fprintf(outfile, "Latest Mark: ");
2066 PCHARSV(cb->mark, 0, -1, outfile);
2067 putc('\n', outfile);
2068 }
2069 last_callout_mark = cb->mark;
2070 }
2071
2072 if (cb->callout_data != NULL)
2073 {
2074 int callout_data = *((int *)(cb->callout_data));
2075 if (callout_data != 0)
2076 {
2077 fprintf(outfile, "Callout data = %d\n", callout_data);
2078 return callout_data;
2079 }
2080 }
2081
2082 return (cb->callout_number != callout_fail_id)? 0 :
2083 (++callout_count >= callout_fail_count)? 1 : 0;
2084 }
2085
2086
2087 /*************************************************
2088 * Local malloc functions *
2089 *************************************************/
2090
2091 /* Alternative malloc function, to test functionality and save the size of a
2092 compiled re, which is the first store request that pcre_compile() makes. The
2093 show_malloc variable is set only during matching. */
2094
2095 static void *new_malloc(size_t size)
2096 {
2097 void *block = malloc(size);
2098 gotten_store = size;
2099 if (first_gotten_store == 0) first_gotten_store = size;
2100 if (show_malloc)
2101 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2102 return block;
2103 }
2104
2105 static void new_free(void *block)
2106 {
2107 if (show_malloc)
2108 fprintf(outfile, "free %p\n", block);
2109 free(block);
2110 }
2111
2112 /* For recursion malloc/free, to test stacking calls */
2113
2114 static void *stack_malloc(size_t size)
2115 {
2116 void *block = malloc(size);
2117 if (show_malloc)
2118 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2119 return block;
2120 }
2121
2122 static void stack_free(void *block)
2123 {
2124 if (show_malloc)
2125 fprintf(outfile, "stack_free %p\n", block);
2126 free(block);
2127 }
2128
2129
2130 /*************************************************
2131 * Call pcre_fullinfo() *
2132 *************************************************/
2133
2134 /* Get one piece of information from the pcre_fullinfo() function. When only
2135 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2136 value, but the code is defensive.
2137
2138 Arguments:
2139 re compiled regex
2140 study study data
2141 option PCRE_INFO_xxx option
2142 ptr where to put the data
2143
2144 Returns: 0 when OK, < 0 on error
2145 */
2146
2147 static int
2148 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2149 {
2150 int rc;
2151
2152 if (pcre_mode == PCRE32_MODE)
2153 #ifdef SUPPORT_PCRE32
2154 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2155 #else
2156 rc = PCRE_ERROR_BADMODE;
2157 #endif
2158 else if (pcre_mode == PCRE16_MODE)
2159 #ifdef SUPPORT_PCRE16
2160 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2161 #else
2162 rc = PCRE_ERROR_BADMODE;
2163 #endif
2164 else
2165 #ifdef SUPPORT_PCRE8
2166 rc = pcre_fullinfo(re, study, option, ptr);
2167 #else
2168 rc = PCRE_ERROR_BADMODE;
2169 #endif
2170
2171 if (rc < 0)
2172 {
2173 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2174 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2175 if (rc == PCRE_ERROR_BADMODE)
2176 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2177 "%d-bit mode\n", 8 * CHAR_SIZE,
2178 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2179 }
2180
2181 return rc;
2182 }
2183
2184
2185
2186 /*************************************************
2187 * Swap byte functions *
2188 *************************************************/
2189
2190 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2191 value, respectively.
2192
2193 Arguments:
2194 value any number
2195
2196 Returns: the byte swapped value
2197 */
2198
2199 static pcre_uint32
2200 swap_uint32(pcre_uint32 value)
2201 {
2202 return ((value & 0x000000ff) << 24) |
2203 ((value & 0x0000ff00) << 8) |
2204 ((value & 0x00ff0000) >> 8) |
2205 (value >> 24);
2206 }
2207
2208 static pcre_uint16
2209 swap_uint16(pcre_uint16 value)
2210 {
2211 return (value >> 8) | (value << 8);
2212 }
2213
2214
2215
2216 /*************************************************
2217 * Flip bytes in a compiled pattern *
2218 *************************************************/
2219
2220 /* This function is called if the 'F' option was present on a pattern that is
2221 to be written to a file. We flip the bytes of all the integer fields in the
2222 regex data block and the study block. In 16-bit mode this also flips relevant
2223 bytes in the pattern itself. This is to make it possible to test PCRE's
2224 ability to reload byte-flipped patterns, e.g. those compiled on a different
2225 architecture. */
2226
2227 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2228 static void
2229 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2230 {
2231 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2232 #ifdef SUPPORT_PCRE16
2233 int op;
2234 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2235 int length = re->name_count * re->name_entry_size;
2236 #ifdef SUPPORT_UTF
2237 BOOL utf = (re->options & PCRE_UTF16) != 0;
2238 BOOL utf16_char = FALSE;
2239 #endif /* SUPPORT_UTF */
2240 #endif /* SUPPORT_PCRE16 */
2241
2242 /* Always flip the bytes in the main data block and study blocks. */
2243
2244 re->magic_number = REVERSED_MAGIC_NUMBER;
2245 re->size = swap_uint32(re->size);
2246 re->options = swap_uint32(re->options);
2247 re->flags = swap_uint16(re->flags);
2248 re->top_bracket = swap_uint16(re->top_bracket);
2249 re->top_backref = swap_uint16(re->top_backref);
2250 re->first_char = swap_uint16(re->first_char);
2251 re->req_char = swap_uint16(re->req_char);
2252 re->name_table_offset = swap_uint16(re->name_table_offset);
2253 re->name_entry_size = swap_uint16(re->name_entry_size);
2254 re->name_count = swap_uint16(re->name_count);
2255
2256 if (extra != NULL)
2257 {
2258 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2259 rsd->size = swap_uint32(rsd->size);
2260 rsd->flags = swap_uint32(rsd->flags);
2261 rsd->minlength = swap_uint32(rsd->minlength);
2262 }
2263
2264 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2265 in the name table, if present, and then in the pattern itself. */
2266
2267 #ifdef SUPPORT_PCRE16
2268 if (pcre_mode != PCRE16_MODE) return;
2269
2270 while(TRUE)
2271 {
2272 /* Swap previous characters. */
2273 while (length-- > 0)
2274 {
2275 *ptr = swap_uint16(*ptr);
2276 ptr++;
2277 }
2278 #ifdef SUPPORT_UTF
2279 if (utf16_char)
2280 {
2281 if ((ptr[-1] & 0xfc00) == 0xd800)
2282 {
2283 /* We know that there is only one extra character in UTF-16. */
2284 *ptr = swap_uint16(*ptr);
2285 ptr++;
2286 }
2287 }
2288 utf16_char = FALSE;
2289 #endif /* SUPPORT_UTF */
2290
2291 /* Get next opcode. */
2292
2293 length = 0;
2294 op = *ptr;
2295 *ptr++ = swap_uint16(op);
2296
2297 switch (op)
2298 {
2299 case OP_END:
2300 return;
2301
2302 #ifdef SUPPORT_UTF
2303 case OP_CHAR:
2304 case OP_CHARI:
2305 case OP_NOT:
2306 case OP_NOTI:
2307 case OP_STAR:
2308 case OP_MINSTAR:
2309 case OP_PLUS:
2310 case OP_MINPLUS:
2311 case OP_QUERY:
2312 case OP_MINQUERY:
2313 case OP_UPTO:
2314 case OP_MINUPTO:
2315 case OP_EXACT:
2316 case OP_POSSTAR:
2317 case OP_POSPLUS:
2318 case OP_POSQUERY:
2319 case OP_POSUPTO:
2320 case OP_STARI:
2321 case OP_MINSTARI:
2322 case OP_PLUSI:
2323 case OP_MINPLUSI:
2324 case OP_QUERYI:
2325 case OP_MINQUERYI:
2326 case OP_UPTOI:
2327 case OP_MINUPTOI:
2328 case OP_EXACTI:
2329 case OP_POSSTARI:
2330 case OP_POSPLUSI:
2331 case OP_POSQUERYI:
2332 case OP_POSUPTOI:
2333 case OP_NOTSTAR:
2334 case OP_NOTMINSTAR:
2335 case OP_NOTPLUS:
2336 case OP_NOTMINPLUS:
2337 case OP_NOTQUERY:
2338 case OP_NOTMINQUERY:
2339 case OP_NOTUPTO:
2340 case OP_NOTMINUPTO:
2341 case OP_NOTEXACT:
2342 case OP_NOTPOSSTAR:
2343 case OP_NOTPOSPLUS:
2344 case OP_NOTPOSQUERY:
2345 case OP_NOTPOSUPTO:
2346 case OP_NOTSTARI:
2347 case OP_NOTMINSTARI:
2348 case OP_NOTPLUSI:
2349 case OP_NOTMINPLUSI:
2350 case OP_NOTQUERYI:
2351 case OP_NOTMINQUERYI:
2352 case OP_NOTUPTOI:
2353 case OP_NOTMINUPTOI:
2354 case OP_NOTEXACTI:
2355 case OP_NOTPOSSTARI:
2356 case OP_NOTPOSPLUSI:
2357 case OP_NOTPOSQUERYI:
2358 case OP_NOTPOSUPTOI:
2359 if (utf) utf16_char = TRUE;
2360 #endif
2361 /* Fall through. */
2362
2363 default:
2364 length = OP_lengths16[op] - 1;
2365 break;
2366
2367 case OP_CLASS:
2368 case OP_NCLASS:
2369 /* Skip the character bit map. */
2370 ptr += 32/sizeof(pcre_uint16);
2371 length = 0;
2372 break;
2373
2374 case OP_XCLASS:
2375 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2376 if (LINK_SIZE > 1)
2377 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2378 - (1 + LINK_SIZE + 1));
2379 else
2380 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2381
2382 /* Reverse the size of the XCLASS instance. */
2383 *ptr = swap_uint16(*ptr);
2384 ptr++;
2385 if (LINK_SIZE > 1)
2386 {
2387 *ptr = swap_uint16(*ptr);
2388 ptr++;
2389 }
2390
2391 op = *ptr;
2392 *ptr = swap_uint16(op);
2393 ptr++;
2394 if ((op & XCL_MAP) != 0)
2395 {
2396 /* Skip the character bit map. */
2397 ptr += 32/sizeof(pcre_uint16);
2398 length -= 32/sizeof(pcre_uint16);
2399 }
2400 break;
2401 }
2402 }
2403 /* Control should never reach here in 16 bit mode. */
2404 #endif /* SUPPORT_PCRE16 */
2405 }
2406 #endif /* SUPPORT_PCRE[8|16] */
2407
2408
2409
2410 #if defined SUPPORT_PCRE32
2411 static void
2412 regexflip_32(pcre *ere, pcre_extra *extra)
2413 {
2414 real_pcre32 *re = (real_pcre32 *)ere;
2415 int op;
2416 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2417 int length = re->name_count * re->name_entry_size;
2418 #ifdef SUPPORT_UTF
2419 BOOL utf = (re->options & PCRE_UTF32) != 0;
2420 #endif /* SUPPORT_UTF */
2421
2422 /* Always flip the bytes in the main data block and study blocks. */
2423
2424 re->magic_number = REVERSED_MAGIC_NUMBER;
2425 re->size = swap_uint32(re->size);
2426 re->options = swap_uint32(re->options);
2427 re->flags = swap_uint16(re->flags);
2428 re->top_bracket = swap_uint16(re->top_bracket);
2429 re->top_backref = swap_uint16(re->top_backref);
2430 re->first_char = swap_uint32(re->first_char);
2431 re->req_char = swap_uint32(re->req_char);
2432 re->name_table_offset = swap_uint16(re->name_table_offset);
2433 re->name_entry_size = swap_uint16(re->name_entry_size);
2434 re->name_count = swap_uint16(re->name_count);
2435
2436 if (extra != NULL)
2437 {
2438 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2439 rsd->size = swap_uint32(rsd->size);
2440 rsd->flags = swap_uint32(rsd->flags);
2441 rsd->minlength = swap_uint32(rsd->minlength);
2442 }
2443
2444 /* In 32-bit mode we must swap bytes
2445 in the name table, if present, and then in the pattern itself. */
2446
2447 while(TRUE)
2448 {
2449 /* Swap previous characters. */
2450 while (length-- > 0)
2451 {
2452 *ptr = swap_uint32(*ptr);
2453 ptr++;
2454 }
2455
2456 /* Get next opcode. */
2457
2458 length = 0;
2459 op = *ptr;
2460 *ptr++ = swap_uint32(op);
2461
2462 switch (op)
2463 {
2464 case OP_END:
2465 return;
2466
2467 default:
2468 length = OP_lengths32[op] - 1;
2469 break;
2470
2471 case OP_CLASS:
2472 case OP_NCLASS:
2473 /* Skip the character bit map. */
2474 ptr += 32/sizeof(pcre_uint32);
2475 length = 0;
2476 break;
2477
2478 case OP_XCLASS:
2479 /* LINK_SIZE can only be 1 in 32-bit mode. */
2480 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2481
2482 /* Reverse the size of the XCLASS instance. */
2483 *ptr = swap_uint32(*ptr);
2484 ptr++;
2485
2486 op = *ptr;
2487 *ptr = swap_uint32(op);
2488 ptr++;
2489 if ((op & XCL_MAP) != 0)
2490 {
2491 /* Skip the character bit map. */
2492 ptr += 32/sizeof(pcre_uint32);
2493 length -= 32/sizeof(pcre_uint32);
2494 }
2495 break;
2496 }
2497 }
2498 /* Control should never reach here in 32 bit mode. */
2499 }
2500
2501 #endif /* SUPPORT_PCRE32 */
2502
2503
2504
2505 static void
2506 regexflip(pcre *ere, pcre_extra *extra)
2507 {
2508 #if defined SUPPORT_PCRE32
2509 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2510 regexflip_32(ere, extra);
2511 #endif
2512 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2513 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2514 regexflip8_or_16(ere, extra);
2515 #endif
2516 }
2517
2518
2519
2520 /*************************************************
2521 * Check match or recursion limit *
2522 *************************************************/
2523
2524 static int
2525 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2526 int start_offset, int options, int *use_offsets, int use_size_offsets,
2527 int flag, unsigned long int *limit, int errnumber, const char *msg)
2528 {
2529 int count;
2530 int min = 0;
2531 int mid = 64;
2532 int max = -1;
2533
2534 extra->flags |= flag;
2535
2536 for (;;)
2537 {
2538 *limit = mid;
2539
2540 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2541 use_offsets, use_size_offsets);
2542
2543 if (count == errnumber)
2544 {
2545 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2546 min = mid;
2547 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2548 }
2549
2550 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2551 count == PCRE_ERROR_PARTIAL)
2552 {
2553 if (mid == min + 1)
2554 {
2555 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2556 break;
2557 }
2558 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2559 max = mid;
2560 mid = (min + mid)/2;
2561 }
2562 else break; /* Some other error */
2563 }
2564
2565 extra->flags &= ~flag;
2566 return count;
2567 }
2568
2569
2570
2571 /*************************************************
2572 * Case-independent strncmp() function *
2573 *************************************************/
2574
2575 /*
2576 Arguments:
2577 s first string
2578 t second string
2579 n number of characters to compare
2580
2581 Returns: < 0, = 0, or > 0, according to the comparison
2582 */
2583
2584 static int
2585 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2586 {
2587 while (n--)
2588 {
2589 int c = tolower(*s++) - tolower(*t++);
2590 if (c) return c;
2591 }
2592 return 0;
2593 }
2594
2595
2596
2597 /*************************************************
2598 * Check newline indicator *
2599 *************************************************/
2600
2601 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2602 a message and return 0 if there is no match.
2603
2604 Arguments:
2605 p points after the leading '<'
2606 f file for error message
2607
2608 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2609 */
2610
2611 static int
2612 check_newline(pcre_uint8 *p, FILE *f)
2613 {
2614 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2615 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2616 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2617 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2618 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2619 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2620 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2621 fprintf(f, "Unknown newline type at: <%s\n", p);
2622 return 0;
2623 }
2624
2625
2626
2627 /*************************************************
2628 * Usage function *
2629 *************************************************/
2630
/* Write the command line summary to stdout. Options that depend on how
pcretest was built are listed only when the feature is compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE32
fputs(" -32 use the 32-bit library\n", stdout);
#endif
fputs(" -b show compiled code\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n"
      " linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " pcre32 32 bit library support enabled [0, 1]\n"
      " utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -s++ ditto, verifying when JIT was actually used\n"
      " -s+n force each pattern to be studied, using JIT if available,\n"
      " where 1 <= n <= 7 selects JIT options\n"
      " -s++n ditto, verifying when JIT was actually used\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2685
2686
2687
2688 /*************************************************
2689 * Main Program *
2690 *************************************************/
2691
2692 /* Read lines from named file or stdin and write to named file or stdout; lines
2693 consist of a regular expression, in delimiters and optionally followed by
2694 options, followed by a set of test data, terminated by an empty line. */
2695
2696 int main(int argc, char **argv)
2697 {
2698 FILE *infile = stdin;
2699 const char *version;
2700 int options = 0;
2701 int study_options = 0;
2702 int default_find_match_limit = FALSE;
2703 int op = 1;
2704 int timeit = 0;
2705 int timeitm = 0;
2706 int showinfo = 0;
2707 int showstore = 0;
2708 int force_study = -1;
2709 int force_study_options = 0;
2710 int quiet = 0;
2711 int size_offsets = 45;
2712 int size_offsets_max;
2713 int *offsets = NULL;
2714 int debug = 0;
2715 int done = 0;
2716 int all_use_dfa = 0;
2717 int verify_jit = 0;
2718 int yield = 0;
2719 int stack_size;
2720
2721 #if !defined NOPOSIX
2722 int posix = 0;
2723 #endif
2724 #if !defined NODFA
2725 int *dfa_workspace = NULL;
2726 #endif
2727
2728 pcre_jit_stack *jit_stack = NULL;
2729
2730 /* These vectors store, end-to-end, a list of zero-terminated captured
2731 substring names, each list itself being terminated by an empty name. Assume
2732 that 1024 is plenty long enough for the few names we'll be testing. It is
2733 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2734 for the actual memory, to ensure alignment. */
2735
2736 pcre_uint32 copynames[1024];
2737 pcre_uint32 getnames[1024];
2738
2739 #ifdef SUPPORT_PCRE32
2740 pcre_uint32 *cn32ptr;
2741 pcre_uint32 *gn32ptr;
2742 #endif
2743
2744 #ifdef SUPPORT_PCRE16
2745 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2746 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2747 pcre_uint16 *cn16ptr;
2748 pcre_uint16 *gn16ptr;
2749 #endif
2750
2751 #ifdef SUPPORT_PCRE8
2752 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2753 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2754 pcre_uint8 *cn8ptr;
2755 pcre_uint8 *gn8ptr;
2756 #endif
2757
2758 /* Get buffers from malloc() so that valgrind will check their misuse when
2759 debugging. They grow automatically when very long lines are read. The 16-
2760 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2761
2762 buffer = (pcre_uint8 *)malloc(buffer_size);
2763 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2764 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2765
2766 /* The outfile variable is static so that new_malloc can use it. */
2767
2768 outfile = stdout;
2769
2770 /* The following _setmode() stuff is some Windows magic that tells its runtime
2771 library to translate CRLF into a single LF character. At least, that's what
2772 I've been told: never having used Windows I take this all on trust. Originally
2773 it set 0x8000, but then I was advised that _O_BINARY was better. */
2774
2775 #if defined(_WIN32) || defined(WIN32)
2776 _setmode( _fileno( stdout ), _O_BINARY );
2777 #endif
2778
2779 /* Get the version number: both pcre_version() and pcre16_version() give the
2780 same answer. We just need to ensure that we call one that is available. */
2781
2782 #if defined SUPPORT_PCRE8
2783 version = pcre_version();
2784 #elif defined SUPPORT_PCRE16
2785 version = pcre16_version();
2786 #elif defined SUPPORT_PCRE32
2787 version = pcre32_version();
2788 #endif
2789
2790 /* Scan options */
2791
2792 while (argc > 1 && argv[op][0] == '-')
2793 {
2794 pcre_uint8 *endptr;
2795 char *arg = argv[op];
2796
2797 if (strcmp(arg, "-m") == 0) showstore = 1;
2798 else if (strcmp(arg, "-s") == 0) force_study = 0;
2799
2800 else if (strncmp(arg, "-s+", 3) == 0)
2801 {
2802 arg += 3;
2803 if (*arg == '+') { arg++; verify_jit = TRUE; }
2804 force_study = 1;
2805 if (*arg == 0)
2806 force_study_options = jit_study_bits[6];
2807 else if (*arg >= '1' && *arg <= '7')
2808 force_study_options = jit_study_bits[*arg - '1'];
2809 else goto BAD_ARG;
2810 }
2811 else if (strcmp(arg, "-16") == 0)
2812 {
2813 #ifdef SUPPORT_PCRE16
2814 pcre_mode = PCRE16_MODE;
2815 #else
2816 printf("** This version of PCRE was built without 16-bit support\n");
2817 exit(1);
2818 #endif
2819 }
2820 else if (strcmp(arg, "-32") == 0)
2821 {
2822 #ifdef SUPPORT_PCRE32
2823 pcre_mode = PCRE32_MODE;
2824 #else
2825 printf("** This version of PCRE was built without 32-bit support\n");
2826 exit(1);
2827 #endif
2828 }
2829 else if (strcmp(arg, "-q") == 0) quiet = 1;
2830 else if (strcmp(arg, "-b") == 0) debug = 1;
2831 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2832 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2833 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2834 #if !defined NODFA
2835 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2836 #endif
2837 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2838 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2839 *endptr == 0))
2840 {
2841 op++;
2842 argc--;
2843 }
2844 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2845 {
2846 int both = arg[2] == 0;
2847 int temp;
2848 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2849 *endptr == 0))
2850 {
2851 timeitm = temp;
2852 op++;
2853 argc--;
2854 }
2855 else timeitm = LOOPREPEAT;
2856 if (both) timeit = timeitm;
2857 }
2858 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2859 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2860 *endptr == 0))
2861 {
2862 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2863 printf("PCRE: -S not supported on this OS\n");
2864 exit(1);
2865 #else
2866 int rc;
2867 struct rlimit rlim;
2868 getrlimit(RLIMIT_STACK, &rlim);
2869 rlim.rlim_cur = stack_size * 1024 * 1024;
2870 rc = setrlimit(RLIMIT_STACK, &rlim);
2871 if (rc != 0)
2872 {
2873 printf("PCRE: setrlimit() failed with error %d\n", rc);
2874 exit(1);
2875 }
2876 op++;
2877 argc--;
2878 #endif
2879 }
2880 #if !defined NOPOSIX
2881 else if (strcmp(arg, "-p") == 0) posix = 1;
2882 #endif
2883 else if (strcmp(arg, "-C") == 0)
2884 {
2885 int rc;
2886 unsigned long int lrc;
2887
2888 if (argc > 2)
2889 {
2890 if (strcmp(argv[op + 1], "linksize") == 0)
2891 {
2892 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2893 printf("%d\n", rc);
2894 yield = rc;
2895 }
2896 else if (strcmp(argv[op + 1], "pcre8") == 0)
2897 {
2898 #ifdef SUPPORT_PCRE8
2899 printf("1\n");
2900 yield = 1;
2901 #else
2902 printf("0\n");
2903 yield = 0;
2904 #endif
2905 }
2906 else if (strcmp(argv[op + 1], "pcre16") == 0)
2907 {
2908 #ifdef SUPPORT_PCRE16
2909 printf("1\n");
2910 yield = 1;
2911 #else
2912 printf("0\n");
2913 yield = 0;
2914 #endif
2915 }
2916 else if (strcmp(argv[op + 1], "pcre32") == 0)
2917 {
2918 #ifdef SUPPORT_PCRE32
2919 printf("1\n");
2920 yield = 1;
2921 #else
2922 printf("0\n");
2923 yield = 0;
2924 #endif
2925 goto EXIT;
2926 }
2927 if (strcmp(argv[op + 1], "utf") == 0)
2928 {
2929 #ifdef SUPPORT_PCRE8
2930 if (pcre_mode == PCRE8_MODE)
2931 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2932 #endif
2933 #ifdef SUPPORT_PCRE16
2934 if (pcre_mode == PCRE16_MODE)
2935 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2936 #endif
2937 #ifdef SUPPORT_PCRE32
2938 if (pcre_mode == PCRE32_MODE)
2939 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2940 #endif
2941 printf("%d\n", rc);
2942 yield = rc;
2943 goto EXIT;
2944 }
2945 else if (strcmp(argv[op + 1], "ucp") == 0)
2946 {
2947 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2948 printf("%d\n", rc);
2949 yield = rc;
2950 }
2951 else if (strcmp(argv[op + 1], "jit") == 0)
2952 {
2953 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2954 printf("%d\n", rc);
2955 yield = rc;
2956 }
2957 else if (strcmp(argv[op + 1], "newline") == 0)
2958 {
2959 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2960 print_newline_config(rc, TRUE);
2961 }
2962 else if (strcmp(argv[op + 1], "ebcdic") == 0)
2963 {
2964 #ifdef EBCDIC
2965 printf("1\n");
2966 yield = 1;
2967 #else
2968 printf("0\n");
2969 #endif
2970 }
2971 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2972 {
2973 #ifdef EBCDIC
2974 printf("0x%02x\n", CHAR_LF);
2975 #else
2976 printf("0\n");
2977 #endif
2978 }
2979 else
2980 {
2981 printf("Unknown -C option: %s\n", argv[op + 1]);
2982 }
2983 goto EXIT;
2984 }
2985
2986 /* No argument for -C: output all configuration information. */
2987
2988 printf("PCRE version %s\n", version);
2989 printf("Compiled with\n");
2990
2991 #ifdef EBCDIC
2992 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2993 #endif
2994
2995 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2996 are set, either both UTFs are supported or both are not supported. */
2997
2998 #ifdef SUPPORT_PCRE8
2999 printf(" 8-bit support\n");
3000 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3001 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3002 #endif
3003 #ifdef SUPPORT_PCRE16
3004 printf(" 16-bit support\n");
3005 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3006 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3007 #endif
3008 #ifdef SUPPORT_PCRE32
3009 printf(" 32-bit support\n");
3010 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3011 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3012 #endif
3013
3014 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3015 printf(" %sUnicode properties support\n", rc? "" : "No ");
3016 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3017 if (rc)
3018 {
3019 const char *arch;
3020 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3021 printf(" Just-in-time compiler support: %s\n", arch);
3022 }
3023 else
3024 printf(" No just-in-time compiler support\n");
3025 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3026 print_newline_config(rc, FALSE);
3027 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3028 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3029 "all Unicode newlines");
3030 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3031 printf(" Internal link size = %d\n", rc);
3032 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3033 printf(" POSIX malloc threshold = %d\n", rc);
3034 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3035 printf(" Default match limit = %ld\n", lrc);
3036 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3037 printf(" Default recursion depth limit = %ld\n", lrc);
3038 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3039 printf(" Match recursion uses %s", rc? "stack" : "heap");
3040 if (showstore)
3041 {
3042 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3043 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3044 }
3045 printf("\n");
3046 goto EXIT;
3047 }
3048 else if (strcmp(arg, "-help") == 0 ||
3049 strcmp(arg, "--help") == 0)
3050 {
3051 usage();
3052 goto EXIT;
3053 }
3054 else
3055 {
3056 BAD_ARG:
3057 printf("** Unknown or malformed option %s\n", arg);
3058 usage();
3059 yield = 1;
3060 goto EXIT;
3061 }
3062 op++;
3063 argc--;
3064 }
3065
3066 /* Get the store for the offsets vector, and remember what it was */
3067
3068 size_offsets_max = size_offsets;
3069 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3070 if (offsets == NULL)
3071 {
3072 printf("** Failed to get %d bytes of memory for offsets vector\n",
3073 (int)(size_offsets_max * sizeof(int)));
3074 yield = 1;
3075 goto EXIT;
3076 }
3077
3078 /* Sort out the input and output files */
3079
3080 if (argc > 1)
3081 {
3082 infile = fopen(argv[op], INPUT_MODE);
3083 if (infile == NULL)
3084 {
3085 printf("** Failed to open %s\n", argv[op]);
3086 yield = 1;
3087 goto EXIT;
3088 }
3089 }
3090
3091 if (argc > 2)
3092 {
3093 outfile = fopen(argv[op+1], OUTPUT_MODE);
3094 if (outfile == NULL)
3095 {
3096 printf("** Failed to open %s\n", argv[op+1]);
3097 yield = 1;
3098 goto EXIT;
3099 }
3100 }
3101
3102 /* Set alternative malloc function */
3103
3104 #ifdef SUPPORT_PCRE8
3105 pcre_malloc = new_malloc;
3106 pcre_free = new_free;
3107 pcre_stack_malloc = stack_malloc;
3108 pcre_stack_free = stack_free;
3109 #endif
3110
3111 #ifdef SUPPORT_PCRE16
3112 pcre16_malloc = new_malloc;
3113 pcre16_free = new_free;
3114 pcre16_stack_malloc = stack_malloc;
3115 pcre16_stack_free = stack_free;
3116 #endif
3117
3118 #ifdef SUPPORT_PCRE32
3119 pcre32_malloc = new_malloc;
3120 pcre32_free = new_free;
3121 pcre32_stack_malloc = stack_malloc;
3122 pcre32_stack_free = stack_free;
3123 #endif
3124
3125 /* Heading line unless quiet, then prompt for first regex if stdin */
3126
3127 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3128
3129 /* Main loop */
3130
3131 while (!done)
3132 {
3133 pcre *re = NULL;
3134 pcre_extra *extra = NULL;
3135
3136 #if !defined NOPOSIX /* There are still compilers that require no indent */
3137 regex_t preg;
3138 int do_posix = 0;
3139 #endif
3140
3141 const char *error;
3142 pcre_uint8 *markptr;
3143 pcre_uint8 *p, *pp, *ppp;
3144 pcre_uint8 *to_file = NULL;
3145 const pcre_uint8 *tables = NULL;
3146 unsigned long int get_options;
3147 unsigned long int true_size, true_study_size = 0;
3148 size_t size, regex_gotten_store;
3149 int do_allcaps = 0;
3150 int do_mark = 0;
3151 int do_study = 0;
3152 int no_force_study = 0;
3153 int do_debug = debug;
3154 int do_G = 0;
3155 int do_g = 0;
3156 int do_showinfo = showinfo;
3157 int do_showrest = 0;
3158 int do_showcaprest = 0;
3159 int do_flip = 0;
3160 int erroroffset, len, delimiter, poffset;
3161
3162 #if !defined NODFA
3163 int dfa_matched = 0;
3164 #endif
3165
3166 use_utf = 0;
3167 debug_lengths = 1;
3168
3169 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3170 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3171 fflush(outfile);
3172
3173 p = buffer;
3174 while (isspace(*p)) p++;
3175 if (*p == 0) continue;
3176
3177 /* See if the pattern is to be loaded pre-compiled from a file. */
3178
3179 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3180 {
3181 pcre_uint32 magic;
3182 pcre_uint8 sbuf[8];
3183 FILE *f;
3184
3185 p++;
3186 if (*p == '!')
3187 {
3188 do_debug = TRUE;
3189 do_showinfo = TRUE;
3190 p++;
3191 }
3192
3193 pp = p + (int)strlen((char *)p);
3194 while (isspace(pp[-1])) pp--;
3195 *pp = 0;
3196
3197 f = fopen((char *)p, "rb");
3198 if (f == NULL)
3199 {
3200 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3201 continue;
3202 }
3203
3204 first_gotten_store = 0;
3205 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3206
3207 true_size =
3208 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3209 true_study_size =
3210 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3211
3212 re = (pcre *)new_malloc(true_size);
3213 if (re == NULL)
3214 {
3215 printf("** Failed to get %d bytes of memory for pcre object\n",
3216 (int)true_size);
3217 yield = 1;
3218 goto EXIT;
3219 }
3220 regex_gotten_store = first_gotten_store;
3221
3222 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3223
3224 magic = REAL_PCRE_MAGIC(re);
3225 if (magic != MAGIC_NUMBER)
3226 {
3227 if (swap_uint32(magic) == MAGIC_NUMBER)
3228 {
3229 do_flip = 1;
3230 }
3231 else
3232 {
3233 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3234 new_free(re);
3235 fclose(f);
3236 continue;
3237 }
3238 }
3239
3240 /* We hide the byte-invert info for little and big endian tests. */
3241 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3242 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3243
3244 /* Now see if there is any following study data. */
3245
3246 if (true_study_size != 0)
3247 {
3248 pcre_study_data *psd;
3249
3250 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3251 extra->flags = PCRE_EXTRA_STUDY_DATA;
3252
3253 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3254 extra->study_data = psd;
3255
3256 if (fread(psd, 1, true_study_size, f) != true_study_size)
3257 {
3258 FAIL_READ:
3259 fprintf(outfile, "Failed to read data from %s\n", p);
3260 if (extra != NULL)
3261 {
3262 PCRE_FREE_STUDY(extra);
3263 }
3264 new_free(re);
3265 fclose(f);
3266 continue;
3267 }
3268 fprintf(outfile, "Study data loaded from %s\n", p);
3269 do_study = 1; /* To get the data output if requested */
3270 }
3271 else fprintf(outfile, "No study data\n");
3272
3273 /* Flip the necessary bytes. */
3274 if (do_flip)
3275 {
3276 int rc;
3277 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3278 if (rc == PCRE_ERROR_BADMODE)
3279 {
3280 /* Simulate the result of the function call below. */
3281 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3282 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3283 PCRE_INFO_OPTIONS);
3284 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3285 "%d-bit mode\n", 8 * CHAR_SIZE,
3286 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3287 new_free(re);
3288 fclose(f);
3289 continue;
3290 }
3291 }
3292
3293 /* Need to know if UTF-8 for printing data strings. */
3294
3295 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3296 {
3297 new_free(re);
3298 fclose(f);
3299 continue;
3300 }
3301 use_utf = (get_options & PCRE_UTF8) != 0;
3302
3303 fclose(f);
3304 goto SHOW_INFO;
3305 }
3306
3307 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3308 the pattern; if it isn't complete, read more. */
3309
3310 delimiter = *p++;
3311
3312 if (isalnum(delimiter) || delimiter == '\\')
3313 {
3314 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3315 goto SKIP_DATA;
3316 }
3317
3318 pp = p;
3319 poffset = (int)(p - buffer);
3320
3321 for(;;)
3322 {
3323 while (*pp != 0)
3324 {
3325 if (*pp == '\\' && pp[1] != 0) pp++;
3326 else if (*pp == delimiter) break;
3327 pp++;
3328 }
3329 if (*pp != 0) break;
3330 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3331 {
3332 fprintf(outfile, "** Unexpected EOF\n");
3333 done = 1;
3334 goto CONTINUE;
3335 }
3336 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3337 }
3338
3339 /* The buffer may have moved while being extended; reset the start of data
3340 pointer to the correct relative point in the buffer. */
3341
3342 p = buffer + poffset;
3343
3344 /* If the first character after the delimiter is backslash, make
3345 the pattern end with backslash. This is purely to provide a way
3346 of testing for the error message when a pattern ends with backslash. */
3347
3348 if (pp[1] == '\\') *pp++ = '\\';
3349
3350 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3351 for callouts. */
3352
3353 *pp++ = 0;
3354 strcpy((char *)pbuffer, (char *)p);
3355
3356 /* Look for options after final delimiter */
3357
3358 options = 0;
3359 study_options = force_study_options;
3360 log_store = showstore; /* default from command line */
3361
3362 while (*pp != 0)
3363 {
3364 switch (*pp++)
3365 {
3366 case 'f': options |= PCRE_FIRSTLINE; break;
3367 case 'g': do_g = 1; break;
3368 case 'i': options |= PCRE_CASELESS; break;
3369 case 'm': options |= PCRE_MULTILINE; break;
3370 case 's': options |= PCRE_DOTALL; break;
3371 case 'x': options |= PCRE_EXTENDED; break;
3372
3373 case '+':
3374 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3375 break;
3376
3377 case '=': do_allcaps = 1; break;
3378 case 'A': options |= PCRE_ANCHORED; break;
3379 case 'B': do_debug = 1; break;
3380 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3381 case 'D': do_debug = do_showinfo = 1; break;
3382 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3383 case 'F': do_flip = 1; break;
3384 case 'G': do_G = 1; break;
3385 case 'I': do_showinfo = 1; break;
3386 case 'J': options |= PCRE_DUPNAMES; break;
3387 case 'K': do_mark = 1; break;
3388 case 'M': log_store = 1; break;
3389 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3390
3391 #if !defined NOPOSIX
3392 case 'P': do_posix = 1; break;
3393 #endif
3394
3395 case 'S':
3396 do_study = 1;
3397 for (;;)
3398 {
3399 switch (*pp++)
3400 {
3401 case 'S':
3402 do_study = 0;
3403 no_force_study = 1;
3404 break;
3405
3406 case '!':
3407 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3408 break;
3409
3410 case '+':
3411 if (*pp == '+')
3412 {
3413 verify_jit = TRUE;
3414 pp++;
3415 }
3416 if (*pp >= '1' && *pp <= '7')
3417 study_options |= jit_study_bits[*pp++ - '1'];
3418 else
3419 study_options |= jit_study_bits[6];
3420 break;
3421
3422 case '-':
3423 study_options &= ~PCRE_STUDY_ALLJIT;
3424 break;
3425
3426 default:
3427 pp--;
3428 goto ENDLOOP;
3429 }
3430 }
3431 ENDLOOP:
3432 break;
3433
3434 case 'U': options |= PCRE_UNGREEDY; break;
3435 case 'W': options |= PCRE_UCP; break;
3436 case 'X': options |= PCRE_EXTRA; break;
3437 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3438 case 'Z': debug_lengths = 0; break;
3439 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3440 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3441
3442 case 'T':
3443 switch (*pp++)
3444 {
3445 case '0': tables = tables0; break;
3446 case '1': tables = tables1; break;
3447
3448 case '\r':
3449 case '\n':
3450 case ' ':
3451 case 0:
3452 fprintf(outfile, "** Missing table number after /T\n");
3453 goto SKIP_DATA;
3454
3455 default:
3456 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3457 goto SKIP_DATA;
3458 }
3459 break;
3460
3461 case 'L':
3462 ppp = pp;
3463 /* The '\r' test here is so that it works on Windows. */
3464 /* The 0 (NUL terminator) test is just in case this is an unterminated line. */
3465 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3466 *ppp = 0;
3467 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3468 {
3469 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3470 goto SKIP_DATA;
3471 }
3472 locale_set = 1;
3473 tables = PCRE_MAKETABLES;
3474 pp = ppp;
3475 break;
3476
3477 case '>':
3478 to_file = pp;
3479 while (*pp != 0) pp++;
3480 while (isspace(pp[-1])) pp--;
3481 *pp = 0;
3482 break;
3483
3484 case '<':
3485 {
3486 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3487 {
3488 options |= PCRE_JAVASCRIPT_COMPAT;
3489 pp += 3;
3490 }
3491 else
3492 {
3493 int x = check_newline(pp, outfile);
3494 if (x == 0) goto SKIP_DATA;
3495 options |= x;
3496 while (*pp++ != '>');
3497 }
3498 }
3499 break;
3500
3501 case '\r': /* So that it works in Windows */
3502 case '\n':
3503 case ' ':
3504 break;
3505
3506 default:
3507 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3508 goto SKIP_DATA;
3509 }
3510 }
3511
3512 /* Handle compiling via the POSIX interface, which doesn't support the
3513 timing, showing, or debugging options, nor the ability to pass over
3514 local character tables. Neither does it have 16-bit support. */
3515
3516 #if !defined NOPOSIX
3517 if (posix || do_posix)
3518 {
3519 int rc;
3520 int cflags = 0;
3521
3522 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3523 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3524 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3525 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3526 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3527 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3528 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3529
3530 first_gotten_store = 0;
3531 rc = regcomp(&preg, (char *)p, cflags);
3532
3533 /* Compilation failed; go back for another re, skipping to blank line
3534 if non-interactive. */
3535
3536 if (rc != 0)
3537 {
3538 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3539 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3540 goto SKIP_DATA;
3541 }
3542 }
3543
3544 /* Handle compiling via the native interface */
3545
3546 else
3547 #endif /* !defined NOPOSIX */
3548
3549 {
3550 /* In 16- or 32-bit mode, convert the input. */
3551
3552 #ifdef SUPPORT_PCRE16
3553 if (pcre_mode == PCRE16_MODE)
3554 {
3555 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3556 {
3557 case -1:
3558 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3559 "converted to UTF-16\n");
3560 goto SKIP_DATA;
3561
3562 case -2:
3563 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3564 "cannot be converted to UTF-16\n");
3565 goto SKIP_DATA;
3566
3567 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3568 fprintf(outfile, "**Failed: character value greater than 0xffff "
3569 "cannot be converted to 16-bit in non-UTF mode\n");
3570 goto SKIP_DATA;
3571
3572 default:
3573 break;
3574 }
3575 p = (pcre_uint8 *)buffer16;
3576 }
3577 #endif
3578
3579 #ifdef SUPPORT_PCRE32
3580 if (pcre_mode == PCRE32_MODE)
3581 {
3582 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3583 {
3584 case -1:
3585 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3586 "converted to UTF-32\n");
3587 goto SKIP_DATA;
3588
3589 case -2:
3590 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3591 "cannot be converted to UTF-32\n");
3592 goto SKIP_DATA;
3593
3594 case -3:
3595 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3596 goto SKIP_DATA;
3597
3598 default:
3599 break;
3600 }
3601 p = (pcre_uint8 *)buffer32;
3602 }
3603 #endif
3604
3605 /* Compile many times when timing */
3606
3607 if (timeit > 0)
3608 {
3609 register int i;
3610 clock_t time_taken;
3611 clock_t start_time = clock();
3612 for (i = 0; i < timeit; i++)
3613 {
3614 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3615 if (re != NULL) free(re);
3616 }
3617 time_taken = clock() - start_time;
3618 fprintf(outfile, "Compile time %.4f milliseconds\n",
3619 (((double)time_taken * 1000.0) / (double)timeit) /
3620 (double)CLOCKS_PER_SEC);
3621 }
3622
3623 first_gotten_store = 0;
3624 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3625
3626 /* Compilation failed; go back for another re, skipping to blank line
3627 if non-interactive. */
3628
3629 if (re == NULL)
3630 {
3631 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3632 SKIP_DATA:
3633 if (infile != stdin)
3634 {
3635 for (;;)
3636 {
3637 if (extend_inputline(infile, buffer, NULL) == NULL)
3638 {
3639 done = 1;
3640 goto CONTINUE;
3641 }
3642 len = (int)strlen((char *)buffer);
3643 while (len > 0 && isspace(buffer[len-1])) len--;
3644 if (len == 0) break;
3645 }
3646 fprintf(outfile, "\n");
3647 }
3648 goto CONTINUE;
3649 }
3650
3651 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3652 within the regex; check for this so that we know how to process the data
3653 lines. */
3654
3655 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3656 goto SKIP_DATA;
3657 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3658
3659 /* Extract the size for possible writing before possibly flipping it,
3660 and remember the store that was got. */
3661
3662 true_size = REAL_PCRE_SIZE(re);
3663 regex_gotten_store = first_gotten_store;
3664
3665 /* Output code size information if requested */
3666
3667 if (log_store)
3668 {
3669 int name_count, name_entry_size, real_pcre_size;
3670
3671 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3672 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3673 #ifdef SUPPORT_PCRE8
3674 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3675 real_pcre_size = sizeof(real_pcre);
3676 #endif
3677 #ifdef SUPPORT_PCRE16
3678 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3679 real_pcre_size = sizeof(real_pcre16);
3680 #endif
3681 #ifdef SUPPORT_PCRE32
3682 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3683 real_pcre_size = sizeof(real_pcre32);
3684 #endif
3685 fprintf(outfile, "Memory allocation (code space): %d\n",
3686 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3687 }
3688
3689 /* If -s or /S was present, study the regex to generate additional info to
3690 help with the matching, unless the pattern has the SS option, which
3691 suppresses the effect of /S (used for a few test patterns where studying is
3692 never sensible). */
3693
3694 if (do_study || (force_study >= 0 && !no_force_study))
3695 {
3696 if (timeit > 0)
3697 {
3698 register int i;
3699 clock_t time_taken;
3700 clock_t start_time = clock();
3701 for (i = 0; i < timeit; i++)
3702 {
3703 PCRE_STUDY(extra, re, study_options, &error);
3704 }
3705 time_taken = clock() - start_time;
3706 if (extra != NULL)
3707 {
3708 PCRE_FREE_STUDY(extra);
3709 }
3710 fprintf(outfile, " Study time %.4f milliseconds\n",
3711 (((double)time_taken * 1000.0) / (double)timeit) /
3712 (double)CLOCKS_PER_SEC);
3713 }
3714 PCRE_STUDY(extra, re, study_options, &error);
3715 if (error != NULL)
3716 fprintf(outfile, "Failed to study: %s\n", error);
3717 else if (extra != NULL)
3718 {
3719 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3720 if (log_store)
3721 {
3722 size_t jitsize;
3723 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3724 jitsize != 0)
3725 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3726 }
3727 }
3728 }
3729
3730 /* If /K was present, we set up for handling MARK data. */
3731
3732 if (do_mark)
3733 {
3734 if (extra == NULL)
3735 {
3736 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3737 extra->flags = 0;
3738 }
3739 extra->mark = &markptr;
3740 extra->flags |= PCRE_EXTRA_MARK;
3741 }
3742
3743 /* Extract and display information from the compiled data if required. */
3744
3745 SHOW_INFO:
3746
3747 if (do_debug)
3748 {
3749 fprintf(outfile, "------------------------------------------------------------------\n");
3750 PCRE_PRINTINT(re, outfile, debug_lengths);
3751 }
3752
3753 /* We already have the options in get_options (see above) */
3754
3755 if (do_showinfo)
3756 {
3757 unsigned long int all_options;
3758 pcre_uint32 first_char, need_char;
3759 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
3760 hascrorlf, maxlookbehind;
3761 int nameentrysize, namecount;
3762 const pcre_uint8 *nametable;
3763
3764 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3765 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3766 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3767 new_info(re, NULL, PCRE_INFO_FIRSTLITERAL, &first_char) +
3768 new_info(re, NULL, PCRE_INFO_FIRSTLITERALSET, &first_char_set) +
3769 new_info(re, NULL, PCRE_INFO_LASTLITERAL2, &need_char) +
3770 new_info(re, NULL, PCRE_INFO_LASTLITERAL2SET, &need_char_set) +
3771 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3772 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3773 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3774 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3775 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3776 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3777 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3778 != 0)
3779 goto SKIP_DATA;
3780
3781 if (size != regex_gotten_store) fprintf(outfile,
3782 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3783 (int)size, (int)regex_gotten_store);
3784
3785 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3786 if (backrefmax > 0)
3787 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3788
3789 if (namecount > 0)
3790 {
3791 fprintf(outfile, "Named capturing subpatterns:\n");
3792 while (namecount-- > 0)
3793 {
3794 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
3795 int length = (int)STRLEN(nametable + imm2_size);
3796 fprintf(outfile, " ");
3797 PCHARSV(nametable, imm2_size, length, outfile);
3798 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3799 #ifdef SUPPORT_PCRE32
3800 if (pcre_mode == PCRE32_MODE)
3801 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
3802 #endif
3803 #ifdef SUPPORT_PCRE16
3804 if (pcre_mode == PCRE16_MODE)
3805 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
3806 #endif
3807 #ifdef SUPPORT_PCRE8
3808 if (pcre_mode == PCRE8_MODE)
3809 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
3810 #endif
3811 nametable += nameentrysize * CHAR_SIZE;
3812 }
3813 }
3814
3815 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3816 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3817
3818 all_options = REAL_PCRE_OPTIONS(re);
3819 if (do_flip) all_options = swap_uint32(all_options);
3820
3821 if (get_options == 0) fprintf(outfile, "No options\n");
3822 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3823 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3824 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3825 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3826 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3827 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3828 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3829 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3830 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3831 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3832 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3833 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3834 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3835 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3836 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3837 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3838 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3839 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3840
3841 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3842
3843 switch (get_options & PCRE_NEWLINE_BITS)
3844 {
3845 case PCRE_NEWLINE_CR:
3846 fprintf(outfile, "Forced newline sequence: CR\n");
3847 break;
3848
3849 case PCRE_NEWLINE_LF:
3850 fprintf(outfile, "Forced newline sequence: LF\n");
3851 break;
3852
3853 case PCRE_NEWLINE_CRLF:
3854 fprintf(outfile, "Forced newline sequence: CRLF\n");
3855 break;
3856
3857 case PCRE_NEWLINE_ANYCRLF:
3858 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3859 break;
3860
3861 case PCRE_NEWLINE_ANY:
3862 fprintf(outfile, "Forced newline sequence: ANY\n");
3863 break;
3864
3865 default:
3866 break;
3867 }
3868
3869 if (first_char_set == 2)
3870 {
3871 fprintf(outfile, "First char at start or follows newline\n");
3872 }
3873 else if (first_char_set == 1)
3874 {
3875 const char *caseless =
3876 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
3877 "" : " (caseless)";
3878
3879 if (PRINTOK(first_char))
3880 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3881 else
3882 {
3883 fprintf(outfile, "First char = ");
3884 pchar(first_char, outfile);
3885 fprintf(outfile, "%s\n", caseless);
3886 }
3887 }
3888 else
3889 {
3890 fprintf(outfile, "No first char\n");
3891 }
3892
3893 if (need_char_set == 0)
3894 {
3895 fprintf(outfile, "No need char\n");
3896 }
3897 else
3898 {
3899 const char *caseless =
3900 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
3901 "" : " (caseless)";
3902
3903 if (PRINTOK(need_char))
3904 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3905 else
3906 {
3907 fprintf(outfile, "Need char = ");
3908 pchar(need_char, outfile);
3909 fprintf(outfile, "%s\n", caseless);
3910 }
3911 }
3912
3913 if (maxlookbehind > 0)
3914 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3915
3916 /* Don't output study size; at present it is in any case a fixed
3917 value, but it varies, depending on the computer architecture, and
3918 so messes up the test suite. (And with the /F option, it might be
3919 flipped.) If study was forced by an external -s, don't show this
3920 information unless -i or -d was also present. This means that, except
3921 when auto-callouts are involved, the output from runs with and without
3922 -s should be identical. */
3923
3924 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3925 {
3926 if (extra == NULL)
3927 fprintf(outfile, "Study returned NULL\n");
3928 else
3929 {
3930 pcre_uint8 *start_bits = NULL;
3931 int minlength;
3932
3933 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3934 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3935
3936 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3937 {
3938 if (start_bits == NULL)
3939 fprintf(outfile, "No set of starting bytes\n");
3940 else
3941 {
3942 int i;
3943 int c = 24;
3944 fprintf(outfile, "Starting byte set: ");
3945 for (i = 0; i < 256; i++)
3946 {
3947 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3948 {
3949 if (c > 75)
3950 {
3951 fprintf(outfile, "\n ");
3952 c = 2;
3953 }
3954 if (PRINTOK(i) && i != ' ')
3955 {
3956 fprintf(outfile, "%c ", i);
3957 c += 2;
3958 }
3959 else
3960 {
3961 fprintf(outfile, "\\x%02x ", i);
3962 c += 5;
3963 }
3964 }
3965 }
3966 fprintf(outfile, "\n");
3967 }
3968 }
3969 }
3970
3971 /* Show this only if the JIT was set by /S, not by -s. */
3972
3973 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3974 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3975 {
3976 int jit;
3977 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3978 {
3979 if (jit)
3980 fprintf(outfile, "JIT study was successful\n");
3981 else
3982 #ifdef SUPPORT_JIT
3983 fprintf(outfile, "JIT study was not successful\n");
3984 #else
3985 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3986 #endif
3987 }
3988 }
3989 }
3990 }
3991
3992 /* If the '>' option was present, we write out the regex to a file, and
3993 that is all. The first 8 bytes of the file are the regex length and then
3994 the study length, in big-endian order. */
3995
3996 if (to_file != NULL)
3997 {
3998 FILE *f = fopen((char *)to_file, "wb");
3999 if (f == NULL)
4000 {
4001 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4002 }
4003 else
4004 {
4005 pcre_uint8 sbuf[8];
4006
4007 if (do_flip) regexflip(re, extra);
4008 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4009 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4010 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4011 sbuf[3] = (pcre_uint8)((true_size) & 255);
4012 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4013 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4014 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4015 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4016
4017 if (fwrite(sbuf, 1, 8, f) < 8 ||
4018 fwrite(re, 1, true_size, f) < true_size)
4019 {
4020 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4021 }
4022 else
4023 {
4024 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4025
4026 /* If there is study data, write it. */
4027
4028 if (extra != NULL)
4029 {
4030 if (fwrite(extra->study_data, 1, true_study_size, f) <
4031 true_study_size)
4032 {
4033 fprintf(outfile, "Write error on %s: %s\n", to_file,
4034 strerror(errno));
4035 }
4036 else fprintf(outfile, "Study data written to %s\n", to_file);
4037 }
4038 }
4039 fclose(f);
4040 }
4041
4042 new_free(re);
4043 if (extra != NULL)
4044 {
4045 PCRE_FREE_STUDY(extra);
4046 }
4047 if (locale_set)
4048 {
4049 new_free((void *)tables);
4050 setlocale(LC_CTYPE, "C");
4051 locale_set = 0;
4052 }
4053 continue; /* With next regex */
4054 }
4055 } /* End of non-POSIX compile */
4056
4057 /* Read data lines and test them */
4058
4059 for (;;)
4060 {
4061 pcre_uint8 *q;
4062 pcre_uint8 *bptr;
4063 int *use_offsets = offsets;
4064 int use_size_offsets = size_offsets;
4065 int callout_data = 0;
4066 int callout_data_set = 0;
4067 int count;
4068 pcre_uint32 c;
4069 int copystrings = 0;
4070 int find_match_limit = default_find_match_limit;
4071 int getstrings = 0;
4072 int getlist = 0;
4073 int gmatched = 0;
4074 int start_offset = 0;
4075 int start_offset_sign = 1;
4076 int g_notempty = 0;
4077 int use_dfa = 0;
4078
4079 *copynames = 0;
4080 *getnames = 0;
4081
4082 #ifdef SUPPORT_PCRE32
4083 cn32ptr = copynames;
4084 gn32ptr = getnames;
4085 #endif
4086 #ifdef SUPPORT_PCRE16
4087 cn16ptr = copynames16;
4088 gn16ptr = getnames16;
4089 #endif
4090 #ifdef SUPPORT_PCRE8
4091 cn8ptr = copynames8;
4092 gn8ptr = getnames8;
4093 #endif
4094
4095 SET_PCRE_CALLOUT(callout);
4096 first_callout = 1;
4097 last_callout_mark = NULL;
4098 callout_extra = 0;
4099 callout_count = 0;
4100 callout_fail_count = 999999;
4101 callout_fail_id = -1;
4102 show_malloc = 0;
4103 options = 0;
4104
4105 if (extra != NULL) extra->flags &=
4106 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4107
4108 len = 0;
4109 for (;;)
4110 {
4111 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4112 {
4113 if (len > 0) /* Reached EOF without hitting a newline */
4114 {
4115 fprintf(outfile, "\n");
4116 break;
4117 }
4118 done = 1;
4119 goto CONTINUE;
4120 }
4121 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4122 len = (int)strlen((char *)buffer);
4123 if (buffer[len-1] == '\n') break;
4124 }
4125
4126 while (len > 0 && isspace(buffer[len-1])) len--;
4127 buffer[len] = 0;
4128 if (len == 0) break;
4129
4130 p = buffer;
4131 while (isspace(*p)) p++;
4132
4133 bptr = q = dbuffer;
4134 while ((c = *p++) != 0)
4135 {
4136 int i = 0;
4137 int n = 0;
4138
4139 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4140 In non-UTF mode, allow the value of the byte to fall through to later,
4141 where values greater than 127 are turned into UTF-8 when running in
4142 16-bit or 32-bit mode. */
4143
4144 if (c != '\\')
4145 {
4146 if (use_utf)
4147 {
4148 *q++ = c;
4149 continue;
4150 }
4151 }
4152
4153 /* Handle backslash escapes */
4154
4155 else switch ((c = *p++))
4156 {
4157 case 'a': c = 7; break;
4158 case 'b': c = '\b'; break;
4159 case 'e': c = 27; break;
4160 case 'f': c = '\f'; break;
4161 case 'n': c = '\n'; break;
4162 case 'r': c = '\r'; break;
4163 case 't': c = '\t'; break;
4164 case 'v': c = '\v'; break;
4165
4166 case '0': case '1': case '2': case '3':
4167 case '4': case '5': case '6': case '7':
4168 c -= '0';
4169 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4170 c = c * 8 + *p++ - '0';
4171 break;
4172
4173 case 'x':
4174 if (*p == '{')
4175 {
4176 pcre_uint8 *pt = p;
4177 c = 0;
4178
4179 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4180 when isxdigit() is a macro that refers to its argument more than
4181 once. This is banned by the C Standard, but apparently happens in at
4182 least one MacOS environment. */
4183
4184 for (pt++; isxdigit(*pt); pt++)
4185 {
4186 if (++i == 9)
4187 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4188 "using only the first eight.\n");
4189 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4190 }
4191 if (*pt == '}')
4192 {
4193 p = pt + 1;
4194 break;
4195 }
4196 /* Not correct form for \x{...}; fall through */
4197 }
4198
4199 /* \x without {} always defines just one byte in 8-bit mode. This
4200 allows UTF-8 characters to be constructed byte by byte, and also allows
4201 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4202 Otherwise, pass it down to later code so that it can be turned into
4203 UTF-8 when running in 16/32-bit mode. */
4204
4205 c = 0;
4206 while (i++ < 2 && isxdigit(*p))
4207 {
4208 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4209 p++;
4210 }
4211 if (use_utf)
4212 {
4213 *q++ = c;
4214 continue;
4215 }
4216 break;
4217
4218 case 0: /* \ followed by EOF allows for an empty line */
4219 p--;
4220 continue;
4221
4222 case '>':
4223 if (*p == '-')
4224 {
4225 start_offset_sign = -1;
4226 p++;
4227 }
4228 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4229 start_offset *= start_offset_sign;
4230 continue;
4231
4232 case 'A': /* Option setting */
4233 options |= PCRE_ANCHORED;
4234 continue;
4235
4236 case 'B':
4237 options |= PCRE_NOTBOL;
4238 continue;
4239
4240 case 'C':
4241 if (isdigit(*p)) /* Set copy string */
4242 {
4243 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4244 copystrings |= 1 << n;
4245 }
4246 else if (isalnum(*p))
4247 {
4248 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4249 }
4250 else if (*p == '+')
4251 {
4252 callout_extra = 1;
4253 p++;
4254 }
4255 else if (*p == '-')
4256 {
4257 SET_PCRE_CALLOUT(NULL);
4258 p++;
4259 }
4260 else if (*p == '!')
4261 {
4262 callout_fail_id = 0;
4263 p++;
4264 while(isdigit(*p))
4265 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4266 callout_fail_count = 0;
4267 if (*p == '!')
4268 {
4269 p++;
4270 while(isdigit(*p))
4271 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4272 }
4273 }
4274 else if (*p == '*')
4275 {
4276 int sign = 1;
4277 callout_data = 0;
4278 if (*(++p) == '-') { sign = -1; p++; }
4279 while(isdigit(*p))
4280 callout_data = callout_data * 10 + *p++ - '0';
4281 callout_data *= sign;
4282 callout_data_set = 1;
4283 }
4284 continue;
4285
4286 #if !defined NODFA
4287 case 'D':
4288 #if !defined NOPOSIX
4289 if (posix || do_posix)
4290 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4291 else
4292 #endif
4293 use_dfa = 1;
4294 continue;
4295 #endif
4296
4297 #if !defined NODFA
4298 case 'F':
4299 options |= PCRE_DFA_SHORTEST;
4300 continue;
4301 #endif
4302
4303 case 'G':
4304 if (isdigit(*p))
4305 {
4306 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4307 getstrings |= 1 << n;
4308 }
4309 else if (isalnum(*p))
4310 {
4311 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4312 }
4313 continue;
4314
4315 case 'J':
4316 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4317 if (extra != NULL
4318 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4319 && extra->executable_jit != NULL)
4320 {
4321 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4322 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4323 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4324 }
4325 continue;
4326
4327 case 'L':
4328 getlist = 1;
4329 continue;
4330
4331 case 'M':
4332 find_match_limit = 1;
4333 continue;
4334
4335 case 'N':
4336 if ((options & PCRE_NOTEMPTY) != 0)
4337 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4338 else
4339 options |= PCRE_NOTEMPTY;
4340 continue;
4341
4342 case 'O':
4343 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4344 if (n > size_offsets_max)
4345 {
4346 size_offsets_max = n;
4347 free(offsets);
4348 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4349 if (offsets == NULL)
4350 {
4351 printf("** Failed to get %d bytes of memory for offsets vector\n",
4352 (int)(size_offsets_max * sizeof(int)));
4353 yield = 1;
4354 goto EXIT;
4355 }
4356 }
4357 use_size_offsets = n;
4358 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4359 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4360 continue;
4361
4362 case 'P':
4363 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4364 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4365 continue;
4366
4367 case 'Q':
4368 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4369 if (extra == NULL)
4370 {
4371 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4372 extra->flags = 0;
4373 }
4374 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4375 extra->match_limit_recursion = n;
4376 continue;
4377
4378 case 'q':
4379 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4380 if (extra == NULL)
4381 {
4382 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4383 extra->flags = 0;
4384 }
4385 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4386 extra->match_limit = n;
4387 continue;
4388
4389 #if !defined NODFA
4390 case 'R':
4391 options |= PCRE_DFA_RESTART;
4392 continue;
4393 #endif
4394
4395 case 'S':
4396 show_malloc = 1;
4397 continue;
4398
4399 case 'Y':
4400 options |= PCRE_NO_START_OPTIMIZE;
4401 continue;
4402
4403 case 'Z':
4404 options |= PCRE_NOTEOL;
4405 continue;
4406
4407 case '?':
4408 options |= PCRE_NO_UTF8_CHECK;
4409 continue;
4410
4411 case '<':
4412 {
4413 int x = check_newline(p, outfile);
4414 if (x == 0) goto NEXT_DATA;
4415 options |= x;
4416 while (*p++ != '>');
4417 }
4418 continue;
4419 }
4420
4421 /* We now have a character value in c that may be greater than 255. In
4422 16-bit or 32-bit mode, we always convert characters to UTF-8 so that
4423 values greater than 255 can be passed to non-UTF 16-bit or 32-bit strings. In 8-bit
4424 mode we convert to UTF-8 if we are in UTF mode. Values greater than 127
4425 in UTF mode must have come from \x{...} or octal constructs because values
4426 from \x.. get this far only in non-UTF mode. */
4427
4428 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
4429 if (pcre_mode != PCRE8_MODE || use_utf)
4430 {
4431 pcre_uint8 buff8[8];
4432 int ii, utn;
4433 utn = ord2utf8(c, buff8);
4434 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
4435 }
4436 else
4437 #endif
4438 {
4439 if (c > 255)
4440 {
4441 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4442 "and UTF-8 mode is not enabled.\n", c);
4443 fprintf(outfile, "** Truncation will probably give the wrong "
4444 "result.\n");
4445 }
4446 *q++ = c;
4447 }
4448 }
4449
4450 /* Reached end of subject string */
4451
4452 *q = 0;
4453 len = (int)(q - dbuffer);
4454
4455 /* Move the data to the end of the buffer so that a read over the end of
4456 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
4457 we are using the POSIX interface, we must include the terminating zero. */
4458
4459 #if !defined NOPOSIX
4460 if (posix || do_posix)
4461 {
4462 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
4463 bptr += buffer_size - len - 1;
4464 }
4465 else
4466 #endif
4467 {
4468 memmove(bptr + buffer_size - len, bptr, len);
4469 bptr += buffer_size - len;
4470 }
4471
4472 if ((all_use_dfa || use_dfa) && find_match_limit)
4473 {
4474 printf("**Match limit not relevant for DFA matching: ignored\n");
4475 find_match_limit = 0;
4476 }
4477
4478 /* Handle matching via the POSIX interface, which does not
4479 support timing or playing with the match limit or callout data. */
4480
4481 #if !defined NOPOSIX
4482 if (posix || do_posix)
4483 {
4484 int rc;
4485 int eflags = 0;
4486 regmatch_t *pmatch = NULL;
4487 if (use_size_offsets > 0)
4488 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4489 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4490 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4491 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4492
4493 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4494
4495 if (rc != 0)
4496 {
4497 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4498 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4499 }
4500 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4501 {
4502 fprintf(outfile, "Matched with REG_NOSUB\n");
4503 }
4504 else
4505 {
4506 size_t i;
4507 for (i = 0; i < (size_t)use_size_offsets; i++)
4508 {
4509 if (pmatch[i].rm_so >= 0)
4510 {
4511 fprintf(outfile, "%2d: ", (int)i);
4512 PCHARSV(dbuffer, pmatch[i].rm_so,
4513 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4514 fprintf(outfile, "\n");
4515 if (do_showcaprest || (i == 0 && do_showrest))
4516 {
4517 fprintf(outfile, "%2d+ ", (int)i);
4518 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4519 outfile);
4520 fprintf(outfile, "\n");
4521 }
4522 }
4523 }
4524 }
4525 free(pmatch);
4526 goto NEXT_DATA;
4527 }
4528
4529 #endif /* !defined NOPOSIX */
4530
4531 /* Handle matching via the native interface - repeats for /g and /G */
4532
4533 #ifdef SUPPORT_PCRE16
4534 if (pcre_mode == PCRE16_MODE)
4535 {
4536 len = to16(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF8, len);
4537 switch(len)
4538 {
4539 case -1:
4540 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4541 "converted to UTF-16\n");
4542 goto NEXT_DATA;
4543
4544 case -2:
4545 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4546 "cannot be converted to UTF-16\n");
4547 goto NEXT_DATA;
4548
4549 case -3:
4550 fprintf(outfile, "**Failed: character value greater than 0xffff "
4551 "cannot be converted to 16-bit in non-UTF mode\n");
4552 goto NEXT_DATA;
4553
4554 default:
4555 break;
4556 }
4557 bptr = (pcre_uint8 *)buffer16;
4558 }
4559 #endif
4560
4561 #ifdef SUPPORT_PCRE32
4562 if (pcre_mode == PCRE32_MODE)
4563 {
4564 len = to32(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF32, len);
4565 switch(len)
4566 {
4567 case -1:
4568 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4569 "converted to UTF-32\n");
4570 goto NEXT_DATA;
4571
4572 case -2:
4573 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4574 "cannot be converted to UTF-32\n");
4575 goto NEXT_DATA;
4576
4577 case -3:
4578 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4579 goto NEXT_DATA;
4580
4581 default:
4582 break;
4583 }
4584 bptr = (pcre_uint8 *)buffer32;
4585 }
4586 #endif
4587
4588 /* Ensure that there is a JIT callback if we want to verify that JIT was
4589 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4590
4591 if (verify_jit && jit_stack == NULL && extra != NULL)
4592 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4593
4594 for (;; gmatched++) /* Loop for /g or /G */
4595 {
4596 markptr = NULL;
4597 jit_was_used = FALSE;
4598
4599 if (timeitm > 0)
4600 {
4601 register int i;
4602 clock_t time_taken;
4603 clock_t start_time = clock();
4604
4605 #if !defined NODFA
4606 if (all_use_dfa || use_dfa)
4607 {
4608 if ((options & PCRE_DFA_RESTART) != 0)
4609 {
4610 fprintf(outfile, "Timing DFA restarts is not supported\n");
4611 break;
4612 }
4613 if (dfa_workspace == NULL)
4614 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4615 for (i = 0; i < timeitm; i++)
4616 {
4617 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4618 (options | g_notempty), use_offsets, use_size_offsets,
4619 dfa_workspace, DFA_WS_DIMENSION);
4620 }
4621 }
4622 else
4623 #endif
4624
4625 for (i = 0; i < timeitm; i++)
4626 {
4627 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4628 (options | g_notempty), use_offsets, use_size_offsets);
4629 }
4630 time_taken = clock() - start_time;
4631 fprintf(outfile, "Execute time %.4f milliseconds\n",
4632 (((double)time_taken * 1000.0) / (double)timeitm) /
4633 (double)CLOCKS_PER_SEC);
4634 }
4635
4636 /* If find_match_limit is set, we want to do repeated matches with
4637 varying limits in order to find the minimum value for the match limit and
4638 for the recursion limit. The match limits are relevant only to the normal
4639 running of pcre_exec(), so disable the JIT optimization. This makes it
4640 possible to run the same set of tests with and without JIT externally
4641 requested. */
4642
4643 if (find_match_limit)
4644 {
4645 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4646 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4647 extra->flags = 0;
4648
4649 (void)check_match_limit(re, extra, bptr, len, start_offset,
4650 options|g_notempty, use_offsets, use_size_offsets,
4651 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4652 PCRE_ERROR_MATCHLIMIT, "match()");
4653
4654 count = check_match_limit(re, extra, bptr, len, start_offset,
4655 options|g_notempty, use_offsets, use_size_offsets,
4656 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4657 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4658 }
4659
4660 /* If callout_data is set, use the interface with additional data */
4661
4662 else if (callout_data_set)
4663 {
4664 if (extra == NULL)
4665 {
4666 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4667 extra->flags = 0;
4668 }
4669 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4670 extra->callout_data = &callout_data;
4671 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4672 options | g_notempty, use_offsets, use_size_offsets);
4673 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4674 }
4675
4676 /* The normal case is just to do the match once, with the default
4677 value of match_limit. */
4678
4679 #if !defined NODFA
4680 else if (all_use_dfa || use_dfa)
4681 {
4682 if (dfa_workspace == NULL)
4683 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4684 if (dfa_matched++ == 0)
4685 dfa_workspace[0] = -1; /* To catch bad restart */
4686 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4687 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4688 DFA_WS_DIMENSION);
4689 if (count == 0)
4690 {
4691 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4692 count = use_size_offsets/2;
4693 }
4694 }
4695 #endif
4696
4697 else
4698 {
4699 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4700 options | g_notempty, use_offsets, use_size_offsets);
4701 if (count == 0)
4702 {
4703 fprintf(outfile, "Matched, but too many substrings\n");
4704 count = use_size_offsets/3;
4705 }
4706 }
4707
4708 /* Matched */
4709
4710 if (count >= 0)
4711 {
4712 int i, maxcount;
4713 void *cnptr, *gnptr;
4714
4715 #if !defined NODFA
4716 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4717 #endif
4718 maxcount = use_size_offsets/3;
4719
4720 /* This is a check against a lunatic return value. */
4721
4722 if (count > maxcount)
4723 {
4724 fprintf(outfile,
4725 "** PCRE error: returned count %d is too big for offset size %d\n",
4726 count, use_size_offsets);
4727 count = use_size_offsets/3;
4728 if (do_g || do_G)
4729 {
4730 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4731 do_g = do_G = FALSE; /* Break g/G loop */
4732 }
4733 }
4734
4735 /* do_allcaps requests showing of all captures in the pattern, to check
4736 unset ones at the end. */
4737
4738 if (do_allcaps)
4739 {
4740 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4741 goto SKIP_DATA;
4742 count++; /* Allow for full match */
4743 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4744 }
4745
4746 /* Output the captured substrings */
4747
4748 for (i = 0; i < count * 2; i += 2)
4749 {
4750 if (use_offsets[i] < 0)
4751 {
4752 if (use_offsets[i] != -1)
4753 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4754 use_offsets[i], i);
4755 if (use_offsets[i+1] != -1)
4756 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4757 use_offsets[i+1], i+1);
4758 fprintf(outfile, "%2d: <unset>\n", i/2);
4759 }
4760 else
4761 {
4762 fprintf(outfile, "%2d: ", i/2);
4763 PCHARSV(bptr, use_offsets[i],
4764 use_offsets[i+1] - use_offsets[i], outfile);
4765 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4766 fprintf(outfile, "\n");
4767 if (do_showcaprest || (i == 0 && do_showrest))
4768 {
4769 fprintf(outfile, "%2d+ ", i/2);
4770 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4771 outfile);
4772 fprintf(outfile, "\n");
4773 }
4774 }
4775 }
4776
4777 if (markptr != NULL)
4778 {
4779 fprintf(outfile, "MK: ");
4780 PCHARSV(markptr, 0, -1, outfile);
4781 fprintf(outfile, "\n");
4782 }
4783
4784 for (i = 0; i < 32; i++)
4785 {
4786 if ((copystrings & (1 << i)) != 0)
4787 {
4788 int rc;
4789 char copybuffer[256];
4790 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4791 copybuffer, sizeof(copybuffer));
4792 if (rc < 0)
4793 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4794 else
4795 {
4796 fprintf(outfile, "%2dC ", i);
4797 PCHARSV(copybuffer, 0, rc, outfile);
4798 fprintf(outfile, " (%d)\n", rc);
4799 }
4800 }
4801 }
4802
4803 cnptr = copynames;
4804 for (;;)
4805 {
4806 int rc;
4807 char copybuffer[256];
4808
4809 if (pcre_mode == PCRE16_MODE)
4810 {
4811 if (*(pcre_uint16 *)cnptr == 0) break;
4812 }
4813 else
4814 {
4815 if (*(pcre_uint8 *)cnptr == 0) break;
4816 }
4817
4818 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4819 cnptr, copybuffer, sizeof(copybuffer));
4820
4821 if (rc < 0)
4822 {
4823 fprintf(outfile, "copy substring ");
4824 PCHARSV(cnptr, 0, -1, outfile);
4825 fprintf(outfile, " failed %d\n", rc);
4826 }
4827 else
4828 {
4829 fprintf(outfile, " C ");
4830 PCHARSV(copybuffer, 0, rc, outfile);
4831 fprintf(outfile, " (%d) ", rc);
4832 PCHARSV(cnptr, 0, -1, outfile);
4833 putc('\n', outfile);
4834 }
4835
4836 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4837 }
4838
4839 for (i = 0; i < 32; i++)
4840 {
4841 if ((getstrings & (1 << i)) != 0)
4842 {
4843 int rc;
4844 const char *substring;
4845 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4846 if (rc < 0)
4847 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4848 else
4849 {
4850 fprintf(outfile, "%2dG ", i);
4851 PCHARSV(substring, 0, rc, outfile);
4852 fprintf(outfile, " (%d)\n", rc);
4853 PCRE_FREE_SUBSTRING(substring);
4854 }
4855 }
4856 }
4857
4858 gnptr = getnames;
4859 for (;;)
4860 {
4861 int rc;
4862 const char *substring;
4863
4864 if (pcre_mode == PCRE16_MODE)
4865 {
4866 if (*(pcre_uint16 *)gnptr == 0) break;
4867 }
4868 else
4869 {
4870 if (*(pcre_uint8 *)gnptr == 0) break;
4871 }
4872
4873 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4874 gnptr, &substring);
4875 if (rc < 0)
4876 {
4877 fprintf(outfile, "get substring ");
4878 PCHARSV(gnptr, 0, -1, outfile);
4879 fprintf(outfile, " failed %d\n", rc);
4880 }
4881 else
4882 {
4883 fprintf(outfile, " G ");
4884 PCHARSV(substring, 0, rc, outfile);
4885 fprintf(outfile, " (%d) ", rc);
4886 PCHARSV(gnptr, 0, -1, outfile);
4887 PCRE_FREE_SUBSTRING(substring);
4888 putc('\n', outfile);
4889 }
4890
4891 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4892 }
4893
4894 if (getlist)
4895 {
4896 int rc;
4897 const char **stringlist;
4898 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4899 if (rc < 0)
4900 fprintf(outfile, "get substring list failed %d\n", rc);
4901 else
4902 {
4903 for (i = 0; i < count; i++)
4904 {
4905 fprintf(outfile, "%2dL ", i);
4906 PCHARSV(stringlist[i], 0, -1, outfile);
4907 putc('\n', outfile);
4908 }
4909 if (stringlist[i] != NULL)
4910 fprintf(outfile, "string list not terminated by NULL\n");
4911 PCRE_FREE_SUBSTRING_LIST(stringlist);
4912 }
4913 }
4914 }
4915
4916 /* There was a partial match */
4917
4918 else if (count == PCRE_ERROR_PARTIAL)
4919 {
4920 if (markptr == NULL) fprintf(outfile, "Partial match");
4921 else
4922 {
4923 fprintf(outfile, "Partial match, mark=");
4924 PCHARSV(markptr, 0, -1, outfile);
4925 }
4926 if (use_size_offsets > 1)
4927 {
4928 fprintf(outfile, ": ");
4929 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4930 outfile);
4931 }
4932 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4933 fprintf(outfile, "\n");
4934 break; /* Out of the /g loop */
4935 }
4936
4937 /* Failed to match. If this is a /g or /G loop and we previously set
4938 g_notempty after a null match, this is not necessarily the end. We want
4939 to advance the start offset, and continue. We won't be at the end of the
4940 string - that was checked before setting g_notempty.
4941
4942 Complication arises in the case when the newline convention is "any",
4943 "crlf", or "anycrlf". If the previous match was at the end of a line
4944 terminated by CRLF, an advance of one character just passes the \r,
4945 whereas we should prefer the longer newline sequence, as does the code in
4946 pcre_exec(). Fudge the offset value to achieve this. We check for a
4947 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4948 find the default.
4949
4950 Otherwise, in the case of UTF-8 matching, the advance must be one
4951 character, not one byte. */
4952
4953 else
4954 {
4955 if (g_notempty != 0)
4956 {
4957 int onechar = 1;
4958 unsigned int obits = REAL_PCRE_OPTIONS(re);
4959 use_offsets[0] = start_offset;
4960 if ((obits & PCRE_NEWLINE_BITS) == 0)
4961 {
4962 int d;
4963 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4964 /* Note that these values are always the ASCII ones, even in
4965 EBCDIC environments. CR = 13, NL = 10. */
4966 obits = (d == 13)? PCRE_NEWLINE_CR :
4967 (d == 10)? PCRE_NEWLINE_LF :
4968 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4969 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4970 (d == -1)? PCRE_NEWLINE_ANY : 0;
4971 }
4972 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4973 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4974 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4975 &&
4976 start_offset < len - 1 && (
4977 #ifdef SUPPORT_PCRE8
4978 (pcre_mode == PCRE8_MODE &&
4979 bptr[start_offset] == '\r' &&
4980 bptr[start_offset + 1] == '\n') ||
4981 #endif
4982 #ifdef SUPPORT_PCRE16
4983 (pcre_mode == PCRE16_MODE &&
4984 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
4985 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
4986 #endif
4987 #ifdef SUPPORT_PCRE32
4988 (pcre_mode == PCRE32_MODE &&
4989 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
4990 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
4991 #endif
4992 0))
4993 onechar++;
4994 else if (use_utf)
4995 {
4996 while (start_offset + onechar < len)
4997 {
4998 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4999 onechar++;
5000 }
5001 }
5002 use_offsets[1] = start_offset + onechar;
5003 }
5004 else
5005 {
5006 switch(count)
5007 {
5008 case PCRE_ERROR_NOMATCH:
5009 if (gmatched == 0)
5010 {
5011 if (markptr == NULL)
5012 {
5013 fprintf(outfile, "No match");
5014 }
5015 else
5016 {
5017 fprintf(outfile, "No match, mark = ");
5018 PCHARSV(markptr, 0, -1, outfile);
5019 }
5020 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5021 putc('\n', outfile);
5022 }
5023 break;
5024
5025 case PCRE_ERROR_BADUTF8:
5026 case PCRE_ERROR_SHORTUTF8:
5027 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5028 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5029 8 * CHAR_SIZE);
5030 if (use_size_offsets >= 2)
5031 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5032 use_offsets[1]);
5033 fprintf(outfile, "\n");
5034 break;
5035
5036 case PCRE_ERROR_BADUTF8_OFFSET:
5037 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5038 8 * CHAR_SIZE);
5039 break;
5040
5041 default:
5042 if (count < 0 &&
5043 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5044 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5045 else
5046 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5047 break;
5048 }
5049
5050 break; /* Out of the /g loop */
5051 }
5052 }
5053
5054 /* If not /g or /G we are done */
5055
5056 if (!do_g && !do_G) break;
5057
5058 /* If we have matched an empty string, first check to see if we are at
5059 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5060 Perl's /g option does. This turns out to be rather cunning. First we set
5061 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5062 same point. If this fails (picked up above) we advance to the next
5063 character. */
5064
5065 g_notempty = 0;
5066
5067 if (use_offsets[0] == use_offsets[1])
5068 {
5069 if (use_offsets[0] == len) break;
5070 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5071 }
5072
5073 /* For /g, update the start offset, leaving the rest alone */
5074
5075 if (do_g) start_offset = use_offsets[1];
5076
5077 /* For /G, update the pointer and length */
5078
5079 else
5080 {
5081 bptr += use_offsets[1] * CHAR_SIZE;
5082 len -= use_offsets[1];
5083 }
5084 } /* End of loop for /g and /G */
5085
5086 NEXT_DATA: continue;
5087 } /* End of loop for data lines */
5088
5089 CONTINUE:
5090
5091 #if !defined NOPOSIX
5092 if (posix || do_posix) regfree(&preg);
5093 #endif
5094
5095 if (re != NULL) new_free(re);
5096 if (extra != NULL)
5097 {
5098 PCRE_FREE_STUDY(extra);
5099 }
5100 if (locale_set)
5101 {
5102 new_free((void *)tables);
5103 setlocale(LC_CTYPE, "C");
5104 locale_set = 0;
5105 }
5106 if (jit_stack != NULL)
5107 {
5108 PCRE_JIT_STACK_FREE(jit_stack);
5109 jit_stack = NULL;
5110 }
5111 }
5112
5113 if (infile == stdin) fprintf(outfile, "\n");
5114
5115 EXIT:
5116
5117 if (infile != NULL && infile != stdin) fclose(infile);
5118 if (outfile != NULL && outfile != stdout) fclose(outfile);
5119
5120 free(buffer);
5121 free(dbuffer);
5122 free(pbuffer);
5123 free(offsets);
5124
5125 #ifdef SUPPORT_PCRE16
5126 if (buffer16 != NULL) free(buffer16);
5127 #endif
5128 #ifdef SUPPORT_PCRE32
5129 if (buffer32 != NULL) free(buffer32);
5130 #endif
5131
5132 #if !defined NODFA
5133 if (dfa_workspace != NULL)
5134 free(dfa_workspace);
5135 #endif
5136
5137 return yield;
5138 }
5139
5140 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5