/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1090 - (show annotations)
Tue Oct 16 15:55:48 2012 UTC (6 years, 11 months ago) by chpe
File MIME type: text/plain
File size: 154951 byte(s)
pcre32: pcretest: Don't convert data line to UTF-8 first

While reading the data lines, directly put them into the 8, 16 or
32 bit buffers instead of first converting them into UTF-8 and only
afterwards converting that buffer to 16/32 bit. This is necessary so
that in 32 bit mode the \x{} escapes can use the full 32-bit range
(while the non-standard 5/6 byte UTF-8 sequences can only express
characters up to 31-bits).
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of the 8-bit, 16-bit and 32-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both the 8-bit and 16-bit modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8, COMPILE_PCRE16 or COMPILE_PCRE32. It does,
46 however, make use of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136
137 #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138 /* Configure internal macros to 32 bit mode. */
139 #define COMPILE_PCRE32
140 #endif
141 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142 /* Configure internal macros to 16 bit mode. */
143 #define COMPILE_PCRE16
144 #endif
145 #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146 /* Configure internal macros to 8 bit mode. */
147 #define COMPILE_PCRE8
148 #endif
149
150 #include "pcre_internal.h"
151
152 /* The pcre_printint() function, which prints the internal form of a compiled
153 regex, is held in a separate file so that (a) it can be compiled in either
154 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155 when that is compiled in debug mode. */
156
157 #ifdef SUPPORT_PCRE8
158 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159 #endif
160 #ifdef SUPPORT_PCRE16
161 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162 #endif
163 #ifdef SUPPORT_PCRE32
164 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165 #endif
166
167 /* We need access to some of the data tables that PCRE uses. So as not to have
168 to keep two copies, we include the source files here, changing the names of the
169 external symbols to prevent clashes. */
170
171 #define PCRE_INCLUDED
172
173 #include "pcre_tables.c"
174 #include "pcre_ucd.c"
175
176 /* The definition of the macro PRINTABLE, which determines whether to print an
177 output character as-is or as a hex value when showing compiled patterns, is
178 the same as in the printint.src file. We use it here in cases when the locale
179 has not been explicitly changed, so as to get consistent output from systems
180 that differ in their output from isprint() even in the "C" locale. */
181
182 #ifdef EBCDIC
183 #define PRINTABLE(c) ((c) >= 64 && (c) < 255) /* EBCDIC build: values 64..254 are printed as-is */
184 #else
185 #define PRINTABLE(c) ((c) >= 32 && (c) < 127) /* otherwise: values 32..126 are printed as-is */
186 #endif
187 /* PRINTOK defers to isprint() once locale_set is non-zero; otherwise it uses the fixed PRINTABLE range. */
188 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189
190 /* Posix support is disabled in 16 or 32 bit only mode. */
191 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192 #define NOPOSIX
193 #endif
194
195 /* It is possible to compile this test program without including support for
196 testing the POSIX interface, though this is not available via the standard
197 Makefile. */
198
199 #if !defined NOPOSIX
200 #include "pcreposix.h"
201 #endif
202
203 /* It is also possible, originally for the benefit of a version that was
204 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206 automatically cut out the UTF support if PCRE is built without it. */
207
208 #ifndef SUPPORT_UTF
209 #ifndef NOUTF
210 #define NOUTF
211 #endif
212 #endif
213
214 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
215 for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
216 only from one place and is handled differently). I couldn't dream up any way of
217 using a single macro to do this in a generic way, because of the many different
218 argument requirements. We know that at least one of SUPPORT_PCRE8,
219 SUPPORT_PCRE16 and SUPPORT_PCRE32 must be set. First define macros for each individual mode; then
220 use these in the definitions of generic macros.
221
222 **** Special note about the PCHARSxxx macros: the address of the string to be
223 printed is always given as two arguments: a base address followed by an offset.
224 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
225 offset is in units of this size. If the string were given as base+offset in one
226 argument, the casting might be incorrectly applied. */
227
228 #ifdef SUPPORT_PCRE8
229 /* 8-bit mode: each macro forwards to the matching pcre_xxx() call (or to pchars()/read_capture_name8()), casting string arguments to char * where the call needs it. */
230 #define PCHARS8(lv, p, offset, len, f) \
231 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
232
233 #define PCHARSV8(p, offset, len, f) \
234 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235 /* Takes all three name buffers (cn8, cn16, cn32) so every mode shares one argument list; only cn8 is used here. */
236 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237 p = read_capture_name8(p, cn8, re)
238
239 #define STRLEN8(p) ((int)strlen((char *)p))
240
241 #define SET_PCRE_CALLOUT8(callout) \
242 pcre_callout = callout
243
244 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245 pcre_assign_jit_stack(extra, callback, userdata)
246
247 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248 re = pcre_compile((char *)pat, options, error, erroffset, tables)
249
250 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251 namesptr, cbuffer, size) \
252 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253 (char *)namesptr, cbuffer, size)
254
255 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257
258 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259 offsets, size_offsets, workspace, size_workspace) \
260 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261 offsets, size_offsets, workspace, size_workspace)
262
263 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264 offsets, size_offsets) \
265 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266 offsets, size_offsets)
267
268 #define PCRE_FREE_STUDY8(extra) \
269 pcre_free_study(extra)
270
271 #define PCRE_FREE_SUBSTRING8(substring) \
272 pcre_free_substring(substring)
273
274 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275 pcre_free_substring_list(listptr)
276
277 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278 getnamesptr, subsptr) \
279 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280 (char *)getnamesptr, subsptr)
281 /* NOTE(review): the second parameter is named rc but is never used; the expansion reads a variable called re that must be in scope at the call site - confirm this is intended. */
282 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
283 n = pcre_get_stringnumber(re, (char *)ptr)
284
285 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287
288 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290
291 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293
294 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295 pcre_printint(re, outfile, debug_lengths)
296
297 #define PCRE_STUDY8(extra, re, options, error) \
298 extra = pcre_study(re, options, error)
299
300 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301 pcre_jit_stack_alloc(startsize, maxsize)
302
303 #define PCRE_JIT_STACK_FREE8(stack) \
304 pcre_jit_stack_free(stack)
305
306 #endif /* SUPPORT_PCRE8 */
307
308 /* -----------------------------------------------------------*/
309
310 #ifdef SUPPORT_PCRE16
311 /* 16-bit mode: the same macro set, calling the pcre16_xxx() functions; arguments are cast to their pcre16 types on the way in and results are cast back to the 8-bit types used by the callers. */
312 #define PCHARS16(lv, p, offset, len, f) \
313 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
314
315 #define PCHARSV16(p, offset, len, f) \
316 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
317 /* Takes all three name buffers so every mode shares one argument list; only cn16 is used here. */
318 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319 p = read_capture_name16(p, cn16, re)
320
321 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322
323 #define SET_PCRE_CALLOUT16(callout) \
324 pcre16_callout = (int (*)(pcre16_callout_block *))callout
325
326 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327 pcre16_assign_jit_stack((pcre16_extra *)extra, \
328 (pcre16_jit_callback)callback, userdata)
329
330 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332 tables)
333 /* cbuffer is cast to PCRE_UCHAR16 * and size is halved - presumably size is a byte count being converted to 16-bit units; confirm against the callers. */
334 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335 namesptr, cbuffer, size) \
336 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338
339 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341 (PCRE_UCHAR16 *)cbuffer, size/2)
342
343 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344 offsets, size_offsets, workspace, size_workspace) \
345 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347 workspace, size_workspace)
348
349 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350 offsets, size_offsets) \
351 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352 len, start_offset, options, offsets, size_offsets)
353
354 #define PCRE_FREE_STUDY16(extra) \
355 pcre16_free_study((pcre16_extra *)extra)
356
357 #define PCRE_FREE_SUBSTRING16(substring) \
358 pcre16_free_substring((PCRE_SPTR16)substring)
359
360 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362
363 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364 getnamesptr, subsptr) \
365 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367 /* NOTE(review): parameter rc is unused; the expansion reads a variable called re from the call site and, unlike the sibling macros, passes it without a (pcre16 *) cast - confirm. */
368 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
369 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
370
371 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373 (PCRE_SPTR16 *)(void*)subsptr)
374
375 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377 (PCRE_SPTR16 **)(void*)listptr)
378
379 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381 tables)
382
383 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384 pcre16_printint(re, outfile, debug_lengths)
385
386 #define PCRE_STUDY16(extra, re, options, error) \
387 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388
389 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391
392 #define PCRE_JIT_STACK_FREE16(stack) \
393 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394
395 #endif /* SUPPORT_PCRE16 */
396
397 /* -----------------------------------------------------------*/
398
399 #ifdef SUPPORT_PCRE32
400 /* 32-bit mode: the same macro set, calling the pcre32_xxx() functions; arguments are cast to their pcre32 types on the way in and results are cast back to the 8-bit types used by the callers. */
401 #define PCHARS32(lv, p, offset, len, f) \
402 lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)
403
404 #define PCHARSV32(p, offset, len, f) \
405 (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)
406 /* Takes all three name buffers so every mode shares one argument list; only cn32 is used here. */
407 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408 p = read_capture_name32(p, cn32, re)
409
410 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411
412 #define SET_PCRE_CALLOUT32(callout) \
413 pcre32_callout = (int (*)(pcre32_callout_block *))callout
414
415 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416 pcre32_assign_jit_stack((pcre32_extra *)extra, \
417 (pcre32_jit_callback)callback, userdata)
418
419 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421 tables)
422
423 #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
424 namesptr, cbuffer, size) \
425 rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
426 count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/4)
427 /* In both 32-bit copy macros size is taken to be the byte capacity of cbuffer (the 8-bit variants pass size, the 16-bit ones size/2), so the capacity in 4-byte PCRE_UCHAR32 units is size/4; size/2 would overstate it and allow a write past the end of cbuffer. */
428 #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
429 rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
430 (PCRE_UCHAR32 *)cbuffer, size/4)
431
432 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433 offsets, size_offsets, workspace, size_workspace) \
434 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436 workspace, size_workspace)
437
438 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439 offsets, size_offsets) \
440 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441 len, start_offset, options, offsets, size_offsets)
442
443 #define PCRE_FREE_STUDY32(extra) \
444 pcre32_free_study((pcre32_extra *)extra)
445
446 #define PCRE_FREE_SUBSTRING32(substring) \
447 pcre32_free_substring((PCRE_SPTR32)substring)
448
449 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451
452 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453 getnamesptr, subsptr) \
454 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456 /* NOTE(review): parameter rc is unused; the expansion reads a variable called re from the call site and, unlike the sibling macros, passes it without a (pcre32 *) cast - confirm. */
457 #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
458 n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
459
460 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462 (PCRE_SPTR32 *)(void*)subsptr)
463
464 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466 (PCRE_SPTR32 **)(void*)listptr)
467
468 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470 tables)
471
472 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473 pcre32_printint(re, outfile, debug_lengths)
474
475 #define PCRE_STUDY32(extra, re, options, error) \
476 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477
478 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480
481 #define PCRE_JIT_STACK_FREE32(stack) \
482 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483
484 #endif /* SUPPORT_PCRE32 */
485
486
487 /* ----- Two or more modes are supported; a runtime test on pcre_mode is needed,
488 except for pcre_config(), where it doesn't matter which version is called.
489 (The JIT stack macros below are dispatched at run time like the rest.) ----- */
490
491 enum { /* Values compared against pcre_mode; CHAR_SIZE is (1 << pcre_mode), so these must stay 0, 1, 2. */
492 PCRE8_MODE, /* 0 */
493 PCRE16_MODE, /* 1 */
494 PCRE32_MODE /* 2 */
495 };
496
497 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498 /* NOTE(review): this branch is taken when any two modes are enabled, yet every macro below names the ...8, ...16 and ...32 variants (and pcre_config/pcre_maketables) unconditionally; a build with only two of the three modes looks like it would reference undefined macros - confirm. */
499 #define CHAR_SIZE (1 << pcre_mode)
500 /* CHAR_SIZE above yields 1, 2 or 4 because PCRE8_MODE, PCRE16_MODE and PCRE32_MODE are 0, 1 and 2. */
501 #define PCHARS(lv, p, offset, len, f) \
502 if (pcre_mode == PCRE32_MODE) \
503 PCHARS32(lv, p, offset, len, f); \
504 else if (pcre_mode == PCRE16_MODE) \
505 PCHARS16(lv, p, offset, len, f); \
506 else \
507 PCHARS8(lv, p, offset, len, f)
508 /* The statement-form macros expand to a complete if / else if / else chain, so they can be used only where a statement is allowed. */
509 #define PCHARSV(p, offset, len, f) \
510 if (pcre_mode == PCRE32_MODE) \
511 PCHARSV32(p, offset, len, f); \
512 else if (pcre_mode == PCRE16_MODE) \
513 PCHARSV16(p, offset, len, f); \
514 else \
515 PCHARSV8(p, offset, len, f)
516
517 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518 if (pcre_mode == PCRE32_MODE) \
519 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520 else if (pcre_mode == PCRE16_MODE) \
521 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522 else \
523 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524
525 #define SET_PCRE_CALLOUT(callout) \
526 if (pcre_mode == PCRE32_MODE) \
527 SET_PCRE_CALLOUT32(callout); \
528 else if (pcre_mode == PCRE16_MODE) \
529 SET_PCRE_CALLOUT16(callout); \
530 else \
531 SET_PCRE_CALLOUT8(callout)
532 /* STRLEN, PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES yield a value, so they are written as conditional expressions rather than if chains. */
533 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534
535 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536 if (pcre_mode == PCRE32_MODE) \
537 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538 else if (pcre_mode == PCRE16_MODE) \
539 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540 else \
541 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542
543 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544 if (pcre_mode == PCRE32_MODE) \
545 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546 else if (pcre_mode == PCRE16_MODE) \
547 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548 else \
549 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
550 /* PCRE_CONFIG is not dispatched on pcre_mode; it is always pcre_config in this branch. */
551 #define PCRE_CONFIG pcre_config
552
553 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554 namesptr, cbuffer, size) \
555 if (pcre_mode == PCRE32_MODE) \
556 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557 namesptr, cbuffer, size); \
558 else if (pcre_mode == PCRE16_MODE) \
559 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560 namesptr, cbuffer, size); \
561 else \
562 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
563 namesptr, cbuffer, size)
564
565 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566 if (pcre_mode == PCRE32_MODE) \
567 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568 else if (pcre_mode == PCRE16_MODE) \
569 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570 else \
571 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572
573 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574 offsets, size_offsets, workspace, size_workspace) \
575 if (pcre_mode == PCRE32_MODE) \
576 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577 offsets, size_offsets, workspace, size_workspace); \
578 else if (pcre_mode == PCRE16_MODE) \
579 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580 offsets, size_offsets, workspace, size_workspace); \
581 else \
582 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets, workspace, size_workspace)
584
585 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets) \
587 if (pcre_mode == PCRE32_MODE) \
588 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets); \
590 else if (pcre_mode == PCRE16_MODE) \
591 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets); \
593 else \
594 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
595 offsets, size_offsets)
596
597 #define PCRE_FREE_STUDY(extra) \
598 if (pcre_mode == PCRE32_MODE) \
599 PCRE_FREE_STUDY32(extra); \
600 else if (pcre_mode == PCRE16_MODE) \
601 PCRE_FREE_STUDY16(extra); \
602 else \
603 PCRE_FREE_STUDY8(extra)
604
605 #define PCRE_FREE_SUBSTRING(substring) \
606 if (pcre_mode == PCRE32_MODE) \
607 PCRE_FREE_SUBSTRING32(substring); \
608 else if (pcre_mode == PCRE16_MODE) \
609 PCRE_FREE_SUBSTRING16(substring); \
610 else \
611 PCRE_FREE_SUBSTRING8(substring)
612
613 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614 if (pcre_mode == PCRE32_MODE) \
615 PCRE_FREE_SUBSTRING_LIST32(listptr); \
616 else if (pcre_mode == PCRE16_MODE) \
617 PCRE_FREE_SUBSTRING_LIST16(listptr); \
618 else \
619 PCRE_FREE_SUBSTRING_LIST8(listptr)
620
621 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622 getnamesptr, subsptr) \
623 if (pcre_mode == PCRE32_MODE) \
624 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625 getnamesptr, subsptr); \
626 else if (pcre_mode == PCRE16_MODE) \
627 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628 getnamesptr, subsptr); \
629 else \
630 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
631 getnamesptr, subsptr)
632
633 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634 if (pcre_mode == PCRE32_MODE) \
635 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636 else if (pcre_mode == PCRE16_MODE) \
637 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638 else \
639 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640
641 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642 if (pcre_mode == PCRE32_MODE) \
643 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644 else if (pcre_mode == PCRE16_MODE) \
645 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646 else \
647 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648
649 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650 if (pcre_mode == PCRE32_MODE) \
651 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652 else if (pcre_mode == PCRE16_MODE) \
653 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654 else \
655 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656
657 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658 (pcre_mode == PCRE32_MODE ? \
659 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660 : pcre_mode == PCRE16_MODE ? \
661 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663
664 #define PCRE_JIT_STACK_FREE(stack) \
665 if (pcre_mode == PCRE32_MODE) \
666 PCRE_JIT_STACK_FREE32(stack); \
667 else if (pcre_mode == PCRE16_MODE) \
668 PCRE_JIT_STACK_FREE16(stack); \
669 else \
670 PCRE_JIT_STACK_FREE8(stack)
671
672 #define PCRE_MAKETABLES \
673 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674
675 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676 if (pcre_mode == PCRE32_MODE) \
677 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678 else if (pcre_mode == PCRE16_MODE) \
679 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680 else \
681 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682
683 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684 if (pcre_mode == PCRE32_MODE) \
685 PCRE_PRINTINT32(re, outfile, debug_lengths); \
686 else if (pcre_mode == PCRE16_MODE) \
687 PCRE_PRINTINT16(re, outfile, debug_lengths); \
688 else \
689 PCRE_PRINTINT8(re, outfile, debug_lengths)
690
691 #define PCRE_STUDY(extra, re, options, error) \
692 if (pcre_mode == PCRE32_MODE) \
693 PCRE_STUDY32(extra, re, options, error); \
694 else if (pcre_mode == PCRE16_MODE) \
695 PCRE_STUDY16(extra, re, options, error); \
696 else \
697 PCRE_STUDY8(extra, re, options, error)
698
699 /* ----- Only 8-bit mode is supported ----- */
700
701 #elif defined SUPPORT_PCRE8
702 #define CHAR_SIZE 1 /* constant here; the generic names below are plain aliases for the 8-bit macros */
703 #define PCHARS PCHARS8
704 #define PCHARSV PCHARSV8
705 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
706 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
707 #define STRLEN STRLEN8
708 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
709 #define PCRE_COMPILE PCRE_COMPILE8
710 #define PCRE_CONFIG pcre_config
711 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
712 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
713 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
714 #define PCRE_EXEC PCRE_EXEC8
715 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
716 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
717 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
718 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
719 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
720 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
721 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
722 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
723 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
724 #define PCRE_MAKETABLES pcre_maketables()
725 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
726 #define PCRE_PRINTINT PCRE_PRINTINT8
727 #define PCRE_STUDY PCRE_STUDY8
728
729 /* ----- Only 16-bit mode is supported ----- */
730
731 #elif defined SUPPORT_PCRE16
732 #define CHAR_SIZE 2 /* constant here; the generic names below are plain aliases for the 16-bit macros */
733 #define PCHARS PCHARS16
734 #define PCHARSV PCHARSV16
735 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
736 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
737 #define STRLEN STRLEN16
738 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
739 #define PCRE_COMPILE PCRE_COMPILE16
740 #define PCRE_CONFIG pcre16_config
741 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
742 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
743 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
744 #define PCRE_EXEC PCRE_EXEC16
745 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
746 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
747 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
748 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
749 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
750 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
751 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
752 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
753 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
754 #define PCRE_MAKETABLES pcre16_maketables()
755 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756 #define PCRE_PRINTINT PCRE_PRINTINT16
757 #define PCRE_STUDY PCRE_STUDY16
758
759 /* ----- Only 32-bit mode is supported ----- */
760
761 #elif defined SUPPORT_PCRE32
762 #define CHAR_SIZE 4 /* constant here; the generic names below are plain aliases for the 32-bit macros */
763 #define PCHARS PCHARS32
764 #define PCHARSV PCHARSV32
765 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
766 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
767 #define STRLEN STRLEN32
768 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
769 #define PCRE_COMPILE PCRE_COMPILE32
770 #define PCRE_CONFIG pcre32_config
771 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
773 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
774 #define PCRE_EXEC PCRE_EXEC32
775 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
776 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
777 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
778 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
779 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
780 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
781 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
782 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
783 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
784 #define PCRE_MAKETABLES pcre32_maketables()
785 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786 #define PCRE_PRINTINT PCRE_PRINTINT32
787 #define PCRE_STUDY PCRE_STUDY32
788
789 #endif
790
791 /* ----- End of mode-specific function call macros ----- */
792
793
794 /* Other parameters */
795
796 #ifndef CLOCKS_PER_SEC
797 #ifdef CLK_TCK
798 #define CLOCKS_PER_SEC CLK_TCK
799 #else
800 #define CLOCKS_PER_SEC 100
801 #endif
802 #endif
803
804 #if !defined NODFA
805 #define DFA_WS_DIMENSION 1000
806 #endif
807
808 /* This is the default loop count for timing. */
809
810 #define LOOPREPEAT 500000
811
812 /* Static variables */
813
814 static FILE *outfile;
815 static int log_store = 0;
816 static int callout_count;
817 static int callout_extra;
818 static int callout_fail_count;
819 static int callout_fail_id;
820 static int debug_lengths;
821 static int first_callout;
822 static int jit_was_used;
823 static int locale_set = 0;
824 static int show_malloc;
825 static int use_utf;
826 static size_t gotten_store;
827 static size_t first_gotten_store = 0;
828 static const unsigned char *last_callout_mark = NULL;
829
830 /* The buffers grow automatically if very long input lines are encountered. */
831
832 static int buffer_size = 50000;
833 static pcre_uint8 *buffer = NULL;
834 static pcre_uint8 *pbuffer = NULL;
835
836 /* Another buffer is needed for translation to 16/32-bit character strings. It
837 will be obtained and extended as required. */
838
839 #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
840
841 /* We need the table of operator lengths that is used for 16/32-bit compiling, in
842 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
843 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
844 appropriately for the 16/32-bit world. Just as a safety check, make sure that
845 COMPILE_PCRE[16|32] is *not* set. */
846
847 #ifdef COMPILE_PCRE16
848 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
849 #endif
850
851 #ifdef COMPILE_PCRE32
852 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
853 #endif
854
855 #if LINK_SIZE == 2
856 #undef LINK_SIZE
857 #define LINK_SIZE 1
858 #elif LINK_SIZE == 3 || LINK_SIZE == 4
859 #undef LINK_SIZE
860 #define LINK_SIZE 2
861 #else
862 #error LINK_SIZE must be either 2, 3, or 4
863 #endif
864
865 #undef IMM2_SIZE
866 #define IMM2_SIZE 1
867
868 #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
869
870 #ifdef SUPPORT_PCRE16
871 static int buffer16_size = 0;
872 static pcre_uint16 *buffer16 = NULL;
873 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
874 #endif /* SUPPORT_PCRE16 */
875
876 #ifdef SUPPORT_PCRE32
877 static int buffer32_size = 0;
878 static pcre_uint32 *buffer32 = NULL;
879 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
880 #endif /* SUPPORT_PCRE32 */
881
882 /* If we have 8-bit support, default to it; if there is also
883 16- or 32-bit support, it can be changed by an option. If there is no 8-bit support,
884 there must be 16- or 32-bit support, so default to 16-bit mode if available, else 32-bit. */
885
886 #if defined SUPPORT_PCRE8
887 static int pcre_mode = PCRE8_MODE;
888 #elif defined SUPPORT_PCRE16
889 static int pcre_mode = PCRE16_MODE;
890 #elif defined SUPPORT_PCRE32
891 static int pcre_mode = PCRE32_MODE;
892 #endif
893
894 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
895
896 static int jit_study_bits[] =
897 {
898 PCRE_STUDY_JIT_COMPILE,
899 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
900 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
901 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
902 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
903 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
904 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
905 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
906 };
907
908 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
909 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
910
911 /* Textual explanations for runtime error codes */
912
913 static const char *errtexts[] = {
914 NULL, /* 0 is no error */
915 NULL, /* NOMATCH is handled specially */
916 "NULL argument passed",
917 "bad option value",
918 "magic number missing",
919 "unknown opcode - pattern overwritten?",
920 "no more memory",
921 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
922 "match limit exceeded",
923 "callout error code",
924 NULL, /* BADUTF8/16 is handled specially */
925 NULL, /* BADUTF8/16 offset is handled specially */
926 NULL, /* PARTIAL is handled specially */
927 "not used - internal error",
928 "internal error - pattern overwritten?",
929 "bad count value",
930 "item unsupported for DFA matching",
931 "backreference condition or recursion test not supported for DFA matching",
932 "match limit not supported for DFA matching",
933 "workspace size exceeded in DFA matching",
934 "too much recursion for DFA matching",
935 "recursion limit exceeded",
936 "not used - internal error",
937 "invalid combination of newline options",
938 "bad offset value",
939 NULL, /* SHORTUTF8/16 is handled specially */
940 "nested recursion at the same subject position",
941 "JIT stack limit reached",
942 "pattern compiled in wrong mode: 8-bit/16-bit error",
943 "pattern compiled with other endianness",
944 "invalid data in workspace for DFA restart"
945 };
946
947
948 /*************************************************
949 * Alternate character tables *
950 *************************************************/
951
952 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
953 using the default tables of the library. However, the T option can be used to
954 select alternate sets of tables, for different kinds of testing. Note also that
955 the L (locale) option also adjusts the tables. */
956
957 /* This is the set of tables distributed as default with PCRE. It recognizes
958 only ASCII characters. */
959
960 static const pcre_uint8 tables0[] = {
961
962 /* This table is a lower casing table. */
963
964 0, 1, 2, 3, 4, 5, 6, 7,
965 8, 9, 10, 11, 12, 13, 14, 15,
966 16, 17, 18, 19, 20, 21, 22, 23,
967 24, 25, 26, 27, 28, 29, 30, 31,
968 32, 33, 34, 35, 36, 37, 38, 39,
969 40, 41, 42, 43, 44, 45, 46, 47,
970 48, 49, 50, 51, 52, 53, 54, 55,
971 56, 57, 58, 59, 60, 61, 62, 63,
972 64, 97, 98, 99,100,101,102,103,
973 104,105,106,107,108,109,110,111,
974 112,113,114,115,116,117,118,119,
975 120,121,122, 91, 92, 93, 94, 95,
976 96, 97, 98, 99,100,101,102,103,
977 104,105,106,107,108,109,110,111,
978 112,113,114,115,116,117,118,119,
979 120,121,122,123,124,125,126,127,
980 128,129,130,131,132,133,134,135,
981 136,137,138,139,140,141,142,143,
982 144,145,146,147,148,149,150,151,
983 152,153,154,155,156,157,158,159,
984 160,161,162,163,164,165,166,167,
985 168,169,170,171,172,173,174,175,
986 176,177,178,179,180,181,182,183,
987 184,185,186,187,188,189,190,191,
988 192,193,194,195,196,197,198,199,
989 200,201,202,203,204,205,206,207,
990 208,209,210,211,212,213,214,215,
991 216,217,218,219,220,221,222,223,
992 224,225,226,227,228,229,230,231,
993 232,233,234,235,236,237,238,239,
994 240,241,242,243,244,245,246,247,
995 248,249,250,251,252,253,254,255,
996
997 /* This table is a case flipping table. */
998
999 0, 1, 2, 3, 4, 5, 6, 7,
1000 8, 9, 10, 11, 12, 13, 14, 15,
1001 16, 17, 18, 19, 20, 21, 22, 23,
1002 24, 25, 26, 27, 28, 29, 30, 31,
1003 32, 33, 34, 35, 36, 37, 38, 39,
1004 40, 41, 42, 43, 44, 45, 46, 47,
1005 48, 49, 50, 51, 52, 53, 54, 55,
1006 56, 57, 58, 59, 60, 61, 62, 63,
1007 64, 97, 98, 99,100,101,102,103,
1008 104,105,106,107,108,109,110,111,
1009 112,113,114,115,116,117,118,119,
1010 120,121,122, 91, 92, 93, 94, 95,
1011 96, 65, 66, 67, 68, 69, 70, 71,
1012 72, 73, 74, 75, 76, 77, 78, 79,
1013 80, 81, 82, 83, 84, 85, 86, 87,
1014 88, 89, 90,123,124,125,126,127,
1015 128,129,130,131,132,133,134,135,
1016 136,137,138,139,140,141,142,143,
1017 144,145,146,147,148,149,150,151,
1018 152,153,154,155,156,157,158,159,
1019 160,161,162,163,164,165,166,167,
1020 168,169,170,171,172,173,174,175,
1021 176,177,178,179,180,181,182,183,
1022 184,185,186,187,188,189,190,191,
1023 192,193,194,195,196,197,198,199,
1024 200,201,202,203,204,205,206,207,
1025 208,209,210,211,212,213,214,215,
1026 216,217,218,219,220,221,222,223,
1027 224,225,226,227,228,229,230,231,
1028 232,233,234,235,236,237,238,239,
1029 240,241,242,243,244,245,246,247,
1030 248,249,250,251,252,253,254,255,
1031
1032 /* This table contains bit maps for various character classes. Each map is 32
1033 bytes long and the bits run from the least significant end of each byte. The
1034 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1035 graph, print, punct, and cntrl. Other classes are built from combinations. */
1036
1037 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1038 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1039 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1040 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1041
1042 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1043 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1044 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1045 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1046
1047 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1048 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1049 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1050 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1051
1052 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1053 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1054 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1055 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1056
1057 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1058 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1059 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1060 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1061
1062 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1063 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1064 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1065 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1066
1067 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1068 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1069 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1070 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1071
1072 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1073 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1074 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1075 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1076
1077 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1078 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1079 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1080 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1081
1082 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1083 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1084 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1085 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1086
1087 /* This table identifies various classes of character by individual bits:
1088 0x01 white space character
1089 0x02 letter
1090 0x04 decimal digit
1091 0x08 hexadecimal digit
1092 0x10 alphanumeric or '_'
1093 0x80 regular expression metacharacter or binary zero
1094 */
1095
1096 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1097 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1098 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1099 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1100 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1101 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1102 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1103 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1104 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1105 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1106 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1107 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1108 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1109 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1110 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1111 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1112 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1113 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1114 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1115 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1116 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1117 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1118 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1119 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1120 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1121 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1122 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1123 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1124 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1125 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1126 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1127 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1128
1129 /* This is a set of tables that came originally from a Windows user. It seems to
1130 be at least an approximation of ISO 8859. In particular, there are characters
1131 greater than 128 that are marked as spaces, letters, etc. */
1132
1133 static const pcre_uint8 tables1[] = {
1134 0,1,2,3,4,5,6,7,
1135 8,9,10,11,12,13,14,15,
1136 16,17,18,19,20,21,22,23,
1137 24,25,26,27,28,29,30,31,
1138 32,33,34,35,36,37,38,39,
1139 40,41,42,43,44,45,46,47,
1140 48,49,50,51,52,53,54,55,
1141 56,57,58,59,60,61,62,63,
1142 64,97,98,99,100,101,102,103,
1143 104,105,106,107,108,109,110,111,
1144 112,113,114,115,116,117,118,119,
1145 120,121,122,91,92,93,94,95,
1146 96,97,98,99,100,101,102,103,
1147 104,105,106,107,108,109,110,111,
1148 112,113,114,115,116,117,118,119,
1149 120,121,122,123,124,125,126,127,
1150 128,129,130,131,132,133,134,135,
1151 136,137,138,139,140,141,142,143,
1152 144,145,146,147,148,149,150,151,
1153 152,153,154,155,156,157,158,159,
1154 160,161,162,163,164,165,166,167,
1155 168,169,170,171,172,173,174,175,
1156 176,177,178,179,180,181,182,183,
1157 184,185,186,187,188,189,190,191,
1158 224,225,226,227,228,229,230,231,
1159 232,233,234,235,236,237,238,239,
1160 240,241,242,243,244,245,246,215,
1161 248,249,250,251,252,253,254,223,
1162 224,225,226,227,228,229,230,231,
1163 232,233,234,235,236,237,238,239,
1164 240,241,242,243,244,245,246,247,
1165 248,249,250,251,252,253,254,255,
1166 0,1,2,3,4,5,6,7,
1167 8,9,10,11,12,13,14,15,
1168 16,17,18,19,20,21,22,23,
1169 24,25,26,27,28,29,30,31,
1170 32,33,34,35,36,37,38,39,
1171 40,41,42,43,44,45,46,47,
1172 48,49,50,51,52,53,54,55,
1173 56,57,58,59,60,61,62,63,
1174 64,97,98,99,100,101,102,103,
1175 104,105,106,107,108,109,110,111,
1176 112,113,114,115,116,117,118,119,
1177 120,121,122,91,92,93,94,95,
1178 96,65,66,67,68,69,70,71,
1179 72,73,74,75,76,77,78,79,
1180 80,81,82,83,84,85,86,87,
1181 88,89,90,123,124,125,126,127,
1182 128,129,130,131,132,133,134,135,
1183 136,137,138,139,140,141,142,143,
1184 144,145,146,147,148,149,150,151,
1185 152,153,154,155,156,157,158,159,
1186 160,161,162,163,164,165,166,167,
1187 168,169,170,171,172,173,174,175,
1188 176,177,178,179,180,181,182,183,
1189 184,185,186,187,188,189,190,191,
1190 224,225,226,227,228,229,230,231,
1191 232,233,234,235,236,237,238,239,
1192 240,241,242,243,244,245,246,215,
1193 248,249,250,251,252,253,254,223,
1194 192,193,194,195,196,197,198,199,
1195 200,201,202,203,204,205,206,207,
1196 208,209,210,211,212,213,214,247,
1197 216,217,218,219,220,221,222,255,
1198 0,62,0,0,1,0,0,0,
1199 0,0,0,0,0,0,0,0,
1200 32,0,0,0,1,0,0,0,
1201 0,0,0,0,0,0,0,0,
1202 0,0,0,0,0,0,255,3,
1203 126,0,0,0,126,0,0,0,
1204 0,0,0,0,0,0,0,0,
1205 0,0,0,0,0,0,0,0,
1206 0,0,0,0,0,0,255,3,
1207 0,0,0,0,0,0,0,0,
1208 0,0,0,0,0,0,12,2,
1209 0,0,0,0,0,0,0,0,
1210 0,0,0,0,0,0,0,0,
1211 254,255,255,7,0,0,0,0,
1212 0,0,0,0,0,0,0,0,
1213 255,255,127,127,0,0,0,0,
1214 0,0,0,0,0,0,0,0,
1215 0,0,0,0,254,255,255,7,
1216 0,0,0,0,0,4,32,4,
1217 0,0,0,128,255,255,127,255,
1218 0,0,0,0,0,0,255,3,
1219 254,255,255,135,254,255,255,7,
1220 0,0,0,0,0,4,44,6,
1221 255,255,127,255,255,255,127,255,
1222 0,0,0,0,254,255,255,255,
1223 255,255,255,255,255,255,255,127,
1224 0,0,0,0,254,255,255,255,
1225 255,255,255,255,255,255,255,255,
1226 0,2,0,0,255,255,255,255,
1227 255,255,255,255,255,255,255,127,
1228 0,0,0,0,255,255,255,255,
1229 255,255,255,255,255,255,255,255,
1230 0,0,0,0,254,255,0,252,
1231 1,0,0,248,1,0,0,120,
1232 0,0,0,0,254,255,255,255,
1233 0,0,128,0,0,0,128,0,
1234 255,255,255,255,0,0,0,0,
1235 0,0,0,0,0,0,0,128,
1236 255,255,255,255,0,0,0,0,
1237 0,0,0,0,0,0,0,0,
1238 128,0,0,0,0,0,0,0,
1239 0,1,1,0,1,1,0,0,
1240 0,0,0,0,0,0,0,0,
1241 0,0,0,0,0,0,0,0,
1242 1,0,0,0,128,0,0,0,
1243 128,128,128,128,0,0,128,0,
1244 28,28,28,28,28,28,28,28,
1245 28,28,0,0,0,0,0,128,
1246 0,26,26,26,26,26,26,18,
1247 18,18,18,18,18,18,18,18,
1248 18,18,18,18,18,18,18,18,
1249 18,18,18,128,128,0,128,16,
1250 0,26,26,26,26,26,26,18,
1251 18,18,18,18,18,18,18,18,
1252 18,18,18,18,18,18,18,18,
1253 18,18,18,128,128,0,0,0,
1254 0,0,0,0,0,1,0,0,
1255 0,0,0,0,0,0,0,0,
1256 0,0,0,0,0,0,0,0,
1257 0,0,0,0,0,0,0,0,
1258 1,0,0,0,0,0,0,0,
1259 0,0,18,0,0,0,0,0,
1260 0,0,20,20,0,18,0,0,
1261 0,20,18,0,0,0,0,0,
1262 18,18,18,18,18,18,18,18,
1263 18,18,18,18,18,18,18,18,
1264 18,18,18,18,18,18,18,0,
1265 18,18,18,18,18,18,18,18,
1266 18,18,18,18,18,18,18,18,
1267 18,18,18,18,18,18,18,18,
1268 18,18,18,18,18,18,18,0,
1269 18,18,18,18,18,18,18,18
1270 };
1271
1272
1273
1274
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for systems whose C library lacks strerror() (SunOS4 is one such
system). The message is looked up in the traditional sys_errlist[] table,
which holds sys_nerr entries.

Argument:
  n           the error number

Returns:      the corresponding entry of sys_errlist[], or a fixed
              "unknown error number" string if n is outside 0..sys_nerr-1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
#endif /* HAVE_STRERROR */
1294
1295
1296
1297 /*************************************************
1298 * Print newline configuration *
1299 *************************************************/
1300
1301 /*
1302 Arguments:
1303 rc the return code from PCRE_CONFIG_NEWLINE
1304 isc TRUE if called from "-C newline"
1305 Returns: nothing
1306 */
1307
1308 static void
1309 print_newline_config(int rc, BOOL isc)
1310 {
1311 const char *s = NULL;
1312 if (!isc) printf(" Newline sequence is ");
1313 switch(rc)
1314 {
1315 case CHAR_CR: s = "CR"; break;
1316 case CHAR_LF: s = "LF"; break;
1317 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1318 case -1: s = "ANY"; break;
1319 case -2: s = "ANYCRLF"; break;
1320
1321 default:
1322 printf("a non-standard value: 0x%04x\n", rc);
1323 return;
1324 }
1325
1326 printf("%s\n", s);
1327 }
1328
1329
1330
1331 /*************************************************
1332 * JIT memory callback *
1333 *************************************************/
1334
1335 static pcre_jit_stack* jit_callback(void *arg)
1336 {
1337 jit_was_used = TRUE;
1338 return (pcre_jit_stack *)arg;
1339 }
1340
1341
1342 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1343 /*************************************************
1344 * Convert UTF-8 string to value *
1345 *************************************************/
1346
1347 /* This function takes one or more bytes that represents a UTF-8 character,
1348 and returns the value of the character.
1349
1350 Argument:
1351 utf8bytes a pointer to the byte vector
1352 vptr a pointer to an int to receive the value
1353
1354 Returns: > 0 => the number of bytes consumed
1355 -6 to 0 => malformed UTF-8 character at offset = (-return)
1356 */
1357
1358 static int
1359 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1360 {
1361 pcre_uint32 c = *utf8bytes++;
1362 pcre_uint32 d = c;
1363 int i, j, s;
1364
1365 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1366 {
1367 if ((d & 0x80) == 0) break;
1368 d <<= 1;
1369 }
1370
1371 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1372 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1373
1374 /* i now has a value in the range 1-5 */
1375
1376 s = 6*i;
1377 d = (c & utf8_table3[i]) << s;
1378
1379 for (j = 0; j < i; j++)
1380 {
1381 c = *utf8bytes++;
1382 if ((c & 0xc0) != 0x80) return -(j+1);
1383 s -= 6;
1384 d |= (c & 0x3f) << s;
1385 }
1386
1387 /* Check that encoding was the correct unique one */
1388
1389 for (j = 0; j < utf8_table1_size; j++)
1390 if (d <= utf8_table1[j]) break;
1391 if (j != i) return -(i+1);
1392
1393 /* Valid value */
1394
1395 *vptr = d;
1396 return i+1;
1397 }
1398 #endif /* NOUTF || SUPPORT_PCRE16 */
1399
1400
1401
1402 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1403 /*************************************************
1404 * Convert character value to UTF-8 *
1405 *************************************************/
1406
1407 /* This function takes an integer value in the range 0 - 0x7fffffff
1408 and encodes it as a UTF-8 character in 0 to 6 bytes.
1409
1410 Arguments:
1411 cvalue the character value
1412 utf8bytes pointer to buffer for result - at least 6 bytes long
1413
1414 Returns: number of characters placed in the buffer
1415 */
1416
1417 static int
1418 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1419 {
1420 register int i, j;
1421 if (cvalue > 0x7fffffffu)
1422 return -1;
1423 for (i = 0; i < utf8_table1_size; i++)
1424 if (cvalue <= utf8_table1[i]) break;
1425 utf8bytes += i;
1426 for (j = i; j > 0; j--)
1427 {
1428 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1429 cvalue >>= 6;
1430 }
1431 *utf8bytes = utf8_table2[i] | cvalue;
1432 return i + 1;
1433 }
1434 #endif
1435
1436
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is (re)allocated here when it
is smaller than 2*len + 2 bytes. That is always enough: every input byte yields
at most one 16-bit unit, except that a UTF-8 sequence of four bytes may yield
two (a surrogate pair), and one extra unit is needed for the terminating zero.
Existing contents of buffer16 are not preserved when it is reallocated.

When both utf and data are false, each input byte is simply widened to 16
bits. Otherwise the input is decoded as UTF-8; values of 0x10000 and above are
written as surrogate pairs, which is only permitted when utf is set.

Note that this function does not object to surrogate values in the input. This
is deliberate; it makes it possible to construct UTF-16 strings that are
invalid, for the purpose of testing that they are correctly faulted.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode

If malloc() fails, a message is written to stderr and the program exits.
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
  pcre_uint16 *out;
  int needed = 2*len + 2;

  if (buffer16_size < needed)
    {
    free(buffer16);
    buffer16_size = needed;
    buffer16 = (pcre_uint16 *)malloc(buffer16_size);
    if (buffer16 == NULL)
      {
      fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
      exit(1);
      }
    }

  out = buffer16;

  if (utf || data)
    {
    /* UTF-8 input: decode one character at a time */

    while (len > 0)
      {
      pcre_uint32 c = 0;
      int used = utf82ord(p, &c);

      if (used <= 0) return -1;
      if (c > 0x10ffff) return -2;

      p += used;
      len -= used;

      if (c >= 0x10000)
        {
        if (!utf) return -3;     /* does not fit in one unit */
        c -= 0x10000;
        *out++ = 0xD800 | (c >> 10);
        *out++ = 0xDC00 | (c & 0x3ff);
        }
      else
        {
        *out++ = c;
        }
      }
    }
  else
    {
    /* Non-UTF pattern: a plain byte-to-unit copy */

    for (; len > 0; len--) *out++ = *p++;
    }

  *out = 0;
  return out - buffer16;
}
#endif
1515
#ifdef SUPPORT_PCRE32
/*************************************************
*         Convert a string to 32-bit             *
*************************************************/

/* The result is always left in buffer32, which is (re)allocated here when it
is smaller than 4*len + 4 bytes. That is always enough: every input byte yields
at most one 32-bit unit, and one extra unit is needed for the terminating zero.
Existing contents of buffer32 are not preserved when it is reallocated.

When both utf and data are false, each input byte is simply widened to 32
bits. Otherwise the input is decoded as UTF-8 and each decoded value is stored
as one 32-bit unit.

Range checks are applied only when utf is set: values above 0x10ffff are
rejected, and surrogate values (0xd800 - 0xdfff) are rejected for patterns.
Surrogates in data lines are let through, which makes it possible to construct
invalid UTF-32 subject strings for testing.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-32)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 32-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered in UTF mode
             OR -3 if a surrogate is encountered in a UTF-mode pattern

If malloc() fails, a message is written to stderr and the program exits.
*/

static int
to32(int data, pcre_uint8 *p, int utf, int len)
{
  pcre_uint32 *out;
  int needed = 4*len + 4;

  if (buffer32_size < needed)
    {
    free(buffer32);
    buffer32_size = needed;
    buffer32 = (pcre_uint32 *)malloc(buffer32_size);
    if (buffer32 == NULL)
      {
      fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
      exit(1);
      }
    }

  out = buffer32;

  if (utf || data)
    {
    /* UTF-8 input: decode one character at a time */

    while (len > 0)
      {
      pcre_uint32 c = 0;
      int used = utf82ord(p, &c);

      if (used <= 0) return -1;

      if (utf)
        {
        if (c > 0x10ffff) return -2;
        if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
        }

      p += used;
      len -= used;
      *out++ = c;
      }
    }
  else
    {
    /* Non-UTF pattern: a plain byte-to-unit copy */

    for (; len > 0; len--) *out++ = *p++;
    }

  *out = 0;
  return out - buffer32;
}
#endif
1593
1594 /*************************************************
1595 * Read or extend an input line *
1596 *************************************************/
1597
1598 /* Input lines are read into buffer, but both patterns and data lines can be
1599 continued over multiple input lines. In addition, if the buffer fills up, we
1600 want to automatically expand it so as to be able to handle extremely large
1601 lines that are needed for certain stress tests. When the input buffer is
1602 expanded, the other two buffers must also be expanded likewise, and the
1603 contents of pbuffer, which are a copy of the input for callouts, must be
1604 preserved (for when expansion happens for a data line). This is not the most
1605 optimal way of handling this, but hey, this is just a test program!
1606
1607 Arguments:
1608 f the file to read
1609 start where in buffer to start (this *must* be within buffer)
1610 prompt for stdin or readline()
1611
1612 Returns: pointer to the start of new data
1613 could be a copy of start, or could be moved
1614 NULL if no data read and EOF reached
1615 */
1616
1617 static pcre_uint8 *
1618 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1619 {
1620 pcre_uint8 *here = start;
1621
1622 for (;;)
1623 {
1624 size_t rlen = (size_t)(buffer_size - (here - buffer));
1625
1626 if (rlen > 1000)
1627 {
1628 int dlen;
1629
1630 /* If libreadline or libedit support is required, use readline() to read a
1631 line if the input is a terminal. Note that readline() removes the trailing
1632 newline, so we must put it back again, to be compatible with fgets(). */
1633
1634 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1635 if (isatty(fileno(f)))
1636 {
1637 size_t len;
1638 char *s = readline(prompt);
1639 if (s == NULL) return (here == start)? NULL : start;
1640 len = strlen(s);
1641 if (len > 0) add_history(s);
1642 if (len > rlen - 1) len = rlen - 1;
1643 memcpy(here, s, len);
1644 here[len] = '\n';
1645 here[len+1] = 0;
1646 free(s);
1647 }
1648 else
1649 #endif
1650
1651 /* Read the next line by normal means, prompting if the file is stdin. */
1652
1653 {
1654 if (f == stdin) printf("%s", prompt);
1655 if (fgets((char *)here, rlen, f) == NULL)
1656 return (here == start)? NULL : start;
1657 }
1658
1659 dlen = (int)strlen((char *)here);
1660 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1661 here += dlen;
1662 }
1663
1664 else
1665 {
1666 int new_buffer_size = 2*buffer_size;
1667 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1668 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1669
1670 if (new_buffer == NULL || new_pbuffer == NULL)
1671 {
1672 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1673 exit(1);
1674 }
1675
1676 memcpy(new_buffer, buffer, buffer_size);
1677 memcpy(new_pbuffer, pbuffer, buffer_size);
1678
1679 buffer_size = new_buffer_size;
1680
1681 start = new_buffer + (start - buffer);
1682 here = new_buffer + (here - buffer);
1683
1684 free(buffer);
1685 free(pbuffer);
1686
1687 buffer = new_buffer;
1688 pbuffer = new_pbuffer;
1689 }
1690 }
1691
1692 return NULL; /* Control never gets here */
1693 }
1694
1695
1696
1697 /*************************************************
1698 * Read number from string *
1699 *************************************************/
1700
1701 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1702 around with conditional compilation, just do the job by hand. It is only used
1703 for unpicking arguments, so just keep it simple.
1704
1705 Arguments:
1706 str string to be converted
1707 endptr where to put the end pointer
1708
1709 Returns: the unsigned long
1710 */
1711
1712 static int
1713 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1714 {
1715 int result = 0;
1716 while(*str != 0 && isspace(*str)) str++;
1717 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1718 *endptr = str;
1719 return(result);
1720 }
1721
1722
1723
1724 /*************************************************
1725 * Print one character *
1726 *************************************************/
1727
1728 /* Print a single character either literally, or as a hex escape. */
1729
1730 static int pchar(pcre_uint32 c, FILE *f)
1731 {
1732 int n;
1733 if (PRINTOK(c))
1734 {
1735 if (f != NULL) fprintf(f, "%c", c);
1736 return 1;
1737 }
1738
1739 if (c < 0x100)
1740 {
1741 if (use_utf)
1742 {
1743 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1744 return 6;
1745 }
1746 else
1747 {
1748 if (f != NULL) fprintf(f, "\\x%02x", c);
1749 return 4;
1750 }
1751 }
1752
1753 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1754 return n >= 0 ? n : 0;
1755 }
1756
1757
1758
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Outputs a byte string one character at a time by way of pchar(). When
use_utf is set (and UTF support is compiled in), each position is first tried
as a UTF-8 sequence; if that fails to decode, or would extend beyond the end of
the string, the single byte at that position is output instead.

Arguments:
  p          the string
  length     its length in bytes, or negative to use strlen()
  f          the output file; if NULL, just count without printing

Returns:     the sum of the values returned by pchar()
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
  int total = 0;
  int remaining = length;

  if (remaining < 0)
    remaining = strlen((char *)p);

  while (remaining > 0)
    {
    pcre_uint32 ch = *p;       /* default: treat the next byte on its own */
    int used = 1;

#if !defined NOUTF
    if (use_utf)
      {
      pcre_uint32 decoded = 0;
      int rc = utf82ord(p, &decoded);
      if (rc > 0 && rc <= remaining)   /* Mustn't run over the end */
        {
        ch = decoded;
        used = rc;
        }
      }
#endif

    p += used;
    remaining -= used;
    total += pchar(ch, f);
    }

  return total;
}
#endif
1797
1798
1799
1800 #ifdef SUPPORT_PCRE16
1801 /*************************************************
1802 * Find length of 0-terminated 16-bit string *
1803 *************************************************/
1804
1805 static int strlen16(PCRE_SPTR16 p)
1806 {
1807 int len = 0;
1808 while (*p++ != 0) len++;
1809 return len;
1810 }
1811 #endif /* SUPPORT_PCRE16 */
1812
1813
1814
1815 #ifdef SUPPORT_PCRE32
1816 /*************************************************
1817 * Find length of 0-terminated 32-bit string *
1818 *************************************************/
1819
1820 static int strlen32(PCRE_SPTR32 p)
1821 {
1822 int len = 0;
1823 while (*p++ != 0) len++;
1824 return len;
1825 }
1826 #endif /* SUPPORT_PCRE32 */
1827
1828
1829
1830 #ifdef SUPPORT_PCRE16
1831 /*************************************************
1832 * Print 16-bit character string *
1833 *************************************************/
1834
1835 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1836 If handed a NULL file, just counts chars without printing. */
1837
1838 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1839 {
1840 int yield = 0;
1841
1842 if (length < 0)
1843 length = strlen16(p);
1844
1845 while (length-- > 0)
1846 {
1847 pcre_uint32 c = *p++ & 0xffff;
1848 #if !defined NOUTF
1849 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1850 {
1851 int d = *p & 0xffff;
1852 if (d >= 0xDC00 && d < 0xDFFF)
1853 {
1854 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1855 length--;
1856 p++;
1857 }
1858 }
1859 #endif
1860 yield += pchar(c, f);
1861 }
1862
1863 return yield;
1864 }
1865 #endif /* SUPPORT_PCRE16 */
1866
1867
1868
1869 #ifdef SUPPORT_PCRE32
1870 /*************************************************
1871 * Print 32-bit character string *
1872 *************************************************/
1873
1874 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
1875 If handed a NULL file, just counts chars without printing. */
1876
1877 static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
1878 {
1879 int yield = 0;
1880
1881 if (length < 0)
1882 length = strlen32(p);
1883
1884 while (length-- > 0)
1885 {
1886 pcre_uint32 c = *p++;
1887 yield += pchar(c, f);
1888 }
1889
1890 return yield;
1891 }
1892 #endif /* SUPPORT_PCRE32 */
1893
1894
1895
1896 #ifdef SUPPORT_PCRE8
1897 /*************************************************
1898 * Read a capture name (8-bit) and check it *
1899 *************************************************/
1900
1901 static pcre_uint8 *
1902 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1903 {
1904 pcre_uint8 *npp = *pp;
1905 while (isalnum(*p)) *npp++ = *p++;
1906 *npp++ = 0;
1907 *npp = 0;
1908 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1909 {
1910 fprintf(outfile, "no parentheses with name \"");
1911 PCHARSV(*pp, 0, -1, outfile);
1912 fprintf(outfile, "\"\n");
1913 }
1914
1915 *pp = npp;
1916 return p;
1917 }
1918 #endif /* SUPPORT_PCRE8 */
1919
1920
1921
1922 #ifdef SUPPORT_PCRE16
1923 /*************************************************
1924 * Read a capture name (16-bit) and check it *
1925 *************************************************/
1926
1927 /* Note that the text being read is 8-bit. */
1928
1929 static pcre_uint8 *
1930 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1931 {
1932 pcre_uint16 *npp = *pp;
1933 while (isalnum(*p)) *npp++ = *p++;
1934 *npp++ = 0;
1935 *npp = 0;
1936 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1937 {
1938 fprintf(outfile, "no parentheses with name \"");
1939 PCHARSV(*pp, 0, -1, outfile);
1940 fprintf(outfile, "\"\n");
1941 }
1942 *pp = npp;
1943 return p;
1944 }
1945 #endif /* SUPPORT_PCRE16 */
1946
1947
1948
1949 #ifdef SUPPORT_PCRE32
1950 /*************************************************
1951 * Read a capture name (32-bit) and check it *
1952 *************************************************/
1953
1954 /* Note that the text being read is 8-bit. */
1955
1956 static pcre_uint8 *
1957 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
1958 {
1959 pcre_uint32 *npp = *pp;
1960 while (isalnum(*p)) *npp++ = *p++;
1961 *npp++ = 0;
1962 *npp = 0;
1963 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
1964 {
1965 fprintf(outfile, "no parentheses with name \"");
1966 PCHARSV(*pp, 0, -1, outfile);
1967 fprintf(outfile, "\"\n");
1968 }
1969 *pp = npp;
1970 return p;
1971 }
1972 #endif /* SUPPORT_PCRE32 */
1973
1974
1975
1976 /*************************************************
1977 * Callout function *
1978 *************************************************/
1979
1980 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1981 the match. Yield zero unless more callouts than the fail count, or the callout
1982 data is not zero. */
1983
1984 static int callout(pcre_callout_block *cb)
1985 {
1986 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1987 int i, pre_start, post_start, subject_length;
1988
1989 if (callout_extra)
1990 {
1991 fprintf(f, "Callout %d: last capture = %d\n",
1992 cb->callout_number, cb->capture_last);
1993
1994 for (i = 0; i < cb->capture_top * 2; i += 2)
1995 {
1996 if (cb->offset_vector[i] < 0)
1997 fprintf(f, "%2d: <unset>\n", i/2);
1998 else
1999 {
2000 fprintf(f, "%2d: ", i/2);
2001 PCHARSV(cb->subject, cb->offset_vector[i],
2002 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2003 fprintf(f, "\n");
2004 }
2005 }
2006 }
2007
2008 /* Re-print the subject in canonical form, the first time or if giving full
2009 datails. On subsequent calls in the same match, we use pchars just to find the
2010 printed lengths of the substrings. */
2011
2012 if (f != NULL) fprintf(f, "--->");
2013
2014 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2015 PCHARS(post_start, cb->subject, cb->start_match,
2016 cb->current_position - cb->start_match, f);
2017
2018 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2019
2020 PCHARSV(cb->subject, cb->current_position,
2021 cb->subject_length - cb->current_position, f);
2022
2023 if (f != NULL) fprintf(f, "\n");
2024
2025 /* Always print appropriate indicators, with callout number if not already
2026 shown. For automatic callouts, show the pattern offset. */
2027
2028 if (cb->callout_number == 255)
2029 {
2030 fprintf(outfile, "%+3d ", cb->pattern_position);
2031 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2032 }
2033 else
2034 {
2035 if (callout_extra) fprintf(outfile, " ");
2036 else fprintf(outfile, "%3d ", cb->callout_number);
2037 }
2038
2039 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2040 fprintf(outfile, "^");
2041
2042 if (post_start > 0)
2043 {
2044 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2045 fprintf(outfile, "^");
2046 }
2047
2048 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2049 fprintf(outfile, " ");
2050
2051 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2052 pbuffer + cb->pattern_position);
2053
2054 fprintf(outfile, "\n");
2055 first_callout = 0;
2056
2057 if (cb->mark != last_callout_mark)
2058 {
2059 if (cb->mark == NULL)
2060 fprintf(outfile, "Latest Mark: <unset>\n");
2061 else
2062 {
2063 fprintf(outfile, "Latest Mark: ");
2064 PCHARSV(cb->mark, 0, -1, outfile);
2065 putc('\n', outfile);
2066 }
2067 last_callout_mark = cb->mark;
2068 }
2069
2070 if (cb->callout_data != NULL)
2071 {
2072 int callout_data = *((int *)(cb->callout_data));
2073 if (callout_data != 0)
2074 {
2075 fprintf(outfile, "Callout data = %d\n", callout_data);
2076 return callout_data;
2077 }
2078 }
2079
2080 return (cb->callout_number != callout_fail_id)? 0 :
2081 (++callout_count >= callout_fail_count)? 1 : 0;
2082 }
2083
2084
2085 /*************************************************
2086 * Local malloc functions *
2087 *************************************************/
2088
2089 /* Alternative malloc function, to test functionality and save the size of a
2090 compiled re, which is the first store request that pcre_compile() makes. The
2091 show_malloc variable is set only during matching. */
2092
2093 static void *new_malloc(size_t size)
2094 {
2095 void *block = malloc(size);
2096 gotten_store = size;
2097 if (first_gotten_store == 0) first_gotten_store = size;
2098 if (show_malloc)
2099 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2100 return block;
2101 }
2102
2103 static void new_free(void *block)
2104 {
2105 if (show_malloc)
2106 fprintf(outfile, "free %p\n", block);
2107 free(block);
2108 }
2109
2110 /* For recursion malloc/free, to test stacking calls */
2111
2112 static void *stack_malloc(size_t size)
2113 {
2114 void *block = malloc(size);
2115 if (show_malloc)
2116 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2117 return block;
2118 }
2119
2120 static void stack_free(void *block)
2121 {
2122 if (show_malloc)
2123 fprintf(outfile, "stack_free %p\n", block);
2124 free(block);
2125 }
2126
2127
2128 /*************************************************
2129 * Call pcre_fullinfo() *
2130 *************************************************/
2131
2132 /* Get one piece of information from the pcre_fullinfo() function. When only
2133 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2134 value, but the code is defensive.
2135
2136 Arguments:
2137 re compiled regex
2138 study study data
2139 option PCRE_INFO_xxx option
2140 ptr where to put the data
2141
2142 Returns: 0 when OK, < 0 on error
2143 */
2144
2145 static int
2146 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2147 {
2148 int rc;
2149
2150 if (pcre_mode == PCRE32_MODE)
2151 #ifdef SUPPORT_PCRE32
2152 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2153 #else
2154 rc = PCRE_ERROR_BADMODE;
2155 #endif
2156 else if (pcre_mode == PCRE16_MODE)
2157 #ifdef SUPPORT_PCRE16
2158 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2159 #else
2160 rc = PCRE_ERROR_BADMODE;
2161 #endif
2162 else
2163 #ifdef SUPPORT_PCRE8
2164 rc = pcre_fullinfo(re, study, option, ptr);
2165 #else
2166 rc = PCRE_ERROR_BADMODE;
2167 #endif
2168
2169 if (rc < 0)
2170 {
2171 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2172 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2173 if (rc == PCRE_ERROR_BADMODE)
2174 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2175 "%d-bit mode\n", 8 * CHAR_SIZE,
2176 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2177 }
2178
2179 return rc;
2180 }
2181
2182
2183
2184 /*************************************************
2185 * Swap byte functions *
2186 *************************************************/
2187
2188 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2189 value, respectively.
2190
2191 Arguments:
2192 value any number
2193
2194 Returns: the byte swapped value
2195 */
2196
2197 static pcre_uint32
2198 swap_uint32(pcre_uint32 value)
2199 {
2200 return ((value & 0x000000ff) << 24) |
2201 ((value & 0x0000ff00) << 8) |
2202 ((value & 0x00ff0000) >> 8) |
2203 (value >> 24);
2204 }
2205
2206 static pcre_uint16
2207 swap_uint16(pcre_uint16 value)
2208 {
2209 return (value >> 8) | (value << 8);
2210 }
2211
2212
2213
2214 /*************************************************
2215 * Flip bytes in a compiled pattern *
2216 *************************************************/
2217
2218 /* This function is called if the 'F' option was present on a pattern that is
2219 to be written to a file. We flip the bytes of all the integer fields in the
2220 regex data block and the study block. In 16-bit mode this also flips relevant
2221 bytes in the pattern itself. This is to make it possible to test PCRE's
2222 ability to reload byte-flipped patterns, e.g. those compiled on a different
2223 architecture. */
2224
2225 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2226 static void
2227 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2228 {
2229 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2230 #ifdef SUPPORT_PCRE16
2231 int op;
2232 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2233 int length = re->name_count * re->name_entry_size;
2234 #ifdef SUPPORT_UTF
2235 BOOL utf = (re->options & PCRE_UTF16) != 0;
2236 BOOL utf16_char = FALSE;
2237 #endif /* SUPPORT_UTF */
2238 #endif /* SUPPORT_PCRE16 */
2239
2240 /* Always flip the bytes in the main data block and study blocks. */
2241
2242 re->magic_number = REVERSED_MAGIC_NUMBER;
2243 re->size = swap_uint32(re->size);
2244 re->options = swap_uint32(re->options);
2245 re->flags = swap_uint16(re->flags);
2246 re->top_bracket = swap_uint16(re->top_bracket);
2247 re->top_backref = swap_uint16(re->top_backref);
2248 re->first_char = swap_uint16(re->first_char);
2249 re->req_char = swap_uint16(re->req_char);
2250 re->name_table_offset = swap_uint16(re->name_table_offset);
2251 re->name_entry_size = swap_uint16(re->name_entry_size);
2252 re->name_count = swap_uint16(re->name_count);
2253
2254 if (extra != NULL)
2255 {
2256 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2257 rsd->size = swap_uint32(rsd->size);
2258 rsd->flags = swap_uint32(rsd->flags);
2259 rsd->minlength = swap_uint32(rsd->minlength);
2260 }
2261
2262 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2263 in the name table, if present, and then in the pattern itself. */
2264
2265 #ifdef SUPPORT_PCRE16
2266 if (pcre_mode != PCRE16_MODE) return;
2267
2268 while(TRUE)
2269 {
2270 /* Swap previous characters. */
2271 while (length-- > 0)
2272 {
2273 *ptr = swap_uint16(*ptr);
2274 ptr++;
2275 }
2276 #ifdef SUPPORT_UTF
2277 if (utf16_char)
2278 {
2279 if ((ptr[-1] & 0xfc00) == 0xd800)
2280 {
2281 /* We know that there is only one extra character in UTF-16. */
2282 *ptr = swap_uint16(*ptr);
2283 ptr++;
2284 }
2285 }
2286 utf16_char = FALSE;
2287 #endif /* SUPPORT_UTF */
2288
2289 /* Get next opcode. */
2290
2291 length = 0;
2292 op = *ptr;
2293 *ptr++ = swap_uint16(op);
2294
2295 switch (op)
2296 {
2297 case OP_END:
2298 return;
2299
2300 #ifdef SUPPORT_UTF
2301 case OP_CHAR:
2302 case OP_CHARI:
2303 case OP_NOT:
2304 case OP_NOTI:
2305 case OP_STAR:
2306 case OP_MINSTAR:
2307 case OP_PLUS:
2308 case OP_MINPLUS:
2309 case OP_QUERY:
2310 case OP_MINQUERY:
2311 case OP_UPTO:
2312 case OP_MINUPTO:
2313 case OP_EXACT:
2314 case OP_POSSTAR:
2315 case OP_POSPLUS:
2316 case OP_POSQUERY:
2317 case OP_POSUPTO:
2318 case OP_STARI:
2319 case OP_MINSTARI:
2320 case OP_PLUSI:
2321 case OP_MINPLUSI:
2322 case OP_QUERYI:
2323 case OP_MINQUERYI:
2324 case OP_UPTOI:
2325 case OP_MINUPTOI:
2326 case OP_EXACTI:
2327 case OP_POSSTARI:
2328 case OP_POSPLUSI:
2329 case OP_POSQUERYI:
2330 case OP_POSUPTOI:
2331 case OP_NOTSTAR:
2332 case OP_NOTMINSTAR:
2333 case OP_NOTPLUS:
2334 case OP_NOTMINPLUS:
2335 case OP_NOTQUERY:
2336 case OP_NOTMINQUERY:
2337 case OP_NOTUPTO:
2338 case OP_NOTMINUPTO:
2339 case OP_NOTEXACT:
2340 case OP_NOTPOSSTAR:
2341 case OP_NOTPOSPLUS:
2342 case OP_NOTPOSQUERY:
2343 case OP_NOTPOSUPTO:
2344 case OP_NOTSTARI:
2345 case OP_NOTMINSTARI:
2346 case OP_NOTPLUSI:
2347 case OP_NOTMINPLUSI:
2348 case OP_NOTQUERYI:
2349 case OP_NOTMINQUERYI:
2350 case OP_NOTUPTOI:
2351 case OP_NOTMINUPTOI:
2352 case OP_NOTEXACTI:
2353 case OP_NOTPOSSTARI:
2354 case OP_NOTPOSPLUSI:
2355 case OP_NOTPOSQUERYI:
2356 case OP_NOTPOSUPTOI:
2357 if (utf) utf16_char = TRUE;
2358 #endif
2359 /* Fall through. */
2360
2361 default:
2362 length = OP_lengths16[op] - 1;
2363 break;
2364
2365 case OP_CLASS:
2366 case OP_NCLASS:
2367 /* Skip the character bit map. */
2368 ptr += 32/sizeof(pcre_uint16);
2369 length = 0;
2370 break;
2371
2372 case OP_XCLASS:
2373 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2374 if (LINK_SIZE > 1)
2375 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2376 - (1 + LINK_SIZE + 1));
2377 else
2378 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2379
2380 /* Reverse the size of the XCLASS instance. */
2381 *ptr = swap_uint16(*ptr);
2382 ptr++;
2383 if (LINK_SIZE > 1)
2384 {
2385 *ptr = swap_uint16(*ptr);
2386 ptr++;
2387 }
2388
2389 op = *ptr;
2390 *ptr = swap_uint16(op);
2391 ptr++;
2392 if ((op & XCL_MAP) != 0)
2393 {
2394 /* Skip the character bit map. */
2395 ptr += 32/sizeof(pcre_uint16);
2396 length -= 32/sizeof(pcre_uint16);
2397 }
2398 break;
2399 }
2400 }
2401 /* Control should never reach here in 16 bit mode. */
2402 #endif /* SUPPORT_PCRE16 */
2403 }
2404 #endif /* SUPPORT_PCRE[8|16] */
2405
2406
2407
2408 #if defined SUPPORT_PCRE32
2409 static void
2410 regexflip_32(pcre *ere, pcre_extra *extra)
2411 {
2412 real_pcre32 *re = (real_pcre32 *)ere;
2413 int op;
2414 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2415 int length = re->name_count * re->name_entry_size;
2416 #ifdef SUPPORT_UTF
2417 BOOL utf = (re->options & PCRE_UTF32) != 0;
2418 #endif /* SUPPORT_UTF */
2419
2420 /* Always flip the bytes in the main data block and study blocks. */
2421
2422 re->magic_number = REVERSED_MAGIC_NUMBER;
2423 re->size = swap_uint32(re->size);
2424 re->options = swap_uint32(re->options);
2425 re->flags = swap_uint16(re->flags);
2426 re->top_bracket = swap_uint16(re->top_bracket);
2427 re->top_backref = swap_uint16(re->top_backref);
2428 re->first_char = swap_uint32(re->first_char);
2429 re->req_char = swap_uint32(re->req_char);
2430 re->name_table_offset = swap_uint16(re->name_table_offset);
2431 re->name_entry_size = swap_uint16(re->name_entry_size);
2432 re->name_count = swap_uint16(re->name_count);
2433
2434 if (extra != NULL)
2435 {
2436 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2437 rsd->size = swap_uint32(rsd->size);
2438 rsd->flags = swap_uint32(rsd->flags);
2439 rsd->minlength = swap_uint32(rsd->minlength);
2440 }
2441
2442 /* In 32-bit mode we must swap bytes
2443 in the name table, if present, and then in the pattern itself. */
2444
2445 while(TRUE)
2446 {
2447 /* Swap previous characters. */
2448 while (length-- > 0)
2449 {
2450 *ptr = swap_uint32(*ptr);
2451 ptr++;
2452 }
2453
2454 /* Get next opcode. */
2455
2456 length = 0;
2457 op = *ptr;
2458 *ptr++ = swap_uint32(op);
2459
2460 switch (op)
2461 {
2462 case OP_END:
2463 return;
2464
2465 default:
2466 length = OP_lengths32[op] - 1;
2467 break;
2468
2469 case OP_CLASS:
2470 case OP_NCLASS:
2471 /* Skip the character bit map. */
2472 ptr += 32/sizeof(pcre_uint32);
2473 length = 0;
2474 break;
2475
2476 case OP_XCLASS:
2477 /* LINK_SIZE can only be 1 in 32-bit mode. */
2478 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2479
2480 /* Reverse the size of the XCLASS instance. */
2481 *ptr = swap_uint32(*ptr);
2482 ptr++;
2483
2484 op = *ptr;
2485 *ptr = swap_uint32(op);
2486 ptr++;
2487 if ((op & XCL_MAP) != 0)
2488 {
2489 /* Skip the character bit map. */
2490 ptr += 32/sizeof(pcre_uint32);
2491 length -= 32/sizeof(pcre_uint32);
2492 }
2493 break;
2494 }
2495 }
2496 /* Control should never reach here in 32 bit mode. */
2497 }
2498
2499 #endif /* SUPPORT_PCRE32 */
2500
2501
2502
2503 static void
2504 regexflip(pcre *ere, pcre_extra *extra)
2505 {
2506 #if defined SUPPORT_PCRE32
2507 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2508 regexflip_32(ere, extra);
2509 #endif
2510 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2511 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2512 regexflip8_or_16(ere, extra);
2513 #endif
2514 }
2515
2516
2517
2518 /*************************************************
2519 * Check match or recursion limit *
2520 *************************************************/
2521
2522 static int
2523 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2524 int start_offset, int options, int *use_offsets, int use_size_offsets,
2525 int flag, unsigned long int *limit, int errnumber, const char *msg)
2526 {
2527 int count;
2528 int min = 0;
2529 int mid = 64;
2530 int max = -1;
2531
2532 extra->flags |= flag;
2533
2534 for (;;)
2535 {
2536 *limit = mid;
2537
2538 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2539 use_offsets, use_size_offsets);
2540
2541 if (count == errnumber)
2542 {
2543 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2544 min = mid;
2545 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2546 }
2547
2548 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2549 count == PCRE_ERROR_PARTIAL)
2550 {
2551 if (mid == min + 1)
2552 {
2553 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2554 break;
2555 }
2556 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2557 max = mid;
2558 mid = (min + mid)/2;
2559 }
2560 else break; /* Some other error */
2561 }
2562
2563 extra->flags &= ~flag;
2564 return count;
2565 }
2566
2567
2568
2569 /*************************************************
2570 * Case-independent strncmp() function *
2571 *************************************************/
2572
2573 /*
2574 Arguments:
2575 s first string
2576 t second string
2577 n number of characters to compare
2578
2579 Returns: < 0, = 0, or > 0, according to the comparison
2580 */
2581
2582 static int
2583 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2584 {
2585 while (n--)
2586 {
2587 int c = tolower(*s++) - tolower(*t++);
2588 if (c) return c;
2589 }
2590 return 0;
2591 }
2592
2593
2594
2595 /*************************************************
2596 * Check newline indicator *
2597 *************************************************/
2598
2599 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2600 a message and return 0 if there is no match.
2601
2602 Arguments:
2603 p points after the leading '<'
2604 f file for error message
2605
2606 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2607 */
2608
2609 static int
2610 check_newline(pcre_uint8 *p, FILE *f)
2611 {
2612 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2613 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2614 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2615 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2616 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2617 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2618 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2619 fprintf(f, "Unknown newline type at: <%s\n", p);
2620 return 0;
2621 }
2622
2623
2624
2625 /*************************************************
2626 * Usage function *
2627 *************************************************/
2628
static void
usage(void)
{
  /* Writes the command-line help to stdout. The text is emitted in pieces so
  that lines for optional features can be included or left out at compile
  time. */

  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
  fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
  fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE32
  fputs(" -32 use the 32-bit library\n", stdout);
#endif
  fputs(" -b show compiled code\n"
        " -C show PCRE compile-time options and exit\n"
        " -C arg show a specific compile-time option\n"
        " and exit with its value. The arg can be:\n"
        " linksize internal link size [2, 3, 4]\n"
        " pcre8 8 bit library support enabled [0, 1]\n"
        " pcre16 16 bit library support enabled [0, 1]\n"
        " pcre32 32 bit library support enabled [0, 1]\n"
        " utf Unicode Transformation Format supported [0, 1]\n"
        " ucp Unicode Properties supported [0, 1]\n"
        " jit Just-in-time compiler supported [0, 1]\n"
        " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
        " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s force each pattern to be studied at basic level\n"
        " -s+ force each pattern to be studied, using JIT if available\n"
        " -s++ ditto, verifying when JIT was actually used\n"
        " -s+n force each pattern to be studied, using JIT if available,\n"
        " where 1 <= n <= 7 selects JIT options\n"
        " -s++n ditto, verifying when JIT was actually used\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2683
2684
2685
2686 /*************************************************
2687 * Main Program *
2688 *************************************************/
2689
2690 /* Read lines from named file or stdin and write to named file or stdout; lines
2691 consist of a regular expression, in delimiters and optionally followed by
2692 options, followed by a set of test data, terminated by an empty line. */
2693
2694 int main(int argc, char **argv)
2695 {
2696 FILE *infile = stdin;
2697 const char *version;
2698 int options = 0;
2699 int study_options = 0;
2700 int default_find_match_limit = FALSE;
2701 int op = 1;
2702 int timeit = 0;
2703 int timeitm = 0;
2704 int showinfo = 0;
2705 int showstore = 0;
2706 int force_study = -1;
2707 int force_study_options = 0;
2708 int quiet = 0;
2709 int size_offsets = 45;
2710 int size_offsets_max;
2711 int *offsets = NULL;
2712 int debug = 0;
2713 int done = 0;
2714 int all_use_dfa = 0;
2715 int verify_jit = 0;
2716 int yield = 0;
2717 int stack_size;
2718 pcre_uint8 *dbuffer = NULL;
2719 size_t dbuffer_size = 1u << 14;
2720
2721 #if !defined NOPOSIX
2722 int posix = 0;
2723 #endif
2724 #if !defined NODFA
2725 int *dfa_workspace = NULL;
2726 #endif
2727
2728 pcre_jit_stack *jit_stack = NULL;
2729
2730 /* These vectors store, end-to-end, a list of zero-terminated captured
2731 substring names, each list itself being terminated by an empty name. Assume
2732 that 1024 is plenty long enough for the few names we'll be testing. It is
2733 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2734 for the actual memory, to ensure alignment. */
2735
2736 pcre_uint32 copynames[1024];
2737 pcre_uint32 getnames[1024];
2738
2739 #ifdef SUPPORT_PCRE32
2740 pcre_uint32 *cn32ptr;
2741 pcre_uint32 *gn32ptr;
2742 #endif
2743
2744 #ifdef SUPPORT_PCRE16
2745 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2746 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2747 pcre_uint16 *cn16ptr;
2748 pcre_uint16 *gn16ptr;
2749 #endif
2750
2751 #ifdef SUPPORT_PCRE8
2752 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2753 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2754 pcre_uint8 *cn8ptr;
2755 pcre_uint8 *gn8ptr;
2756 #endif
2757
2758 /* Get buffers from malloc() so that valgrind will check their misuse when
2759 debugging. They grow automatically when very long lines are read. The 16-
2760 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2761
2762 buffer = (pcre_uint8 *)malloc(buffer_size);
2763 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2764
2765 /* The outfile variable is static so that new_malloc can use it. */
2766
2767 outfile = stdout;
2768
2769 /* The following _setmode() stuff is some Windows magic that tells its runtime
2770 library to translate CRLF into a single LF character. At least, that's what
2771 I've been told: never having used Windows I take this all on trust. Originally
2772 it set 0x8000, but then I was advised that _O_BINARY was better. */
2773
2774 #if defined(_WIN32) || defined(WIN32)
2775 _setmode( _fileno( stdout ), _O_BINARY );
2776 #endif
2777
2778 /* Get the version number: both pcre_version() and pcre16_version() give the
2779 same answer. We just need to ensure that we call one that is available. */
2780
2781 #if defined SUPPORT_PCRE8
2782 version = pcre_version();
2783 #elif defined SUPPORT_PCRE16
2784 version = pcre16_version();
2785 #elif defined SUPPORT_PCRE32
2786 version = pcre32_version();
2787 #endif
2788
2789 /* Scan options */
2790
2791 while (argc > 1 && argv[op][0] == '-')
2792 {
2793 pcre_uint8 *endptr;
2794 char *arg = argv[op];
2795
2796 if (strcmp(arg, "-m") == 0) showstore = 1;
2797 else if (strcmp(arg, "-s") == 0) force_study = 0;
2798
2799 else if (strncmp(arg, "-s+", 3) == 0)
2800 {
2801 arg += 3;
2802 if (*arg == '+') { arg++; verify_jit = TRUE; }
2803 force_study = 1;
2804 if (*arg == 0)
2805 force_study_options = jit_study_bits[6];
2806 else if (*arg >= '1' && *arg <= '7')
2807 force_study_options = jit_study_bits[*arg - '1'];
2808 else goto BAD_ARG;
2809 }
2810 else if (strcmp(arg, "-16") == 0)
2811 {
2812 #ifdef SUPPORT_PCRE16
2813 pcre_mode = PCRE16_MODE;
2814 #else
2815 printf("** This version of PCRE was built without 16-bit support\n");
2816 exit(1);
2817 #endif
2818 }
2819 else if (strcmp(arg, "-32") == 0)
2820 {
2821 #ifdef SUPPORT_PCRE32
2822 pcre_mode = PCRE32_MODE;
2823 #else
2824 printf("** This version of PCRE was built without 32-bit support\n");
2825 exit(1);
2826 #endif
2827 }
2828 else if (strcmp(arg, "-q") == 0) quiet = 1;
2829 else if (strcmp(arg, "-b") == 0) debug = 1;
2830 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2831 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2832 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2833 #if !defined NODFA
2834 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2835 #endif
2836 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2837 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2838 *endptr == 0))
2839 {
2840 op++;
2841 argc--;
2842 }
2843 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2844 {
2845 int both = arg[2] == 0;
2846 int temp;
2847 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2848 *endptr == 0))
2849 {
2850 timeitm = temp;
2851 op++;
2852 argc--;
2853 }
2854 else timeitm = LOOPREPEAT;
2855 if (both) timeit = timeitm;
2856 }
2857 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2858 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2859 *endptr == 0))
2860 {
2861 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2862 printf("PCRE: -S not supported on this OS\n");
2863 exit(1);
2864 #else
2865 int rc;
2866 struct rlimit rlim;
2867 getrlimit(RLIMIT_STACK, &rlim);
2868 rlim.rlim_cur = stack_size * 1024 * 1024;
2869 rc = setrlimit(RLIMIT_STACK, &rlim);
2870 if (rc != 0)
2871 {
2872 printf("PCRE: setrlimit() failed with error %d\n", rc);
2873 exit(1);
2874 }
2875 op++;
2876 argc--;
2877 #endif
2878 }
2879 #if !defined NOPOSIX
2880 else if (strcmp(arg, "-p") == 0) posix = 1;
2881 #endif
2882 else if (strcmp(arg, "-C") == 0)
2883 {
2884 int rc;
2885 unsigned long int lrc;
2886
2887 if (argc > 2)
2888 {
2889 if (strcmp(argv[op + 1], "linksize") == 0)
2890 {
2891 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2892 printf("%d\n", rc);
2893 yield = rc;
2894 }
2895 else if (strcmp(argv[op + 1], "pcre8") == 0)
2896 {
2897 #ifdef SUPPORT_PCRE8
2898 printf("1\n");
2899 yield = 1;
2900 #else
2901 printf("0\n");
2902 yield = 0;
2903 #endif
2904 }
2905 else if (strcmp(argv[op + 1], "pcre16") == 0)
2906 {
2907 #ifdef SUPPORT_PCRE16
2908 printf("1\n");
2909 yield = 1;
2910 #else
2911 printf("0\n");
2912 yield = 0;
2913 #endif
2914 }
2915 else if (strcmp(argv[op + 1], "pcre32") == 0)
2916 {
2917 #ifdef SUPPORT_PCRE32
2918 printf("1\n");
2919 yield = 1;
2920 #else
2921 printf("0\n");
2922 yield = 0;
2923 #endif
2924 goto EXIT;
2925 }
2926 if (strcmp(argv[op + 1], "utf") == 0)
2927 {
2928 #ifdef SUPPORT_PCRE8
2929 if (pcre_mode == PCRE8_MODE)
2930 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2931 #endif
2932 #ifdef SUPPORT_PCRE16
2933 if (pcre_mode == PCRE16_MODE)
2934 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2935 #endif
2936 #ifdef SUPPORT_PCRE32
2937 if (pcre_mode == PCRE32_MODE)
2938 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2939 #endif
2940 printf("%d\n", rc);
2941 yield = rc;
2942 goto EXIT;
2943 }
2944 else if (strcmp(argv[op + 1], "ucp") == 0)
2945 {
2946 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2947 printf("%d\n", rc);
2948 yield = rc;
2949 }
2950 else if (strcmp(argv[op + 1], "jit") == 0)
2951 {
2952 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2953 printf("%d\n", rc);
2954 yield = rc;
2955 }
2956 else if (strcmp(argv[op + 1], "newline") == 0)
2957 {
2958 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2959 print_newline_config(rc, TRUE);
2960 }
2961 else if (strcmp(argv[op + 1], "ebcdic") == 0)
2962 {
2963 #ifdef EBCDIC
2964 printf("1\n");
2965 yield = 1;
2966 #else
2967 printf("0\n");
2968 #endif
2969 }
2970 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2971 {
2972 #ifdef EBCDIC
2973 printf("0x%02x\n", CHAR_LF);
2974 #else
2975 printf("0\n");
2976 #endif
2977 }
2978 else
2979 {
2980 printf("Unknown -C option: %s\n", argv[op + 1]);
2981 }
2982 goto EXIT;
2983 }
2984
2985 /* No argument for -C: output all configuration information. */
2986
2987 printf("PCRE version %s\n", version);
2988 printf("Compiled with\n");
2989
2990 #ifdef EBCDIC
2991 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2992 #endif
2993
2994 /* At least one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 will be
2995 set. If more than one is set, either all UTFs are supported or none are. */
2996
2997 #ifdef SUPPORT_PCRE8
2998 printf(" 8-bit support\n");
2999 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3000 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3001 #endif
3002 #ifdef SUPPORT_PCRE16
3003 printf(" 16-bit support\n");
3004 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3005 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3006 #endif
3007 #ifdef SUPPORT_PCRE32
3008 printf(" 32-bit support\n");
3009 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3010 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3011 #endif
3012
3013 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3014 printf(" %sUnicode properties support\n", rc? "" : "No ");
3015 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3016 if (rc)
3017 {
3018 const char *arch;
3019 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3020 printf(" Just-in-time compiler support: %s\n", arch);
3021 }
3022 else
3023 printf(" No just-in-time compiler support\n");
3024 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3025 print_newline_config(rc, FALSE);
3026 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3027 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3028 "all Unicode newlines");
3029 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3030 printf(" Internal link size = %d\n", rc);
3031 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3032 printf(" POSIX malloc threshold = %d\n", rc);
3033 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3034 printf(" Default match limit = %ld\n", lrc);
3035 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3036 printf(" Default recursion depth limit = %ld\n", lrc);
3037 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3038 printf(" Match recursion uses %s", rc? "stack" : "heap");
3039 if (showstore)
3040 {
3041 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3042 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3043 }
3044 printf("\n");
3045 goto EXIT;
3046 }
3047 else if (strcmp(arg, "-help") == 0 ||
3048 strcmp(arg, "--help") == 0)
3049 {
3050 usage();
3051 goto EXIT;
3052 }
3053 else
3054 {
3055 BAD_ARG:
3056 printf("** Unknown or malformed option %s\n", arg);
3057 usage();
3058 yield = 1;
3059 goto EXIT;
3060 }
3061 op++;
3062 argc--;
3063 }
3064
3065 /* Get the store for the offsets vector, and remember what it was */
3066
3067 size_offsets_max = size_offsets;
3068 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3069 if (offsets == NULL)
3070 {
3071 printf("** Failed to get %d bytes of memory for offsets vector\n",
3072 (int)(size_offsets_max * sizeof(int)));
3073 yield = 1;
3074 goto EXIT;
3075 }
3076
3077 /* Sort out the input and output files */
3078
3079 if (argc > 1)
3080 {
3081 infile = fopen(argv[op], INPUT_MODE);
3082 if (infile == NULL)
3083 {
3084 printf("** Failed to open %s\n", argv[op]);
3085 yield = 1;
3086 goto EXIT;
3087 }
3088 }
3089
3090 if (argc > 2)
3091 {
3092 outfile = fopen(argv[op+1], OUTPUT_MODE);
3093 if (outfile == NULL)
3094 {
3095 printf("** Failed to open %s\n", argv[op+1]);
3096 yield = 1;
3097 goto EXIT;
3098 }
3099 }
3100
3101 /* Set alternative malloc function */
3102
3103 #ifdef SUPPORT_PCRE8
3104 pcre_malloc = new_malloc;
3105 pcre_free = new_free;
3106 pcre_stack_malloc = stack_malloc;
3107 pcre_stack_free = stack_free;
3108 #endif
3109
3110 #ifdef SUPPORT_PCRE16
3111 pcre16_malloc = new_malloc;
3112 pcre16_free = new_free;
3113 pcre16_stack_malloc = stack_malloc;
3114 pcre16_stack_free = stack_free;
3115 #endif
3116
3117 #ifdef SUPPORT_PCRE32
3118 pcre32_malloc = new_malloc;
3119 pcre32_free = new_free;
3120 pcre32_stack_malloc = stack_malloc;
3121 pcre32_stack_free = stack_free;
3122 #endif
3123
3124 /* Heading line unless quiet, then prompt for first regex if stdin */
3125
3126 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3127
3128 /* Main loop */
3129
3130 while (!done)
3131 {
3132 pcre *re = NULL;
3133 pcre_extra *extra = NULL;
3134
3135 #if !defined NOPOSIX /* There are still compilers that require no indent */
3136 regex_t preg;
3137 int do_posix = 0;
3138 #endif
3139
3140 const char *error;
3141 pcre_uint8 *markptr;
3142 pcre_uint8 *p, *pp, *ppp;
3143 pcre_uint8 *to_file = NULL;
3144 const pcre_uint8 *tables = NULL;
3145 unsigned long int get_options;
3146 unsigned long int true_size, true_study_size = 0;
3147 size_t size, regex_gotten_store;
3148 int do_allcaps = 0;
3149 int do_mark = 0;
3150 int do_study = 0;
3151 int no_force_study = 0;
3152 int do_debug = debug;
3153 int do_G = 0;
3154 int do_g = 0;
3155 int do_showinfo = showinfo;
3156 int do_showrest = 0;
3157 int do_showcaprest = 0;
3158 int do_flip = 0;
3159 int erroroffset, len, delimiter, poffset;
3160
3161 #if !defined NODFA
3162 int dfa_matched = 0;
3163 #endif
3164
3165 use_utf = 0;
3166 debug_lengths = 1;
3167
3168 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3169 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3170 fflush(outfile);
3171
3172 p = buffer;
3173 while (isspace(*p)) p++;
3174 if (*p == 0) continue;
3175
3176 /* See if the pattern is to be loaded pre-compiled from a file. */
3177
3178 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3179 {
3180 pcre_uint32 magic;
3181 pcre_uint8 sbuf[8];
3182 FILE *f;
3183
3184 p++;
3185 if (*p == '!')
3186 {
3187 do_debug = TRUE;
3188 do_showinfo = TRUE;
3189 p++;
3190 }
3191
3192 pp = p + (int)strlen((char *)p);
3193 while (isspace(pp[-1])) pp--;
3194 *pp = 0;
3195
3196 f = fopen((char *)p, "rb");
3197 if (f == NULL)
3198 {
3199 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3200 continue;
3201 }
3202
3203 first_gotten_store = 0;
3204 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3205
3206 true_size =
3207 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3208 true_study_size =
3209 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3210
3211 re = (pcre *)new_malloc(true_size);
3212 if (re == NULL)
3213 {
3214 printf("** Failed to get %d bytes of memory for pcre object\n",
3215 (int)true_size);
3216 yield = 1;
3217 goto EXIT;
3218 }
3219 regex_gotten_store = first_gotten_store;
3220
3221 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3222
3223 magic = REAL_PCRE_MAGIC(re);
3224 if (magic != MAGIC_NUMBER)
3225 {
3226 if (swap_uint32(magic) == MAGIC_NUMBER)
3227 {
3228 do_flip = 1;
3229 }
3230 else
3231 {
3232 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3233 new_free(re);
3234 fclose(f);
3235 continue;
3236 }
3237 }
3238
3239 /* We hide the byte-invert info for little and big endian tests. */
3240 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3241 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3242
3243 /* Now see if there is any following study data. */
3244
3245 if (true_study_size != 0)
3246 {
3247 pcre_study_data *psd;
3248
3249 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3250 extra->flags = PCRE_EXTRA_STUDY_DATA;
3251
3252 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3253 extra->study_data = psd;
3254
3255 if (fread(psd, 1, true_study_size, f) != true_study_size)
3256 {
3257 FAIL_READ:
3258 fprintf(outfile, "Failed to read data from %s\n", p);
3259 if (extra != NULL)
3260 {
3261 PCRE_FREE_STUDY(extra);
3262 }
3263 new_free(re);
3264 fclose(f);
3265 continue;
3266 }
3267 fprintf(outfile, "Study data loaded from %s\n", p);
3268 do_study = 1; /* To get the data output if requested */
3269 }
3270 else fprintf(outfile, "No study data\n");
3271
3272 /* Flip the necessary bytes. */
3273 if (do_flip)
3274 {
3275 int rc;
3276 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3277 if (rc == PCRE_ERROR_BADMODE)
3278 {
3279 /* Simulate the result of the function call below. */
3280 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3281 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3282 PCRE_INFO_OPTIONS);
3283 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3284 "%d-bit mode\n", 8 * CHAR_SIZE,
3285 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3286 new_free(re);
3287 fclose(f);
3288 continue;
3289 }
3290 }
3291
3292 /* Need to know if UTF-8 for printing data strings. */
3293
3294 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3295 {
3296 new_free(re);
3297 fclose(f);
3298 continue;
3299 }
3300 use_utf = (get_options & PCRE_UTF8) != 0;
3301
3302 fclose(f);
3303 goto SHOW_INFO;
3304 }
3305
3306 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3307 the pattern; if it isn't complete, read more. */
3308
3309 delimiter = *p++;
3310
3311 if (isalnum(delimiter) || delimiter == '\\')
3312 {
3313 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3314 goto SKIP_DATA;
3315 }
3316
3317 pp = p;
3318 poffset = (int)(p - buffer);
3319
3320 for(;;)
3321 {
3322 while (*pp != 0)
3323 {
3324 if (*pp == '\\' && pp[1] != 0) pp++;
3325 else if (*pp == delimiter) break;
3326 pp++;
3327 }
3328 if (*pp != 0) break;
3329 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3330 {
3331 fprintf(outfile, "** Unexpected EOF\n");
3332 done = 1;
3333 goto CONTINUE;
3334 }
3335 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3336 }
3337
3338 /* The buffer may have moved while being extended; reset the start of data
3339 pointer to the correct relative point in the buffer. */
3340
3341 p = buffer + poffset;
3342
3343 /* If the first character after the delimiter is backslash, make
3344 the pattern end with backslash. This is purely to provide a way
3345 of testing for the error message when a pattern ends with backslash. */
3346
3347 if (pp[1] == '\\') *pp++ = '\\';
3348
3349 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3350 for callouts. */
3351
3352 *pp++ = 0;
3353 strcpy((char *)pbuffer, (char *)p);
3354
3355 /* Look for options after final delimiter */
3356
3357 options = 0;
3358 study_options = force_study_options;
3359 log_store = showstore; /* default from command line */
3360
3361 while (*pp != 0)
3362 {
3363 switch (*pp++)
3364 {
3365 case 'f': options |= PCRE_FIRSTLINE; break;
3366 case 'g': do_g = 1; break;
3367 case 'i': options |= PCRE_CASELESS; break;
3368 case 'm': options |= PCRE_MULTILINE; break;
3369 case 's': options |= PCRE_DOTALL; break;
3370 case 'x': options |= PCRE_EXTENDED; break;
3371
3372 case '+':
3373 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3374 break;
3375
3376 case '=': do_allcaps = 1; break;
3377 case 'A': options |= PCRE_ANCHORED; break;
3378 case 'B': do_debug = 1; break;
3379 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3380 case 'D': do_debug = do_showinfo = 1; break;
3381 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3382 case 'F': do_flip = 1; break;
3383 case 'G': do_G = 1; break;
3384 case 'I': do_showinfo = 1; break;
3385 case 'J': options |= PCRE_DUPNAMES; break;
3386 case 'K': do_mark = 1; break;
3387 case 'M': log_store = 1; break;
3388 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3389
3390 #if !defined NOPOSIX
3391 case 'P': do_posix = 1; break;
3392 #endif
3393
3394 case 'S':
3395 do_study = 1;
3396 for (;;)
3397 {
3398 switch (*pp++)
3399 {
3400 case 'S':
3401 do_study = 0;
3402 no_force_study = 1;
3403 break;
3404
3405 case '!':
3406 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3407 break;
3408
3409 case '+':
3410 if (*pp == '+')
3411 {
3412 verify_jit = TRUE;
3413 pp++;
3414 }
3415 if (*pp >= '1' && *pp <= '7')
3416 study_options |= jit_study_bits[*pp++ - '1'];
3417 else
3418 study_options |= jit_study_bits[6];
3419 break;
3420
3421 case '-':
3422 study_options &= ~PCRE_STUDY_ALLJIT;
3423 break;
3424
3425 default:
3426 pp--;
3427 goto ENDLOOP;
3428 }
3429 }
3430 ENDLOOP:
3431 break;
3432
3433 case 'U': options |= PCRE_UNGREEDY; break;
3434 case 'W': options |= PCRE_UCP; break;
3435 case 'X': options |= PCRE_EXTRA; break;
3436 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3437 case 'Z': debug_lengths = 0; break;
3438 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3439 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3440
3441 case 'T':
3442 switch (*pp++)
3443 {
3444 case '0': tables = tables0; break;
3445 case '1': tables = tables1; break;
3446
3447 case '\r':
3448 case '\n':
3449 case ' ':
3450 case 0:
3451 fprintf(outfile, "** Missing table number after /T\n");
3452 goto SKIP_DATA;
3453
3454 default:
3455 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3456 goto SKIP_DATA;
3457 }
3458 break;
3459
3460 case 'L':
3461 ppp = pp;
3462 /* The '\r' test here is so that it works on Windows. */
3463 /* The zero (NUL) test is just in case this is an unterminated line. */
3464 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3465 *ppp = 0;
3466 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3467 {
3468 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3469 goto SKIP_DATA;
3470 }
3471 locale_set = 1;
3472 tables = PCRE_MAKETABLES;
3473 pp = ppp;
3474 break;
3475
3476 case '>':
3477 to_file = pp;
3478 while (*pp != 0) pp++;
3479 while (isspace(pp[-1])) pp--;
3480 *pp = 0;
3481 break;
3482
3483 case '<':
3484 {
3485 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3486 {
3487 options |= PCRE_JAVASCRIPT_COMPAT;
3488 pp += 3;
3489 }
3490 else
3491 {
3492 int x = check_newline(pp, outfile);
3493 if (x == 0) goto SKIP_DATA;
3494 options |= x;
3495 while (*pp++ != '>');
3496 }
3497 }
3498 break;
3499
3500 case '\r': /* So that it works in Windows */
3501 case '\n':
3502 case ' ':
3503 break;
3504
3505 default:
3506 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3507 goto SKIP_DATA;
3508 }
3509 }
3510
3511 /* Handle compiling via the POSIX interface, which doesn't support the
3512 timing, showing, or debugging options, nor the ability to pass over
3513 local character tables. Neither does it have 16-bit or 32-bit support. */
3514
3515 #if !defined NOPOSIX
3516 if (posix || do_posix)
3517 {
3518 int rc;
3519 int cflags = 0;
3520
3521 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3522 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3523 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3524 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3525 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3526 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3527 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3528
3529 first_gotten_store = 0;
3530 rc = regcomp(&preg, (char *)p, cflags);
3531
3532 /* Compilation failed; go back for another re, skipping to blank line
3533 if non-interactive. */
3534
3535 if (rc != 0)
3536 {
3537 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3538 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3539 goto SKIP_DATA;
3540 }
3541 }
3542
3543 /* Handle compiling via the native interface */
3544
3545 else
3546 #endif /* !defined NOPOSIX */
3547
3548 {
3549 /* In 16- or 32-bit mode, convert the input. */
3550
3551 #ifdef SUPPORT_PCRE16
3552 if (pcre_mode == PCRE16_MODE)
3553 {
3554 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3555 {
3556 case -1:
3557 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3558 "converted to UTF-16\n");
3559 goto SKIP_DATA;
3560
3561 case -2:
3562 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3563 "cannot be converted to UTF-16\n");
3564 goto SKIP_DATA;
3565
3566 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3567 fprintf(outfile, "**Failed: character value greater than 0xffff "
3568 "cannot be converted to 16-bit in non-UTF mode\n");
3569 goto SKIP_DATA;
3570
3571 default:
3572 break;
3573 }
3574 p = (pcre_uint8 *)buffer16;
3575 }
3576 #endif
3577
3578 #ifdef SUPPORT_PCRE32
3579 if (pcre_mode == PCRE32_MODE)
3580 {
3581 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3582 {
3583 case -1:
3584 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3585 "converted to UTF-32\n");
3586 goto SKIP_DATA;
3587
3588 case -2:
3589 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3590 "cannot be converted to UTF-32\n");
3591 goto SKIP_DATA;
3592
3593 case -3:
3594 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3595 goto SKIP_DATA;
3596
3597 default:
3598 break;
3599 }
3600 p = (pcre_uint8 *)buffer32;
3601 }
3602 #endif
3603
3604 /* Compile many times when timing */
3605
3606 if (timeit > 0)
3607 {
3608 register int i;
3609 clock_t time_taken;
3610 clock_t start_time = clock();
3611 for (i = 0; i < timeit; i++)
3612 {
3613 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3614 if (re != NULL) free(re);
3615 }
3616 time_taken = clock() - start_time;
3617 fprintf(outfile, "Compile time %.4f milliseconds\n",
3618 (((double)time_taken * 1000.0) / (double)timeit) /
3619 (double)CLOCKS_PER_SEC);
3620 }
3621
3622 first_gotten_store = 0;
3623 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3624
3625 /* Compilation failed; go back for another re, skipping to blank line
3626 if non-interactive. */
3627
3628 if (re == NULL)
3629 {
3630 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3631 SKIP_DATA:
3632 if (infile != stdin)
3633 {
3634 for (;;)
3635 {
3636 if (extend_inputline(infile, buffer, NULL) == NULL)
3637 {
3638 done = 1;
3639 goto CONTINUE;
3640 }
3641 len = (int)strlen((char *)buffer);
3642 while (len > 0 && isspace(buffer[len-1])) len--;
3643 if (len == 0) break;
3644 }
3645 fprintf(outfile, "\n");
3646 }
3647 goto CONTINUE;
3648 }
3649
3650 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3651 within the regex; check for this so that we know how to process the data
3652 lines. */
3653
3654 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3655 goto SKIP_DATA;
3656 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3657
3658 /* Extract the size for possible writing before possibly flipping it,
3659 and remember the store that was got. */
3660
3661 true_size = REAL_PCRE_SIZE(re);
3662 regex_gotten_store = first_gotten_store;
3663
3664 /* Output code size information if requested */
3665
3666 if (log_store)
3667 {
3668 int name_count, name_entry_size, real_pcre_size;
3669
3670 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3671 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3672 #ifdef SUPPORT_PCRE8
3673 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3674 real_pcre_size = sizeof(real_pcre);
3675 #endif
3676 #ifdef SUPPORT_PCRE16
3677 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3678 real_pcre_size = sizeof(real_pcre16);
3679 #endif
3680 #ifdef SUPPORT_PCRE32
3681 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3682 real_pcre_size = sizeof(real_pcre32);
3683 #endif
3684 fprintf(outfile, "Memory allocation (code space): %d\n",
3685 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3686 }
3687
3688 /* If -s or /S was present, study the regex to generate additional info to
3689 help with the matching, unless the pattern has the SS option, which
3690 suppresses the effect of /S (used for a few test patterns where studying is
3691 never sensible). */
3692
3693 if (do_study || (force_study >= 0 && !no_force_study))
3694 {
3695 if (timeit > 0)
3696 {
3697 register int i;
3698 clock_t time_taken;
3699 clock_t start_time = clock();
3700 for (i = 0; i < timeit; i++)
3701 {
3702 PCRE_STUDY(extra, re, study_options, &error);
3703 }
3704 time_taken = clock() - start_time;
3705 if (extra != NULL)
3706 {
3707 PCRE_FREE_STUDY(extra);
3708 }
3709 fprintf(outfile, " Study time %.4f milliseconds\n",
3710 (((double)time_taken * 1000.0) / (double)timeit) /
3711 (double)CLOCKS_PER_SEC);
3712 }
3713 PCRE_STUDY(extra, re, study_options, &error);
3714 if (error != NULL)
3715 fprintf(outfile, "Failed to study: %s\n", error);
3716 else if (extra != NULL)
3717 {
3718 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3719 if (log_store)
3720 {
3721 size_t jitsize;
3722 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3723 jitsize != 0)
3724 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3725 }
3726 }
3727 }
3728
3729 /* If /K was present, we set up for handling MARK data. */
3730
3731 if (do_mark)
3732 {
3733 if (extra == NULL)
3734 {
3735 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3736 extra->flags = 0;
3737 }
3738 extra->mark = &markptr;
3739 extra->flags |= PCRE_EXTRA_MARK;
3740 }
3741
3742 /* Extract and display information from the compiled data if required. */
3743
3744 SHOW_INFO:
3745
3746 if (do_debug)
3747 {
3748 fprintf(outfile, "------------------------------------------------------------------\n");
3749 PCRE_PRINTINT(re, outfile, debug_lengths);
3750 }
3751
3752 /* We already have the options in get_options (see above) */
3753
3754 if (do_showinfo)
3755 {
3756 unsigned long int all_options;
3757 pcre_uint32 first_char, need_char;
3758 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
3759 hascrorlf, maxlookbehind;
3760 int nameentrysize, namecount;
3761 const pcre_uint8 *nametable;
3762
3763 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3764 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3765 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3766 new_info(re, NULL, PCRE_INFO_FIRSTLITERAL, &first_char) +
3767 new_info(re, NULL, PCRE_INFO_FIRSTLITERALSET, &first_char_set) +
3768 new_info(re, NULL, PCRE_INFO_LASTLITERAL2, &need_char) +
3769 new_info(re, NULL, PCRE_INFO_LASTLITERAL2SET, &need_char_set) +
3770 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3771 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3772 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3773 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3774 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3775 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3776 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3777 != 0)
3778 goto SKIP_DATA;
3779
3780 if (size != regex_gotten_store) fprintf(outfile,
3781 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3782 (int)size, (int)regex_gotten_store);
3783
3784 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3785 if (backrefmax > 0)
3786 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3787
3788 if (namecount > 0)
3789 {
3790 fprintf(outfile, "Named capturing subpatterns:\n");
3791 while (namecount-- > 0)
3792 {
3793 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
3794 int length = (int)STRLEN(nametable + imm2_size);
3795 fprintf(outfile, " ");
3796 PCHARSV(nametable, imm2_size, length, outfile);
3797 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3798 #ifdef SUPPORT_PCRE32
3799 if (pcre_mode == PCRE32_MODE)
3800 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
3801 #endif
3802 #ifdef SUPPORT_PCRE16
3803 if (pcre_mode == PCRE16_MODE)
3804 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
3805 #endif
3806 #ifdef SUPPORT_PCRE8
3807 if (pcre_mode == PCRE8_MODE)
3808 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
3809 #endif
3810 nametable += nameentrysize * CHAR_SIZE;
3811 }
3812 }
3813
3814 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3815 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3816
3817 all_options = REAL_PCRE_OPTIONS(re);
3818 if (do_flip) all_options = swap_uint32(all_options);
3819
3820 if (get_options == 0) fprintf(outfile, "No options\n");
3821 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3822 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3823 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3824 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3825 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3826 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3827 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3828 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3829 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3830 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3831 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3832 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3833 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3834 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3835 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3836 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3837 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3838 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3839
3840 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3841
3842 switch (get_options & PCRE_NEWLINE_BITS)
3843 {
3844 case PCRE_NEWLINE_CR:
3845 fprintf(outfile, "Forced newline sequence: CR\n");
3846 break;
3847
3848 case PCRE_NEWLINE_LF:
3849 fprintf(outfile, "Forced newline sequence: LF\n");
3850 break;
3851
3852 case PCRE_NEWLINE_CRLF:
3853 fprintf(outfile, "Forced newline sequence: CRLF\n");
3854 break;
3855
3856 case PCRE_NEWLINE_ANYCRLF:
3857 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3858 break;
3859
3860 case PCRE_NEWLINE_ANY:
3861 fprintf(outfile, "Forced newline sequence: ANY\n");
3862 break;
3863
3864 default:
3865 break;
3866 }
3867
3868 if (first_char_set == 2)
3869 {
3870 fprintf(outfile, "First char at start or follows newline\n");
3871 }
3872 else if (first_char_set == 1)
3873 {
3874 const char *caseless =
3875 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
3876 "" : " (caseless)";
3877
3878 if (PRINTOK(first_char))
3879 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3880 else
3881 {
3882 fprintf(outfile, "First char = ");
3883 pchar(first_char, outfile);
3884 fprintf(outfile, "%s\n", caseless);
3885 }
3886 }
3887 else
3888 {
3889 fprintf(outfile, "No first char\n");
3890 }
3891
3892 if (need_char_set == 0)
3893 {
3894 fprintf(outfile, "No need char\n");
3895 }
3896 else
3897 {
3898 const char *caseless =
3899 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
3900 "" : " (caseless)";
3901
3902 if (PRINTOK(need_char))
3903 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3904 else
3905 {
3906 fprintf(outfile, "Need char = ");
3907 pchar(need_char, outfile);
3908 fprintf(outfile, "%s\n", caseless);
3909 }
3910 }
3911
3912 if (maxlookbehind > 0)
3913 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3914
3915 /* Don't output study size; at present it is in any case a fixed
3916 value, but it varies, depending on the computer architecture, and
3917 so messes up the test suite. (And with the /F option, it might be
3918 flipped.) If study was forced by an external -s, don't show this
3919 information unless -i or -d was also present. This means that, except
3920 when auto-callouts are involved, the output from runs with and without
3921 -s should be identical. */
3922
3923 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3924 {
3925 if (extra == NULL)
3926 fprintf(outfile, "Study returned NULL\n");
3927 else
3928 {
3929 pcre_uint8 *start_bits = NULL;
3930 int minlength;
3931
3932 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3933 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3934
3935 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3936 {
3937 if (start_bits == NULL)
3938 fprintf(outfile, "No set of starting bytes\n");
3939 else
3940 {
3941 int i;
3942 int c = 24;
3943 fprintf(outfile, "Starting byte set: ");
3944 for (i = 0; i < 256; i++)
3945 {
3946 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3947 {
3948 if (c > 75)
3949 {
3950 fprintf(outfile, "\n ");
3951 c = 2;
3952 }
3953 if (PRINTOK(i) && i != ' ')
3954 {
3955 fprintf(outfile, "%c ", i);
3956 c += 2;
3957 }
3958 else
3959 {
3960 fprintf(outfile, "\\x%02x ", i);
3961 c += 5;
3962 }
3963 }
3964 }
3965 fprintf(outfile, "\n");
3966 }
3967 }
3968 }
3969
3970 /* Show this only if the JIT was set by /S, not by -s. */
3971
3972 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3973 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3974 {
3975 int jit;
3976 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3977 {
3978 if (jit)
3979 fprintf(outfile, "JIT study was successful\n");
3980 else
3981 #ifdef SUPPORT_JIT
3982 fprintf(outfile, "JIT study was not successful\n");
3983 #else
3984 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3985 #endif
3986 }
3987 }
3988 }
3989 }
3990
3991 /* If the '>' option was present, we write out the regex to a file, and
3992 that is all. The first 8 bytes of the file are the regex length and then
3993 the study length, in big-endian order. */
3994
3995 if (to_file != NULL)
3996 {
3997 FILE *f = fopen((char *)to_file, "wb");
3998 if (f == NULL)
3999 {
4000 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4001 }
4002 else
4003 {
4004 pcre_uint8 sbuf[8];
4005
4006 if (do_flip) regexflip(re, extra);
4007 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4008 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4009 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4010 sbuf[3] = (pcre_uint8)((true_size) & 255);
4011 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4012 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4013 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4014 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4015
4016 if (fwrite(sbuf, 1, 8, f) < 8 ||
4017 fwrite(re, 1, true_size, f) < true_size)
4018 {
4019 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4020 }
4021 else
4022 {
4023 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4024
4025 /* If there is study data, write it. */
4026
4027 if (extra != NULL)
4028 {
4029 if (fwrite(extra->study_data, 1, true_study_size, f) <
4030 true_study_size)
4031 {
4032 fprintf(outfile, "Write error on %s: %s\n", to_file,
4033 strerror(errno));
4034 }
4035 else fprintf(outfile, "Study data written to %s\n", to_file);
4036 }
4037 }
4038 fclose(f);
4039 }
4040
4041 new_free(re);
4042 if (extra != NULL)
4043 {
4044 PCRE_FREE_STUDY(extra);
4045 }
4046 if (locale_set)
4047 {
4048 new_free((void *)tables);
4049 setlocale(LC_CTYPE, "C");
4050 locale_set = 0;
4051 }
4052 continue; /* With next regex */
4053 }
4054 } /* End of non-POSIX compile */
4055
4056 /* Read data lines and test them */
4057
4058 for (;;)
4059 {
4060 #ifdef SUPPORT_PCRE8
4061 pcre_uint8 *q8;
4062 #endif
4063 #ifdef SUPPORT_PCRE16
4064 pcre_uint16 *q16;
4065 #endif
4066 #ifdef SUPPORT_PCRE32
4067 pcre_uint32 *q32;
4068 #endif
4069 pcre_uint8 *bptr;
4070 int *use_offsets = offsets;
4071 int use_size_offsets = size_offsets;
4072 int callout_data = 0;
4073 int callout_data_set = 0;
4074 int count;
4075 pcre_uint32 c;
4076 int copystrings = 0;
4077 int find_match_limit = default_find_match_limit;
4078 int getstrings = 0;
4079 int getlist = 0;
4080 int gmatched = 0;
4081 int start_offset = 0;
4082 int start_offset_sign = 1;
4083 int g_notempty = 0;
4084 int use_dfa = 0;
4085
4086 *copynames = 0;
4087 *getnames = 0;
4088
4089 #ifdef SUPPORT_PCRE32
4090 cn32ptr = copynames;
4091 gn32ptr = getnames;
4092 #endif
4093 #ifdef SUPPORT_PCRE16
4094 cn16ptr = copynames16;
4095 gn16ptr = getnames16;
4096 #endif
4097 #ifdef SUPPORT_PCRE8
4098 cn8ptr = copynames8;
4099 gn8ptr = getnames8;
4100 #endif
4101
4102 SET_PCRE_CALLOUT(callout);
4103 first_callout = 1;
4104 last_callout_mark = NULL;
4105 callout_extra = 0;
4106 callout_count = 0;
4107 callout_fail_count = 999999;
4108 callout_fail_id = -1;
4109 show_malloc = 0;
4110 options = 0;
4111
4112 if (extra != NULL) extra->flags &=
4113 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4114
4115 len = 0;
4116 for (;;)
4117 {
4118 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4119 {
4120 if (len > 0) /* Reached EOF without hitting a newline */
4121 {
4122 fprintf(outfile, "\n");
4123 break;
4124 }
4125 done = 1;
4126 goto CONTINUE;
4127 }
4128 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4129 len = (int)strlen((char *)buffer);
4130 if (buffer[len-1] == '\n') break;
4131 }
4132
4133 while (len > 0 && isspace(buffer[len-1])) len--;
4134 buffer[len] = 0;
4135 if (len == 0) break;
4136
4137 p = buffer;
4138 while (isspace(*p)) p++;
4139
4140 #ifndef NOUTF
4141 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4142 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4143 if (use_utf)
4144 {
4145 char *q;
4146 pcre_uint32 c;
4147 int n = 1;
4148
4149 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &c);
4150 if (n <= 0)
4151 {
4152 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4153 goto NEXT_DATA;
4154 }
4155 }
4156 #endif
4157
4158 /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4159 the number of pcre_uchar units that will be needed. */
4160 if (dbuffer == NULL || len >= dbuffer_size)
4161 {
4162 dbuffer_size *= 2;
4163 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4164 if (dbuffer == NULL)
4165 {
4166 fprintf(stderr, "pcretest: malloc(%d) failed\n", dbuffer_size);
4167 exit(1);
4168 }
4169 }
4170
4171 #ifdef SUPPORT_PCRE8
4172 q8 = (pcre_uint8 *) dbuffer;
4173 #endif
4174 #ifdef SUPPORT_PCRE16
4175 q16 = (pcre_uint16 *) dbuffer;
4176 #endif
4177 #ifdef SUPPORT_PCRE32
4178 q32 = (pcre_uint32 *) dbuffer;
4179 #endif
4180
4181 while ((c = *p++) != 0)
4182 {
4183 int i = 0;
4184 int n = 0;
4185
4186 /* In UTF mode, input can be UTF-8: a non-backslash lead byte is passed to
4187 GETUTF8INC together with p (presumably to pick up the rest of the character).
4188 In non-UTF mode, the value of the byte falls through to later, where it is
4189 stored in the 8-bit, 16-bit or 32-bit buffer without conversion to UTF-8. */
4190
4191 if (c != '\\')
4192 {
4193 #ifndef NOUTF
4194 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4195 #endif
4196 }
4197
4198 /* Handle backslash escapes */
4199
4200 else switch ((c = *p++))
4201 {
4202 case 'a': c = 7; break;
4203 case 'b': c = '\b'; break;
4204 case 'e': c = 27; break;
4205 case 'f': c = '\f'; break;
4206 case 'n': c = '\n'; break;
4207 case 'r': c = '\r'; break;
4208 case 't': c = '\t'; break;
4209 case 'v': c = '\v'; break;
4210
4211 case '0': case '1': case '2': case '3':
4212 case '4': case '5': case '6': case '7':
4213 c -= '0';
4214 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4215 c = c * 8 + *p++ - '0';
4216 break;
4217
4218 case 'x':
4219 if (*p == '{')
4220 {
4221 pcre_uint8 *pt = p;
4222 c = 0;
4223
4224 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4225 when isxdigit() is a macro that refers to its argument more than
4226 once. This is banned by the C Standard, but apparently happens in at
4227 least one MacOS environment. */
4228
4229 for (pt++; isxdigit(*pt); pt++)
4230 {
4231 if (++i == 9)
4232 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4233 "using only the first eight.\n");
4234 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4235 }
4236 if (*pt == '}')
4237 {
4238 p = pt + 1;
4239 break;
4240 }
4241 /* Not correct form for \x{...}; fall through */
4242 }
4243
4244 /* \x without {} always defines just one byte in 8-bit mode. This
4245 allows UTF-8 characters to be constructed byte by byte, and also allows
4246 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4247 Otherwise, pass it down to later code so that it can be turned into
4248 UTF-8 when running in 16/32-bit mode. */
4249
4250 c = 0;
4251 while (i++ < 2 && isxdigit(*p))
4252 {
4253 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4254 p++;
4255 }
4256 #if !defined NOUTF && defined SUPPORT_PCRE8
4257 if (use_utf && (pcre_mode == PCRE8_MODE))
4258 {
4259 *q8++ = c;
4260 continue;
4261 }
4262 #endif
4263 break;
4264
4265 case 0: /* \ followed by EOF allows for an empty line */
4266 p--;
4267 continue;
4268
4269 case '>':
4270 if (*p == '-')
4271 {
4272 start_offset_sign = -1;
4273 p++;
4274 }
4275 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4276 start_offset *= start_offset_sign;
4277 continue;
4278
4279 case 'A': /* Option setting */
4280 options |= PCRE_ANCHORED;
4281 continue;
4282
4283 case 'B':
4284 options |= PCRE_NOTBOL;
4285 continue;
4286
4287 case 'C':
4288 if (isdigit(*p)) /* Set copy string */
4289 {
4290 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4291 copystrings |= 1 << n;
4292 }
4293 else if (isalnum(*p))
4294 {
4295 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4296 }
4297 else if (*p == '+')
4298 {
4299 callout_extra = 1;
4300 p++;
4301 }
4302 else if (*p == '-')
4303 {
4304 SET_PCRE_CALLOUT(NULL);
4305 p++;
4306 }
4307 else if (*p == '!')
4308 {
4309 callout_fail_id = 0;
4310 p++;
4311 while(isdigit(*p))
4312 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4313 callout_fail_count = 0;
4314 if (*p == '!')
4315 {
4316 p++;
4317 while(isdigit(*p))
4318 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4319 }
4320 }
4321 else if (*p == '*')
4322 {
4323 int sign = 1;
4324 callout_data = 0;
4325 if (*(++p) == '-') { sign = -1; p++; }
4326 while(isdigit(*p))
4327 callout_data = callout_data * 10 + *p++ - '0';
4328 callout_data *= sign;
4329 callout_data_set = 1;
4330 }
4331 continue;
4332
4333 #if !defined NODFA
4334 case 'D':
4335 #if !defined NOPOSIX
4336 if (posix || do_posix)
4337 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4338 else
4339 #endif
4340 use_dfa = 1;
4341 continue;
4342 #endif
4343
4344 #if !defined NODFA
4345 case 'F':
4346 options |= PCRE_DFA_SHORTEST;
4347 continue;
4348 #endif
4349
4350 case 'G':
4351 if (isdigit(*p))
4352 {
4353 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4354 getstrings |= 1 << n;
4355 }
4356 else if (isalnum(*p))
4357 {
4358 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4359 }
4360 continue;
4361
4362 case 'J':
4363 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4364 if (extra != NULL
4365 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4366 && extra->executable_jit != NULL)
4367 {
4368 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4369 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4370 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4371 }
4372 continue;
4373
4374 case 'L':
4375 getlist = 1;
4376 continue;
4377
4378 case 'M':
4379 find_match_limit = 1;
4380 continue;
4381
4382 case 'N':
4383 if ((options & PCRE_NOTEMPTY) != 0)
4384 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4385 else
4386 options |= PCRE_NOTEMPTY;
4387 continue;
4388
4389 case 'O':
4390 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4391 if (n > size_offsets_max)
4392 {
4393 size_offsets_max = n;
4394 free(offsets);
4395 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4396 if (offsets == NULL)
4397 {
4398 printf("** Failed to get %d bytes of memory for offsets vector\n",
4399 (int)(size_offsets_max * sizeof(int)));
4400 yield = 1;
4401 goto EXIT;
4402 }
4403 }
4404 use_size_offsets = n;
4405 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4406 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4407 continue;
4408
4409 case 'P':
4410 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4411 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4412 continue;
4413
4414 case 'Q':
4415 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4416 if (extra == NULL)
4417 {
4418 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4419 extra->flags = 0;
4420 }
4421 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4422 extra->match_limit_recursion = n;
4423 continue;
4424
4425 case 'q':
4426 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4427 if (extra == NULL)
4428 {
4429 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4430 extra->flags = 0;
4431 }
4432 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4433 extra->match_limit = n;
4434 continue;
4435
4436 #if !defined NODFA
4437 case 'R':
4438 options |= PCRE_DFA_RESTART;
4439 continue;
4440 #endif
4441
4442 case 'S':
4443 show_malloc = 1;
4444 continue;
4445
4446 case 'Y':
4447 options |= PCRE_NO_START_OPTIMIZE;
4448 continue;
4449
4450 case 'Z':
4451 options |= PCRE_NOTEOL;
4452 continue;
4453
4454 case '?':
4455 options |= PCRE_NO_UTF8_CHECK;
4456 continue;
4457
4458 case '<':
4459 {
4460 int x = check_newline(p, outfile);
4461 if (x == 0) goto NEXT_DATA;
4462 options |= x;
4463 while (*p++ != '>');
4464 }
4465 continue;
4466 }
4467
4468 /* We now have a character value in c that may be greater than 255. In
4469 8-bit mode we convert it to UTF-8 if we are in UTF mode; otherwise it is
4470 stored as one byte, with a warning if it is greater than 255. In 16-bit and
4471 32-bit modes the value is written straight to the 16-bit or 32-bit buffer
4472 (as a surrogate pair for values of 0x10000 or more in UTF-16 mode). Values
4473 from \x.. never get this far in 8-bit UTF mode; they were stored above. */
4474
4475 #ifdef SUPPORT_PCRE8
4476 if (pcre_mode == PCRE8_MODE)
4477 {
4478 #ifndef NOUTF
4479 if (use_utf)
4480 {
4481 q8 += ord2utf8(c, q8);
4482 }
4483 else
4484 #endif
4485 {
4486 if (c > 0xffu)
4487 {
4488 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4489 "and UTF-8 mode is not enabled.\n", c);
4490 fprintf(outfile, "** Truncation will probably give the wrong "
4491 "result.\n");
4492 }
4493
4494 *q8++ = c;
4495 }
4496 }
4497 #endif
4498 #ifdef SUPPORT_PCRE16
4499 if (pcre_mode == PCRE16_MODE)
4500 {
4501 #ifndef NOUTF
4502 if (use_utf)
4503 {
4504 if (c > 0x10ffffu)
4505 {
4506 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4507 "cannot be converted to UTF-16\n");
4508 goto NEXT_DATA;
4509 }
4510 else if (c >= 0x10000u)
4511 {
4512 c-= 0x10000u;
4513 *q16++ = 0xD800 | (c >> 10);
4514 *q16++ = 0xDC00 | (c & 0x3ff);
4515 }
4516 else
4517 *q16++ = c;
4518 }
4519 else
4520 #endif
4521 {
4522 if (c > 0xffffu)
4523 {
4524 fprintf(outfile, "** Character value is greater than 0xffff "
4525 "and UTF-16 mode is not enabled.\n", c);
4526 fprintf(outfile, "** Truncation will probably give the wrong "
4527 "result.\n");
4528 }
4529
4530 *q16++ = c;
4531 }
4532 }
4533 #endif
4534 #ifdef SUPPORT_PCRE32
4535 if (pcre_mode == PCRE32_MODE)
4536 {
4537 *q32++ = c;
4538 }
4539 #endif
4540
4541 }
4542
4543 /* Reached end of subject string */
4544
4545 #ifdef SUPPORT_PCRE8
4546 if (pcre_mode == PCRE8_MODE)
4547 {
4548 *q8 = 0;
4549 len = (int)(q8 - (pcre_uint8 *)dbuffer);
4550 }
4551 #endif
4552 #ifdef SUPPORT_PCRE16
4553 if (pcre_mode == PCRE16_MODE)
4554 {
4555 *q16 = 0;
4556 len = (int)(q16 - (pcre_uint16 *)dbuffer);
4557 }
4558 #endif
4559 #ifdef SUPPORT_PCRE32
4560 if (pcre_mode == PCRE32_MODE)
4561 {
4562 *q32 = 0;
4563 len = (int)(q32 - (pcre_uint32 *)dbuffer);
4564 }
4565 #endif
4566
4567 /* Move the data to the end of the buffer so that a read over the end of
4568 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
4569 we are using the POSIX interface, we must include the terminating zero. */
4570
4571 bptr = dbuffer;
4572
4573 #if !defined NOPOSIX
4574 if (posix || do_posix)
4575 {
4576 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4577 bptr += dbuffer_size - len - 1;
4578 }
4579 else
4580 #endif
4581 {
4582 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4583 }
4584
4585 if ((all_use_dfa || use_dfa) && find_match_limit)
4586 {
4587 printf("**Match limit not relevant for DFA matching: ignored\n");
4588 find_match_limit = 0;
4589 }
4590
4591 /* Handle matching via the POSIX interface, which does not
4592 support timing or playing with the match limit or callout data. */
4593
4594 #if !defined NOPOSIX
4595 if (posix || do_posix)
4596 {
4597 int rc;
4598 int eflags = 0;
4599 regmatch_t *pmatch = NULL;
4600 if (use_size_offsets > 0)
4601 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4602 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4603 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4604 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4605
4606 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4607
4608 if (rc != 0)
4609 {
4610 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4611 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4612 }
4613 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4614 {
4615 fprintf(outfile, "Matched with REG_NOSUB\n");
4616 }
4617 else
4618 {
4619 size_t i;
4620 for (i = 0; i < (size_t)use_size_offsets; i++)
4621 {
4622 if (pmatch[i].rm_so >= 0)
4623 {
4624 fprintf(outfile, "%2d: ", (int)i);
4625 PCHARSV(dbuffer, pmatch[i].rm_so,
4626 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4627 fprintf(outfile, "\n");
4628 if (do_showcaprest || (i == 0 && do_showrest))
4629 {
4630 fprintf(outfile, "%2d+ ", (int)i);
4631 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4632 outfile);
4633 fprintf(outfile, "\n");
4634 }
4635 }
4636 }
4637 }
4638 free(pmatch);
4639 goto NEXT_DATA;
4640 }
4641
4642 #endif /* !defined NOPOSIX */
4643
4644 /* Handle matching via the native interface - repeats for /g and /G */
4645
4646 /* Ensure that there is a JIT callback if we want to verify that JIT was
4647 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4648
4649 if (verify_jit && jit_stack == NULL && extra != NULL)
4650 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4651
4652 for (;; gmatched++) /* Loop for /g or /G */
4653 {
4654 markptr = NULL;
4655 jit_was_used = FALSE;
4656
4657 if (timeitm > 0)
4658 {
4659 register int i;
4660 clock_t time_taken;
4661 clock_t start_time = clock();
4662
4663 #if !defined NODFA
4664 if (all_use_dfa || use_dfa)
4665 {
4666 if ((options & PCRE_DFA_RESTART) != 0)
4667 {
4668 fprintf(outfile, "Timing DFA restarts is not supported\n");
4669 break;
4670 }
4671 if (dfa_workspace == NULL)
4672 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4673 for (i = 0; i < timeitm; i++)
4674 {
4675 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4676 (options | g_notempty), use_offsets, use_size_offsets,
4677 dfa_workspace, DFA_WS_DIMENSION);
4678 }
4679 }
4680 else
4681 #endif
4682
4683 for (i = 0; i < timeitm; i++)
4684 {
4685 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4686 (options | g_notempty), use_offsets, use_size_offsets);
4687 }
4688 time_taken = clock() - start_time;
4689 fprintf(outfile, "Execute time %.4f milliseconds\n",
4690 (((double)time_taken * 1000.0) / (double)timeitm) /
4691 (double)CLOCKS_PER_SEC);
4692 }
4693
4694 /* If find_match_limit is set, we want to do repeated matches with
4695 varying limits in order to find the minimum value for the match limit and
4696 for the recursion limit. The match limits are relevant only to the normal
4697 running of pcre_exec(), so disable the JIT optimization. This makes it
4698 possible to run the same set of tests with and without JIT externally
4699 requested. */
4700
4701 if (find_match_limit)
4702 {
4703 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4704 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4705 extra->flags = 0;
4706
4707 (void)check_match_limit(re, extra, bptr, len, start_offset,
4708 options|g_notempty, use_offsets, use_size_offsets,
4709 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4710 PCRE_ERROR_MATCHLIMIT, "match()");
4711
4712 count = check_match_limit(re, extra, bptr, len, start_offset,
4713 options|g_notempty, use_offsets, use_size_offsets,
4714 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4715 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4716 }
4717
4718 /* If callout_data is set, use the interface with additional data */
4719
4720 else if (callout_data_set)
4721 {
4722 if (extra == NULL)
4723 {
4724 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4725 extra->flags = 0;
4726 }
4727 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4728 extra->callout_data = &callout_data;
4729 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4730 options | g_notempty, use_offsets, use_size_offsets);
4731 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4732 }
4733
4734 /* The normal case is just to do the match once, with the default
4735 value of match_limit. */
4736
4737 #if !defined NODFA
4738 else if (all_use_dfa || use_dfa)
4739 {
4740 if (dfa_workspace == NULL)
4741 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4742 if (dfa_matched++ == 0)
4743 dfa_workspace[0] = -1; /* To catch bad restart */
4744 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4745 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4746 DFA_WS_DIMENSION);
4747 if (count == 0)
4748 {
4749 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4750 count = use_size_offsets/2;
4751 }
4752 }
4753 #endif
4754
4755 else
4756 {
4757 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4758 options | g_notempty, use_offsets, use_size_offsets);
4759 if (count == 0)
4760 {
4761 fprintf(outfile, "Matched, but too many substrings\n");
4762 count = use_size_offsets/3;
4763 }
4764 }
4765
4766 /* Matched */
4767
4768 if (count >= 0)
4769 {
4770 int i, maxcount;
4771 void *cnptr, *gnptr;
4772
4773 #if !defined NODFA
4774 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4775 #endif
4776 maxcount = use_size_offsets/3;
4777
4778 /* This is a check against a lunatic return value. */
4779
4780 if (count > maxcount)
4781 {
4782 fprintf(outfile,
4783 "** PCRE error: returned count %d is too big for offset size %d\n",
4784 count, use_size_offsets);
4785 count = use_size_offsets/3;
4786 if (do_g || do_G)
4787 {
4788 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4789 do_g = do_G = FALSE; /* Break g/G loop */
4790 }
4791 }
4792
4793 /* do_allcaps requests showing of all captures in the pattern, to check
4794 unset ones at the end. */
4795
4796 if (do_allcaps)
4797 {
4798 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4799 goto SKIP_DATA;
4800 count++; /* Allow for full match */
4801 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4802 }
4803
4804 /* Output the captured substrings */
4805
4806 for (i = 0; i < count * 2; i += 2)
4807 {
4808 if (use_offsets[i] < 0)
4809 {
4810 if (use_offsets[i] != -1)
4811 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4812 use_offsets[i], i);
4813 if (use_offsets[i+1] != -1)
4814 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4815 use_offsets[i+1], i+1);
4816 fprintf(outfile, "%2d: <unset>\n", i/2);
4817 }
4818 else
4819 {
4820 fprintf(outfile, "%2d: ", i/2);
4821 PCHARSV(bptr, use_offsets[i],
4822 use_offsets[i+1] - use_offsets[i], outfile);
4823 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4824 fprintf(outfile, "\n");
4825 if (do_showcaprest || (i == 0 && do_showrest))
4826 {
4827 fprintf(outfile, "%2d+ ", i/2);
4828 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4829 outfile);
4830 fprintf(outfile, "\n");
4831 }
4832 }
4833 }
4834
4835 if (markptr != NULL)
4836 {
4837 fprintf(outfile, "MK: ");
4838 PCHARSV(markptr, 0, -1, outfile);
4839 fprintf(outfile, "\n");
4840 }
4841
4842 for (i = 0; i < 32; i++)
4843 {
4844 if ((copystrings & (1 << i)) != 0)
4845 {
4846 int rc;
4847 char copybuffer[256];
4848 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4849 copybuffer, sizeof(copybuffer));
4850 if (rc < 0)
4851 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4852 else
4853 {
4854 fprintf(outfile, "%2dC ", i);
4855 PCHARSV(copybuffer, 0, rc, outfile);
4856 fprintf(outfile, " (%d)\n", rc);
4857 }
4858 }
4859 }
4860
4861 cnptr = copynames;
4862 for (;;)
4863 {
4864 int rc;
4865 char copybuffer[256];
4866
4867 if (pcre_mode == PCRE16_MODE)
4868 {
4869 if (*(pcre_uint16 *)cnptr == 0) break;
4870 }
4871 else
4872 {
4873 if (*(pcre_uint8 *)cnptr == 0) break;
4874 }
4875
4876 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4877 cnptr, copybuffer, sizeof(copybuffer));
4878
4879 if (rc < 0)
4880 {
4881 fprintf(outfile, "copy substring ");
4882 PCHARSV(cnptr, 0, -1, outfile);
4883 fprintf(outfile, " failed %d\n", rc);
4884 }
4885 else
4886 {
4887 fprintf(outfile, " C ");
4888 PCHARSV(copybuffer, 0, rc, outfile);
4889 fprintf(outfile, " (%d) ", rc);
4890 PCHARSV(cnptr, 0, -1, outfile);
4891 putc('\n', outfile);
4892 }
4893
4894 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4895 }
4896
4897 for (i = 0; i < 32; i++)
4898 {
4899 if ((getstrings & (1 << i)) != 0)
4900 {
4901 int rc;
4902 const char *substring;
4903 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4904 if (rc < 0)
4905 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4906 else
4907 {
4908 fprintf(outfile, "%2dG ", i);
4909 PCHARSV(substring, 0, rc, outfile);
4910 fprintf(outfile, " (%d)\n", rc);
4911 PCRE_FREE_SUBSTRING(substring);
4912 }
4913 }
4914 }
4915
4916 gnptr = getnames;
4917 for (;;)
4918 {
4919 int rc;
4920 const char *substring;
4921
4922 if (pcre_mode == PCRE16_MODE)
4923 {
4924 if (*(pcre_uint16 *)gnptr == 0) break;
4925 }
4926 else
4927 {
4928 if (*(pcre_uint8 *)gnptr == 0) break;
4929 }
4930
4931 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4932 gnptr, &substring);
4933 if (rc < 0)
4934 {
4935 fprintf(outfile, "get substring ");
4936 PCHARSV(gnptr, 0, -1, outfile);
4937 fprintf(outfile, " failed %d\n", rc);
4938 }
4939 else
4940 {
4941 fprintf(outfile, " G ");
4942 PCHARSV(substring, 0, rc, outfile);
4943 fprintf(outfile, " (%d) ", rc);
4944 PCHARSV(gnptr, 0, -1, outfile);
4945 PCRE_FREE_SUBSTRING(substring);
4946 putc('\n', outfile);
4947 }
4948
4949 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4950 }
4951
4952 if (getlist)
4953 {
4954 int rc;
4955 const char **stringlist;
4956 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4957 if (rc < 0)
4958 fprintf(outfile, "get substring list failed %d\n", rc);
4959 else
4960 {
4961 for (i = 0; i < count; i++)
4962 {
4963 fprintf(outfile, "%2dL ", i);
4964 PCHARSV(stringlist[i], 0, -1, outfile);
4965 putc('\n', outfile);
4966 }
4967 if (stringlist[i] != NULL)
4968 fprintf(outfile, "string list not terminated by NULL\n");
4969 PCRE_FREE_SUBSTRING_LIST(stringlist);
4970 }
4971 }
4972 }
4973
4974 /* There was a partial match */
4975
4976 else if (count == PCRE_ERROR_PARTIAL)
4977 {
4978 if (markptr == NULL) fprintf(outfile, "Partial match");
4979 else
4980 {
4981 fprintf(outfile, "Partial match, mark=");
4982 PCHARSV(markptr, 0, -1, outfile);
4983 }
4984 if (use_size_offsets > 1)
4985 {
4986 fprintf(outfile, ": ");
4987 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4988 outfile);
4989 }
4990 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4991 fprintf(outfile, "\n");
4992 break; /* Out of the /g loop */
4993 }
4994
4995 /* Failed to match. If this is a /g or /G loop and we previously set
4996 g_notempty after a null match, this is not necessarily the end. We want
4997 to advance the start offset, and continue. We won't be at the end of the
4998 string - that was checked before setting g_notempty.
4999
5000 Complication arises in the case when the newline convention is "any",
5001 "crlf", or "anycrlf". If the previous match was at the end of a line
5002 terminated by CRLF, an advance of one character just passes the \r,
5003 whereas we should prefer the longer newline sequence, as does the code in
5004 pcre_exec(). Fudge the offset value to achieve this. We check for a
5005 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5006 find the default.
5007
5008 Otherwise, in the case of UTF-8 matching, the advance must be one
5009 character, not one byte. */
5010
5011 else
5012 {
5013 if (g_notempty != 0)
5014 {
5015 int onechar = 1;
5016 unsigned int obits = REAL_PCRE_OPTIONS(re);
5017 use_offsets[0] = start_offset;
5018 if ((obits & PCRE_NEWLINE_BITS) == 0)
5019 {
5020 int d;
5021 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5022 /* Note that these values are always the ASCII ones, even in
5023 EBCDIC environments. CR = 13, NL = 10. */
5024 obits = (d == 13)? PCRE_NEWLINE_CR :
5025 (d == 10)? PCRE_NEWLINE_LF :
5026 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5027 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5028 (d == -1)? PCRE_NEWLINE_ANY : 0;
5029 }
5030 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5031 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5032 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5033 &&
5034 start_offset < len - 1 && (
5035 #ifdef SUPPORT_PCRE8
5036 (pcre_mode == PCRE8_MODE &&
5037 bptr[start_offset] == '\r' &&
5038 bptr[start_offset + 1] == '\n') ||
5039 #endif
5040 #ifdef SUPPORT_PCRE16
5041 (pcre_mode == PCRE16_MODE &&
5042 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5043 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5044 #endif
5045 #ifdef SUPPORT_PCRE32
5046 (pcre_mode == PCRE32_MODE &&
5047 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5048 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5049 #endif
5050 0))
5051 onechar++;
5052 else if (use_utf)
5053 {
5054 while (start_offset + onechar < len)
5055 {
5056 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5057 onechar++;
5058 }
5059 }
5060 use_offsets[1] = start_offset + onechar;
5061 }
5062 else
5063 {
5064 switch(count)
5065 {
5066 case PCRE_ERROR_NOMATCH:
5067 if (gmatched == 0)
5068 {
5069 if (markptr == NULL)
5070 {
5071 fprintf(outfile, "No match");
5072 }
5073 else
5074 {
5075 fprintf(outfile, "No match, mark = ");
5076 PCHARSV(markptr, 0, -1, outfile);
5077 }
5078 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5079 putc('\n', outfile);
5080 }
5081 break;
5082
5083 case PCRE_ERROR_BADUTF8:
5084 case PCRE_ERROR_SHORTUTF8:
5085 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5086 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5087 8 * CHAR_SIZE);
5088 if (use_size_offsets >= 2)
5089 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5090 use_offsets[1]);
5091 fprintf(outfile, "\n");
5092 break;
5093
5094 case PCRE_ERROR_BADUTF8_OFFSET:
5095 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5096 8 * CHAR_SIZE);
5097 break;
5098
5099 default:
5100 if (count < 0 &&
5101 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5102 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5103 else
5104 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5105 break;
5106 }
5107
5108 break; /* Out of the /g loop */
5109 }
5110 }
5111
5112 /* If not /g or /G we are done */
5113
5114 if (!do_g && !do_G) break;
5115
5116 /* If we have matched an empty string, first check to see if we are at
5117 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5118 Perl's /g option does. This turns out to be rather cunning. First we set
5119 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5120 same point. If this fails (picked up above) we advance to the next
5121 character. */
5122
5123 g_notempty = 0;
5124
5125 if (use_offsets[0] == use_offsets[1])
5126 {
5127 if (use_offsets[0] == len) break;
5128 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5129 }
5130
5131 /* For /g, update the start offset, leaving the rest alone */
5132
5133 if (do_g) start_offset = use_offsets[1];
5134
5135 /* For /G, update the pointer and length */
5136
5137 else
5138 {
5139 bptr += use_offsets[1] * CHAR_SIZE;
5140 len -= use_offsets[1];
5141 }
5142 } /* End of loop for /g and /G */
5143
5144 NEXT_DATA: continue;
5145 } /* End of loop for data lines */
5146
5147 CONTINUE:
5148
5149 #if !defined NOPOSIX
5150 if (posix || do_posix) regfree(&preg);
5151 #endif
5152
5153 if (re != NULL) new_free(re);
5154 if (extra != NULL)
5155 {
5156 PCRE_FREE_STUDY(extra);
5157 }
5158 if (locale_set)
5159 {
5160 new_free((void *)tables);
5161 setlocale(LC_CTYPE, "C");
5162 locale_set = 0;
5163 }
5164 if (jit_stack != NULL)
5165 {
5166 PCRE_JIT_STACK_FREE(jit_stack);
5167 jit_stack = NULL;
5168 }
5169 }
5170
5171 if (infile == stdin) fprintf(outfile, "\n");
5172
5173 EXIT:
5174
5175 if (infile != NULL && infile != stdin) fclose(infile);
5176 if (outfile != NULL && outfile != stdout) fclose(outfile);
5177
5178 free(buffer);
5179 free(dbuffer);
5180 free(pbuffer);
5181 free(offsets);
5182
5183 #ifdef SUPPORT_PCRE16
5184 if (buffer16 != NULL) free(buffer16);
5185 #endif
5186 #ifdef SUPPORT_PCRE32
5187 if (buffer32 != NULL) free(buffer32);
5188 #endif
5189
5190 #if !defined NODFA
5191 if (dfa_workspace != NULL)
5192 free(dfa_workspace);
5193 #endif
5194
5195 return yield;
5196 }
5197
5198 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5