/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 922 - (show annotations)
Mon Feb 20 18:44:42 2012 UTC (7 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 131298 byte(s)
Set PCRE_EXTRA_USED_JIT when JIT was actually used at runtime. Add /S++ and
-s++ to pcretest to show whether JIT was used or not. 
1 /*.************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <stdlib.h>
58 #include <time.h>
59 #include <locale.h>
60 #include <errno.h>
61
62 #ifdef SUPPORT_LIBREADLINE
63 #ifdef HAVE_UNISTD_H
64 #include <unistd.h>
65 #endif
66 #include <readline/readline.h>
67 #include <readline/history.h>
68 #endif
69
70
/* File-open modes and a few name fix-ups that differ between Windows and other
systems. INPUT_MODE and OUTPUT_MODE are the fopen() mode strings for the input
and output files. The output mode is "wb" everywhere; the input mode carries
the "b" only on non-Windows systems (it makes no difference on Unix-like
systems, and it was reported to be wrong for input on Windows). Keeping the
modes in two macros makes them easy to adjust. */

#if !defined(_WIN32) && !defined(WIN32)

/* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"

#else

/* Windows */

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows is reported to spell these with a leading underscore. Some
environments already define them as macros, hence the guards. */

#if !defined(isatty)
#define isatty _isatty
#endif

#if !defined(fileno)
#define fileno _fileno
#endif

/* A user-supplied fix for Borland Builder 5 under Windows. */

#if defined(__BORLANDC__)
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#endif
107
/* PRIV() is the identity here: PRIV(x) expands to plain x. */
#define PRIV(ident) ident
109
110 /* We have to include pcre_internal.h because we need the internal info for
111 displaying the results of pcre_study() and we also need to know about the
112 internal macros, structures, and other internal data values; pcretest has
113 "inside information" compared to a program that strictly follows the PCRE API.
114
115 Although pcre_internal.h does itself include pcre.h, we explicitly include it
116 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
117 appropriately for an application, not for building PCRE. */
118
119 #include "pcre.h"
120
/* With 16-bit support only (no 8-bit support), configure the internal macros
for 16-bit mode by defining COMPILE_PCRE16. */
#if !defined(SUPPORT_PCRE8)
#  if defined(SUPPORT_PCRE16)
#    define COMPILE_PCRE16
#  endif
#endif
125
126 #include "pcre_internal.h"
127
/* Prototypes for the functions that print the internal form of a compiled
regex. The implementation lives in a separate file so that (a) it can be
compiled in either 8-bit or 16-bit mode, and (b) it can be #included directly
in pcre_compile.c when that is compiled in debug mode. One prototype is
declared for each supported library width; both take the compiled pattern as a
pcre pointer. */

#if defined(SUPPORT_PCRE8)
extern void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#if defined(SUPPORT_PCRE16)
extern void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
139
140 /* We need access to some of the data tables that PCRE uses. So as not to have
141 to keep two copies, we include the source file here, changing the names of the
142 external symbols to prevent clashes. */
143
144 #define PCRE_INCLUDED
145
146 #include "pcre_tables.c"
147
/* PRINTABLE(c) decides whether an output character is printed as-is or as a
hex value when showing compiled patterns; the definition is the same as in the
printint.src file. It is true for codes 64..254 in an EBCDIC build and for
codes 32..126 otherwise. We use it when the locale has not been explicitly
changed, so as to get consistent output from systems that differ in their
output from isprint() even in the "C" locale. */

#ifndef EBCDIC
#define PRINTABLE(c) (!((c) < 32 || (c) >= 127))
#else
#define PRINTABLE(c) (!((c) < 64 || (c) >= 255))
#endif

/* PRINTOK(c) uses isprint() when locale_set is non-zero and the fixed
PRINTABLE() ranges otherwise. */

#define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
161
/* POSIX support is disabled in 16-bit-only mode: define NOPOSIX (unless it is
already defined) when 16-bit support is present and 8-bit support is not. */
#if !defined(NOPOSIX)
#  if defined(SUPPORT_PCRE16) && !defined(SUPPORT_PCRE8)
#    define NOPOSIX
#  endif
#endif
166
167 /* It is possible to compile this test program without including support for
168 testing the POSIX interface, though this is not available via the standard
169 Makefile. */
170
171 #if !defined NOPOSIX
172 #include "pcreposix.h"
173 #endif
174
/* It is also possible, originally for the benefit of a version that was
imported into Exim, to build pcretest without support for UTF-8 or UTF-16
(define NOUTF), or without the interface to the DFA matcher (define NODFA). In
fact, we automatically cut out the UTF support if PCRE is built without it:
NOUTF is defined here whenever SUPPORT_UTF is not. */

#if !defined(SUPPORT_UTF) && !defined(NOUTF)
#define NOUTF
#endif
185
186 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
187 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
188 only from one place and is handled differently). I couldn't dream up any way of
189 using a single macro to do this in a generic way, because of the many different
190 argument requirements. We know that at least one of SUPPORT_PCRE8 and
191 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
192 use these in the definitions of generic macros.
193
194 **** Special note about the PCHARSxxx macros: the address of the string to be
195 printed is always given as two arguments: a base address followed by an offset.
196 The base address is cast to the correct data size for 8 or 16 bit data; the
197 offset is in units of this size. If the string were given as base+offset in one
198 argument, the casting might be incorrectly applied. */
199
#ifdef SUPPORT_PCRE8

/* 8-bit variants of the mode-specific call macros. Each one wraps a single
call into the 8-bit library (or an 8-bit helper in this file), adding the casts
needed to pass pcretest's byte buffers. Macros of the form "x = call(...)"
assign the call's result to their first argument. All of them expand to an
expression without a trailing semicolon.

Pointer arguments are cast exactly as written (the cast binds before any
operator inside the argument), so callers should pass simple pointer
expressions; offsets into a string go in the separate "offset" argument. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)

/* cn16 is accepted only so that the 8-bit and 16-bit macros share one
argument list; it is not used here. */
#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
  p = read_capture_name8(p, cn8, re)

#define STRLEN8(p) ((int)strlen((char *)p))

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* The compiled pattern is a real parameter (re), so the macro does not rely
on a variable that happens to be called "re" at the call site. */
#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)

#endif /* SUPPORT_PCRE8 */
279
280 /* -----------------------------------------------------------*/
281
#ifdef SUPPORT_PCRE16

/* 16-bit variants of the mode-specific call macros. pcretest holds patterns,
study data and JIT stacks through the 8-bit pointer types, so each macro casts
its arguments to the corresponding pcre16 types on the way in and casts any
returned object back on the way out. Macros of the form "x = call(...)" assign
the call's result to their first argument. All of them expand to an expression
without a trailing semicolon.

Pointer arguments are cast exactly as written (the cast binds before any
operator inside the argument), so callers should pass simple pointer
expressions; offsets into a string go in the separate "offset" argument, which
is counted in 16-bit units. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* cn8 is accepted only so that the 8-bit and 16-bit macros share one
argument list; it is not used here. */
#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
    tables)

/* The size argument is halved before it is passed on. It is parenthesized so
that a compound size expression is halved as a whole. */
#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
    len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* The compiled pattern is a real parameter (re), so the macro does not rely
on a variable that happens to be called "re" at the call site. It is cast to
pcre16 * in the same way as in the other 16-bit macros. */
#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
    tables)

/* No cast here: pcre16_printint() is declared in this file as taking a
pcre pointer. */
#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)

#endif /* SUPPORT_PCRE16 */
368
369
/* ----- Generic call macros. Three configurations are handled:

  (1) Both modes are supported: each macro selects the 8-bit or 16-bit variant
      at run time by testing use_pcre16. PCRE_CONFIG is the exception; it
      always names pcre_config().
  (2) Only 8-bit mode is supported: each macro is an alias for its ...8 form.
  (3) Only 16-bit mode is supported: each macro is an alias for its ...16 form.

In configuration (1) the statement-like macros are wrapped in
do { ... } while (0) so that each one expands to exactly one statement. As in
the single-mode configurations, the caller supplies the terminating
semicolon. CHAR_SIZE, STRLEN, PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES are
expressions and can be used as values. ----- */

#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16

#define CHAR_SIZE (use_pcre16? 2:1)

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARS16(lv, p, offset, len, f); \
    else \
      PCHARS8(lv, p, offset, len, f); \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARSV16(p, offset, len, f); \
    else \
      PCHARSV8(p, offset, len, f); \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  do { \
    if (use_pcre16) \
      READ_CAPTURE_NAME16(p, cn8, cn16, re); \
    else \
      READ_CAPTURE_NAME8(p, cn8, cn16, re); \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (use_pcre16) \
      SET_PCRE_CALLOUT16(callout); \
    else \
      SET_PCRE_CALLOUT8(callout); \
  } while (0)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (use_pcre16) \
      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
    else \
      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    else \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
  } while (0)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    else \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    else \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (use_pcre16) \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    else \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_STUDY16(extra); \
    else \
      PCRE_FREE_STUDY8(extra); \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING16(substring); \
    else \
      PCRE_FREE_SUBSTRING8(substring); \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING_LIST16(listptr); \
    else \
      PCRE_FREE_SUBSTRING_LIST8(listptr); \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
  } while (0)

/* The second argument is forwarded unchanged to the mode-specific macro. */
#define PCRE_GET_STRINGNUMBER(n, re, ptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_STRINGNUMBER16(n, re, ptr); \
    else \
      PCRE_GET_STRINGNUMBER8(n, re, ptr); \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    else \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
  } while (0)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (use_pcre16 ? \
     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
    :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (use_pcre16) \
      PCRE_JIT_STACK_FREE16(stack); \
    else \
      PCRE_JIT_STACK_FREE8(stack); \
  } while (0)

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (use_pcre16) \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
    else \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (use_pcre16) \
      PCRE_PRINTINT16(re, outfile, debug_lengths); \
    else \
      PCRE_PRINTINT8(re, outfile, debug_lengths); \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) \
      PCRE_STUDY16(extra, re, options, error); \
    else \
      PCRE_STUDY8(extra, re, options, error); \
  } while (0)

/* ----- Only 8-bit mode is supported ----- */

#elif defined SUPPORT_PCRE8
#define CHAR_SIZE                 1
#define PCHARS                    PCHARS8
#define PCHARSV                   PCHARSV8
#define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
#define STRLEN                    STRLEN8
#define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE              PCRE_COMPILE8
#define PCRE_CONFIG               pcre_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
#define PCRE_EXEC                 PCRE_EXEC8
#define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
#define PCRE_MAKETABLES           pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT             PCRE_PRINTINT8
#define PCRE_STUDY                PCRE_STUDY8

/* ----- Only 16-bit mode is supported ----- */

#else
#define CHAR_SIZE                 2
#define PCHARS                    PCHARS16
#define PCHARSV                   PCHARSV16
#define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
#define STRLEN                    STRLEN16
#define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE              PCRE_COMPILE16
#define PCRE_CONFIG               pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
#define PCRE_EXEC                 PCRE_EXEC16
#define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
#define PCRE_MAKETABLES           pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT             PCRE_PRINTINT16
#define PCRE_STUDY                PCRE_STUDY16
#endif
588
589 /* ----- End of mode-specific function call macros ----- */
590
591
/* Other parameters */

/* Fallback for systems whose headers do not define CLOCKS_PER_SEC: use
CLK_TCK when that is available, otherwise 100. */

#if !defined(CLOCKS_PER_SEC)
#  if defined(CLK_TCK)
#    define CLOCKS_PER_SEC CLK_TCK
#  else
#    define CLOCKS_PER_SEC 100
#  endif
#endif

/* This is the default loop count for timing. */

#define LOOPREPEAT 500000
605
/* Static variables */

/* Output stream. */
static FILE *outfile;

/* Callout-related state. */
static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int first_callout;
static const unsigned char *last_callout_mark = NULL;

/* Option flags. locale_set is consulted by PRINTOK() to choose between
isprint() and the fixed PRINTABLE() ranges. */
static int debug_lengths;
static int locale_set = 0;
static int show_malloc;
static int use_utf;

/* Memory-accounting values. */
static int log_store = 0;
static size_t gotten_store;
static size_t first_gotten_store = 0;
622
623 /* The buffers grow automatically if very long input lines are encountered. */
624
625 static int buffer_size = 50000;
626 static pcre_uint8 *buffer = NULL;
627 static pcre_uint8 *dbuffer = NULL;
628 static pcre_uint8 *pbuffer = NULL;
629
/* Another buffer is needed for translation to 16-bit character strings. It
will be obtained and extended as required. */

#if defined(SUPPORT_PCRE16)
static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;

#if defined(SUPPORT_PCRE8)

/* When both widths are supported, we need the table of operator lengths that
is used for 16-bit compiling, in order to swap bytes in a pattern for
saving/reloading testing. Luckily, the data is defined as a macro
(OP_LENGTHS). However, LINK_SIZE and IMM2_SIZE must first be adjusted to their
values in the 16-bit world. Just as a safety check, make sure that
COMPILE_PCRE16 is *not* set. */

#  if defined(COMPILE_PCRE16)
#    error COMPILE_PCRE16 must not be set when compiling pcretest.c
#  endif

/* An 8-bit LINK_SIZE of 2 becomes 1; a LINK_SIZE of 3 or 4 becomes 2. */

#  if LINK_SIZE == 2
#    undef LINK_SIZE
#    define LINK_SIZE 1
#  elif LINK_SIZE == 3 || LINK_SIZE == 4
#    undef LINK_SIZE
#    define LINK_SIZE 2
#  else
#    error LINK_SIZE must be either 2, 3, or 4
#  endif

#  undef IMM2_SIZE
#  define IMM2_SIZE 1

#endif /* SUPPORT_PCRE8 */

/* Operator lengths for 16-bit patterns, expanded from OP_LENGTHS. */

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE16 */
666
/* use_pcre16 selects the 16-bit library at run time. Without 8-bit support
there must be 16-bit support, so it starts as 1. With 8-bit support it starts
as 0; if there is also 16-bit support, it can be changed by an option. */

#ifndef SUPPORT_PCRE8
static int use_pcre16 = 1;
#else
static int use_pcre16 = 0;
#endif
676
/* Textual explanations for runtime error codes. The number in front of each
entry is its position in the table. A NULL entry means that no text is taken
from this table for that position; the note beside it says why. */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8/16 is handled specially */
  /* 11 */ NULL,  /* BADUTF8/16 offset is handled specially */
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8/16 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached",
  /* 28 */ "pattern compiled in wrong mode: 8-bit/16-bit error"
};
710
711
712 /*************************************************
713 * Alternate character tables *
714 *************************************************/
715
716 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
717 using the default tables of the library. However, the T option can be used to
718 select alternate sets of tables, for different kinds of testing. Note also that
719 the L (locale) option also adjusts the tables. */
720
721 /* This is the set of tables distributed as default with PCRE. It recognizes
722 only ASCII characters. */
723
724 static const pcre_uint8 tables0[] = {
725
726 /* This table is a lower casing table. */
727
728 0, 1, 2, 3, 4, 5, 6, 7,
729 8, 9, 10, 11, 12, 13, 14, 15,
730 16, 17, 18, 19, 20, 21, 22, 23,
731 24, 25, 26, 27, 28, 29, 30, 31,
732 32, 33, 34, 35, 36, 37, 38, 39,
733 40, 41, 42, 43, 44, 45, 46, 47,
734 48, 49, 50, 51, 52, 53, 54, 55,
735 56, 57, 58, 59, 60, 61, 62, 63,
736 64, 97, 98, 99,100,101,102,103,
737 104,105,106,107,108,109,110,111,
738 112,113,114,115,116,117,118,119,
739 120,121,122, 91, 92, 93, 94, 95,
740 96, 97, 98, 99,100,101,102,103,
741 104,105,106,107,108,109,110,111,
742 112,113,114,115,116,117,118,119,
743 120,121,122,123,124,125,126,127,
744 128,129,130,131,132,133,134,135,
745 136,137,138,139,140,141,142,143,
746 144,145,146,147,148,149,150,151,
747 152,153,154,155,156,157,158,159,
748 160,161,162,163,164,165,166,167,
749 168,169,170,171,172,173,174,175,
750 176,177,178,179,180,181,182,183,
751 184,185,186,187,188,189,190,191,
752 192,193,194,195,196,197,198,199,
753 200,201,202,203,204,205,206,207,
754 208,209,210,211,212,213,214,215,
755 216,217,218,219,220,221,222,223,
756 224,225,226,227,228,229,230,231,
757 232,233,234,235,236,237,238,239,
758 240,241,242,243,244,245,246,247,
759 248,249,250,251,252,253,254,255,
760
761 /* This table is a case flipping table. */
762
763 0, 1, 2, 3, 4, 5, 6, 7,
764 8, 9, 10, 11, 12, 13, 14, 15,
765 16, 17, 18, 19, 20, 21, 22, 23,
766 24, 25, 26, 27, 28, 29, 30, 31,
767 32, 33, 34, 35, 36, 37, 38, 39,
768 40, 41, 42, 43, 44, 45, 46, 47,
769 48, 49, 50, 51, 52, 53, 54, 55,
770 56, 57, 58, 59, 60, 61, 62, 63,
771 64, 97, 98, 99,100,101,102,103,
772 104,105,106,107,108,109,110,111,
773 112,113,114,115,116,117,118,119,
774 120,121,122, 91, 92, 93, 94, 95,
775 96, 65, 66, 67, 68, 69, 70, 71,
776 72, 73, 74, 75, 76, 77, 78, 79,
777 80, 81, 82, 83, 84, 85, 86, 87,
778 88, 89, 90,123,124,125,126,127,
779 128,129,130,131,132,133,134,135,
780 136,137,138,139,140,141,142,143,
781 144,145,146,147,148,149,150,151,
782 152,153,154,155,156,157,158,159,
783 160,161,162,163,164,165,166,167,
784 168,169,170,171,172,173,174,175,
785 176,177,178,179,180,181,182,183,
786 184,185,186,187,188,189,190,191,
787 192,193,194,195,196,197,198,199,
788 200,201,202,203,204,205,206,207,
789 208,209,210,211,212,213,214,215,
790 216,217,218,219,220,221,222,223,
791 224,225,226,227,228,229,230,231,
792 232,233,234,235,236,237,238,239,
793 240,241,242,243,244,245,246,247,
794 248,249,250,251,252,253,254,255,
795
796 /* This table contains bit maps for various character classes. Each map is 32
797 bytes long and the bits run from the least significant end of each byte. The
798 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
799 graph, print, punct, and cntrl. Other classes are built from combinations. */
800
801 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
802 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
803 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805
806 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
807 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
808 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
809 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810
811 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
812 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815
816 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
817 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
818 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820
821 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
822 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
823 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
824 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825
826 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
827 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
828 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
829 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830
831 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
832 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
833 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
834 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835
836 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
837 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
838 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
839 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840
841 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
842 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
843 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845
846 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
847 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850
851 /* This table identifies various classes of character by individual bits:
852 0x01 white space character
853 0x02 letter
854 0x04 decimal digit
855 0x08 hexadecimal digit
856 0x10 alphanumeric or '_'
857 0x80 regular expression metacharacter or binary zero
858 */
859
860 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
861 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
862 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
863 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
864 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
865 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
866 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
867 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
868 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
869 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
870 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
871 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
872 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
873 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
874 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
875 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
876 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
877 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
878 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
880 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
881 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
882 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
883 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
884 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
885 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
886 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
887 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
888 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
889 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
890 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
891 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
892
893 /* This is a set of tables that came originally from a Windows user. It seems to
894 be at least an approximation of ISO 8859. In particular, there are characters
895 greater than 128 that are marked as spaces, letters, etc. */
896
897 static const pcre_uint8 tables1[] = {
/* Entries 0-255: lower-casing table. Codes 65-90 map to 97-122 and codes
192-222 (except 215) map to 224-254; every other code maps to itself. */
898 0,1,2,3,4,5,6,7,
899 8,9,10,11,12,13,14,15,
900 16,17,18,19,20,21,22,23,
901 24,25,26,27,28,29,30,31,
902 32,33,34,35,36,37,38,39,
903 40,41,42,43,44,45,46,47,
904 48,49,50,51,52,53,54,55,
905 56,57,58,59,60,61,62,63,
906 64,97,98,99,100,101,102,103,
907 104,105,106,107,108,109,110,111,
908 112,113,114,115,116,117,118,119,
909 120,121,122,91,92,93,94,95,
910 96,97,98,99,100,101,102,103,
911 104,105,106,107,108,109,110,111,
912 112,113,114,115,116,117,118,119,
913 120,121,122,123,124,125,126,127,
914 128,129,130,131,132,133,134,135,
915 136,137,138,139,140,141,142,143,
916 144,145,146,147,148,149,150,151,
917 152,153,154,155,156,157,158,159,
918 160,161,162,163,164,165,166,167,
919 168,169,170,171,172,173,174,175,
920 176,177,178,179,180,181,182,183,
921 184,185,186,187,188,189,190,191,
922 224,225,226,227,228,229,230,231,
923 232,233,234,235,236,237,238,239,
924 240,241,242,243,244,245,246,215,
925 248,249,250,251,252,253,254,223,
926 224,225,226,227,228,229,230,231,
927 232,233,234,235,236,237,238,239,
928 240,241,242,243,244,245,246,247,
929 248,249,250,251,252,253,254,255,
/* Entries 256-511: case-flipping table. Codes 65-90 and 97-122 are exchanged,
as are 192-222 and 224-254 (215 and 247 excepted); other codes map to
themselves. */
930 0,1,2,3,4,5,6,7,
931 8,9,10,11,12,13,14,15,
932 16,17,18,19,20,21,22,23,
933 24,25,26,27,28,29,30,31,
934 32,33,34,35,36,37,38,39,
935 40,41,42,43,44,45,46,47,
936 48,49,50,51,52,53,54,55,
937 56,57,58,59,60,61,62,63,
938 64,97,98,99,100,101,102,103,
939 104,105,106,107,108,109,110,111,
940 112,113,114,115,116,117,118,119,
941 120,121,122,91,92,93,94,95,
942 96,65,66,67,68,69,70,71,
943 72,73,74,75,76,77,78,79,
944 80,81,82,83,84,85,86,87,
945 88,89,90,123,124,125,126,127,
946 128,129,130,131,132,133,134,135,
947 136,137,138,139,140,141,142,143,
948 144,145,146,147,148,149,150,151,
949 152,153,154,155,156,157,158,159,
950 160,161,162,163,164,165,166,167,
951 168,169,170,171,172,173,174,175,
952 176,177,178,179,180,181,182,183,
953 184,185,186,187,188,189,190,191,
954 224,225,226,227,228,229,230,231,
955 232,233,234,235,236,237,238,239,
956 240,241,242,243,244,245,246,215,
957 248,249,250,251,252,253,254,223,
958 192,193,194,195,196,197,198,199,
959 200,201,202,203,204,205,206,207,
960 208,209,210,211,212,213,214,247,
961 216,217,218,219,220,221,222,255,
/* Next 320 bytes: character class bit maps. Presumably ten 32-byte maps laid
out like the default tables earlier in this file (space, xdigit, digit, upper,
lower, word, graph, print, punct, cntrl) - TODO confirm the ordering. */
962 0,62,0,0,1,0,0,0,
963 0,0,0,0,0,0,0,0,
964 32,0,0,0,1,0,0,0,
965 0,0,0,0,0,0,0,0,
966 0,0,0,0,0,0,255,3,
967 126,0,0,0,126,0,0,0,
968 0,0,0,0,0,0,0,0,
969 0,0,0,0,0,0,0,0,
970 0,0,0,0,0,0,255,3,
971 0,0,0,0,0,0,0,0,
972 0,0,0,0,0,0,12,2,
973 0,0,0,0,0,0,0,0,
974 0,0,0,0,0,0,0,0,
975 254,255,255,7,0,0,0,0,
976 0,0,0,0,0,0,0,0,
977 255,255,127,127,0,0,0,0,
978 0,0,0,0,0,0,0,0,
979 0,0,0,0,254,255,255,7,
980 0,0,0,0,0,4,32,4,
981 0,0,0,128,255,255,127,255,
982 0,0,0,0,0,0,255,3,
983 254,255,255,135,254,255,255,7,
984 0,0,0,0,0,4,44,6,
985 255,255,127,255,255,255,127,255,
986 0,0,0,0,254,255,255,255,
987 255,255,255,255,255,255,255,127,
988 0,0,0,0,254,255,255,255,
989 255,255,255,255,255,255,255,255,
990 0,2,0,0,255,255,255,255,
991 255,255,255,255,255,255,255,127,
992 0,0,0,0,255,255,255,255,
993 255,255,255,255,255,255,255,255,
994 0,0,0,0,254,255,0,252,
995 1,0,0,248,1,0,0,120,
996 0,0,0,0,254,255,255,255,
997 0,0,128,0,0,0,128,0,
998 255,255,255,255,0,0,0,0,
999 0,0,0,0,0,0,0,128,
1000 255,255,255,255,0,0,0,0,
1001 0,0,0,0,0,0,0,0,
/* Final 256 bytes: one flag byte per character code. Presumably the same bit
meanings as the default tables' type table earlier in this file (1 = space,
2 = letter, 4 = digit, 8 = hex digit, 16 = word character, 128 = regex
metacharacter or binary zero) - TODO confirm. */
1002 128,0,0,0,0,0,0,0,
1003 0,1,1,0,1,1,0,0,
1004 0,0,0,0,0,0,0,0,
1005 0,0,0,0,0,0,0,0,
1006 1,0,0,0,128,0,0,0,
1007 128,128,128,128,0,0,128,0,
1008 28,28,28,28,28,28,28,28,
1009 28,28,0,0,0,0,0,128,
1010 0,26,26,26,26,26,26,18,
1011 18,18,18,18,18,18,18,18,
1012 18,18,18,18,18,18,18,18,
1013 18,18,18,128,128,0,128,16,
1014 0,26,26,26,26,26,26,18,
1015 18,18,18,18,18,18,18,18,
1016 18,18,18,18,18,18,18,18,
1017 18,18,18,128,128,0,0,0,
1018 0,0,0,0,0,1,0,0,
1019 0,0,0,0,0,0,0,0,
1020 0,0,0,0,0,0,0,0,
1021 0,0,0,0,0,0,0,0,
1022 1,0,0,0,0,0,0,0,
1023 0,0,18,0,0,0,0,0,
1024 0,0,20,20,0,18,0,0,
1025 0,20,18,0,0,0,0,0,
1026 18,18,18,18,18,18,18,18,
1027 18,18,18,18,18,18,18,18,
1028 18,18,18,18,18,18,18,0,
1029 18,18,18,18,18,18,18,18,
1030 18,18,18,18,18,18,18,18,
1031 18,18,18,18,18,18,18,18,
1032 18,18,18,18,18,18,18,0,
1033 18,18,18,18,18,18,18,18
1034 };
1035
1036
1037
1038
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that have no strerror() of their own (SunOS4 is
one such system). The message is taken from the sys_errlist[] table, whose
length is sys_nerr; a number outside the table yields a fixed string. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1058
1059
1060 /*************************************************
1061 * JIT memory callback *
1062 *************************************************/
1063
1064 static pcre_jit_stack* jit_callback(void *arg)
1065 {
1066 return (pcre_jit_stack *)arg;
1067 }
1068
1069
1070 #if !defined NOUTF || defined SUPPORT_PCRE16
1071 /*************************************************
1072 * Convert UTF-8 string to value *
1073 *************************************************/
1074
1075 /* This function takes one or more bytes that represents a UTF-8 character,
1076 and returns the value of the character.
1077
1078 Argument:
1079 utf8bytes a pointer to the byte vector
1080 vptr a pointer to an int to receive the value
1081
1082 Returns: > 0 => the number of bytes consumed
1083 -6 to 0 => malformed UTF-8 character at offset = (-return)
1084 */
1085
1086 static int
1087 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1088 {
1089 int c = *utf8bytes++;
1090 int d = c;
1091 int i, j, s;
1092
1093 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1094 {
1095 if ((d & 0x80) == 0) break;
1096 d <<= 1;
1097 }
1098
1099 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1100 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1101
1102 /* i now has a value in the range 1-5 */
1103
1104 s = 6*i;
1105 d = (c & utf8_table3[i]) << s;
1106
1107 for (j = 0; j < i; j++)
1108 {
1109 c = *utf8bytes++;
1110 if ((c & 0xc0) != 0x80) return -(j+1);
1111 s -= 6;
1112 d |= (c & 0x3f) << s;
1113 }
1114
1115 /* Check that encoding was the correct unique one */
1116
1117 for (j = 0; j < utf8_table1_size; j++)
1118 if (d <= utf8_table1[j]) break;
1119 if (j != i) return -(i+1);
1120
1121 /* Valid value */
1122
1123 *vptr = d;
1124 return i+1;
1125 }
1126 #endif /* NOUTF || SUPPORT_PCRE16 */
1127
1128
1129
1130 #if !defined NOUTF || defined SUPPORT_PCRE16
1131 /*************************************************
1132 * Convert character value to UTF-8 *
1133 *************************************************/
1134
1135 /* This function takes an integer value in the range 0 - 0x7fffffff
1136 and encodes it as a UTF-8 character in 0 to 6 bytes.
1137
1138 Arguments:
1139 cvalue the character value
1140 utf8bytes pointer to buffer for result - at least 6 bytes long
1141
1142 Returns: number of characters placed in the buffer
1143 */
1144
1145 static int
1146 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1147 {
1148 register int i, j;
1149 for (i = 0; i < utf8_table1_size; i++)
1150 if (cvalue <= utf8_table1[i]) break;
1151 utf8bytes += i;
1152 for (j = i; j > 0; j--)
1153 {
1154 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1155 cvalue >>= 6;
1156 }
1157 *utf8bytes = utf8_table2[i] | cvalue;
1158 return i + 1;
1159 }
1160 #endif
1161
1162
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The converted string is always left in buffer16, which is re-allocated when
it is smaller than 2*len + 2 bytes. That is enough in every case: without UTF
each input byte becomes one 16-bit unit, and a UTF-8 character never needs more
bytes in UTF-16 than twice its UTF-8 length.

Surrogate values are deliberately not rejected, so that invalid UTF-16 strings
can be built for testing that they are correctly faulted.

A pattern is copied unit for unit unless utf is set; a data line is always
decoded as UTF-8 so that values above 255 can be expressed.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;

if (buffer16_size < needed)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  int c = 0;
  while (len > 0)
    {
    int chlen = utf82ord(p, &c);
    if (chlen <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += chlen;
    len -= chlen;

    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which only exists in UTF mode. */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }
else
  {
  /* Non-UTF pattern: widen each byte to one 16-bit unit. */
  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1241
1242
1243 /*************************************************
1244 * Read or extend an input line *
1245 *************************************************/
1246
1247 /* Input lines are read into buffer, but both patterns and data lines can be
1248 continued over multiple input lines. In addition, if the buffer fills up, we
1249 want to automatically expand it so as to be able to handle extremely large
1250 lines that are needed for certain stress tests. When the input buffer is
1251 expanded, the other two buffers must also be expanded likewise, and the
1252 contents of pbuffer, which are a copy of the input for callouts, must be
1253 preserved (for when expansion happens for a data line). This is not the most
1254 optimal way of handling this, but hey, this is just a test program!
1255
1256 Arguments:
1257 f the file to read
1258 start where in buffer to start (this *must* be within buffer)
1259 prompt for stdin or readline()
1260
1261 Returns: pointer to the start of new data
1262 could be a copy of start, or could be moved
1263 NULL if no data read and EOF reached
1264 */
1265
1266 static pcre_uint8 *
1267 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1268 {
1269 pcre_uint8 *here = start;
1270
1271 for (;;)
1272 {
1273 size_t rlen = (size_t)(buffer_size - (here - buffer));
1274
1275 if (rlen > 1000)
1276 {
1277 int dlen;
1278
1279 /* If libreadline support is required, use readline() to read a line if the
1280 input is a terminal. Note that readline() removes the trailing newline, so
1281 we must put it back again, to be compatible with fgets(). */
1282
1283 #ifdef SUPPORT_LIBREADLINE
1284 if (isatty(fileno(f)))
1285 {
1286 size_t len;
1287 char *s = readline(prompt);
1288 if (s == NULL) return (here == start)? NULL : start;
1289 len = strlen(s);
1290 if (len > 0) add_history(s);
1291 if (len > rlen - 1) len = rlen - 1;
1292 memcpy(here, s, len);
1293 here[len] = '\n';
1294 here[len+1] = 0;
1295 free(s);
1296 }
1297 else
1298 #endif
1299
1300 /* Read the next line by normal means, prompting if the file is stdin. */
1301
1302 {
1303 if (f == stdin) printf("%s", prompt);
1304 if (fgets((char *)here, rlen, f) == NULL)
1305 return (here == start)? NULL : start;
1306 }
1307
1308 dlen = (int)strlen((char *)here);
1309 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1310 here += dlen;
1311 }
1312
1313 else
1314 {
1315 int new_buffer_size = 2*buffer_size;
1316 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1317 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1318 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319
1320 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1321 {
1322 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1323 exit(1);
1324 }
1325
1326 memcpy(new_buffer, buffer, buffer_size);
1327 memcpy(new_pbuffer, pbuffer, buffer_size);
1328
1329 buffer_size = new_buffer_size;
1330
1331 start = new_buffer + (start - buffer);
1332 here = new_buffer + (here - buffer);
1333
1334 free(buffer);
1335 free(dbuffer);
1336 free(pbuffer);
1337
1338 buffer = new_buffer;
1339 dbuffer = new_dbuffer;
1340 pbuffer = new_pbuffer;
1341 }
1342 }
1343
1344 return NULL; /* Control never gets here */
1345 }
1346
1347
1348
1349 /*************************************************
1350 * Read number from string *
1351 *************************************************/
1352
1353 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1354 around with conditional compilation, just do the job by hand. It is only used
1355 for unpicking arguments, so just keep it simple.
1356
1357 Arguments:
1358 str string to be converted
1359 endptr where to put the end pointer
1360
1361 Returns: the converted value, as an int
1362 */
1363
1364 static int
1365 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1366 {
1367 int result = 0;
1368 while(*str != 0 && isspace(*str)) str++;
1369 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1370 *endptr = str;
1371 return(result);
1372 }
1373
1374
1375
1376 /*************************************************
1377 * Print one character *
1378 *************************************************/
1379
1380 /* Print a single character either literally, or as a hex escape. */
1381
1382 static int pchar(int c, FILE *f)
1383 {
1384 if (PRINTOK(c))
1385 {
1386 if (f != NULL) fprintf(f, "%c", c);
1387 return 1;
1388 }
1389
1390 if (c < 0x100)
1391 {
1392 if (use_utf)
1393 {
1394 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1395 return 6;
1396 }
1397 else
1398 {
1399 if (f != NULL) fprintf(f, "\\x%02x", c);
1400 return 4;
1401 }
1402 }
1403
1404 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1405 return (c <= 0x000000ff)? 6 :
1406 (c <= 0x00000fff)? 7 :
1407 (c <= 0x0000ffff)? 8 :
1408 (c <= 0x000fffff)? 9 : 10;
1409 }
1410
1411
1412
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Prints length bytes starting at p, one character at a time via pchar(). A
negative length means "up to the terminating zero". When use_utf is set, each
well-formed UTF-8 sequence that fits in the remaining bytes is printed as one
character; anything else is printed byte by byte. The yield is the sum of the
values returned by pchar(). A NULL file is passed straight through to
pchar(). */

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int c = 0;
int yield = 0;
int remaining = (length < 0)? (int)strlen((char *)p) : length;

while (remaining > 0)
  {
  int used = 0;            /* bytes taken by this character; 0 = undecided */

#if !defined NOUTF
  if (use_utf)
    {
    int rc = utf82ord(p, &c);
    if (rc > 0 && rc <= remaining) used = rc;   /* Mustn't run over the end */
    }
#endif

  if (used == 0)           /* plain byte, or unusable UTF-8 */
    {
    c = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  yield += pchar(c, f);
  }

return yield;
}
#endif
1451
1452
1453
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Returns the number of 16-bit units that precede the first zero unit. */

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1466
1467
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Prints length 16-bit units starting at p, one character at a time via
pchar(). A negative length means "up to the terminating zero". When use_utf is
set, a high surrogate (0xD800-0xDBFF) that is followed by a low surrogate
(0xDC00-0xDFFF) is combined with it into a single character. The yield is the
sum of the values returned by pchar(). A NULL file is passed straight through
to pchar(). */

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The low surrogate range includes 0xDFFF itself. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1504
1505
1506
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copies the run of alphanumeric characters at p to the position *pp, follows
it with two zero bytes, and reports on outfile if the compiled pattern has no
group of that name. On return *pp points at the second zero.

Arguments:
  p          points to the name in the input text
  pp         points to the current position in the name list
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
*out++ = 0;
*out = 0;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1530
1531
1532
#ifdef SUPPORT_PCRE16
/*************************************************
*     Read a capture name (16-bit) and check it  *
*************************************************/

/* The text being read is 8-bit; each alphanumeric character at p is widened
to a 16-bit unit and stored at *pp, followed by two zero units. A message is
written to outfile if the compiled pattern has no group of that name. On return
*pp points at the second zero.

Arguments:
  p          points to the name in the input text
  pp         points to the current position in the 16-bit name list
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
*out++ = 0;
*out = 0;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1557
1558
1559
1560 /*************************************************
1561 * Callout function *
1562 *************************************************/
1563
1564 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1565 the match. Yield zero unless more callouts than the fail count, or the callout
1566 data is not zero. */
1567
/* Callout handler: prints the subject and position markers for the callout
described by cb, then decides the return value.

Returns:   the int pointed to by cb->callout_data when that is non-zero;
           otherwise 1 once the callout numbered callout_fail_id has been
           counted callout_fail_count times, and 0 in every other case
*/
1568 static int callout(pcre_callout_block *cb)
1569 {
/* f is NULL unless this is the first callout or extra detail was requested;
the subject is then measured below but not printed. */
1570 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1571 int i, pre_start, post_start, subject_length;
1572
/* With callout_extra set, f is always outfile, so the unguarded fprintf()
calls in this section are safe. One line is printed per capture slot. */
1573 if (callout_extra)
1574 {
1575 fprintf(f, "Callout %d: last capture = %d\n",
1576 cb->callout_number, cb->capture_last);
1577
1578 for (i = 0; i < cb->capture_top * 2; i += 2)
1579 {
1580 if (cb->offset_vector[i] < 0)
1581 fprintf(f, "%2d: <unset>\n", i/2);
1582 else
1583 {
1584 fprintf(f, "%2d: ", i/2);
1585 PCHARSV(cb->subject, cb->offset_vector[i],
1586 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1587 fprintf(f, "\n");
1588 }
1589 }
1590 }
1591
1592 /* Re-print the subject in canonical form, the first time or if giving full
1593 details. On subsequent calls in the same match, we use pchars just to find the
1594 printed lengths of the substrings. */
1595
1596 if (f != NULL) fprintf(f, "--->");
1597
/* PCHARS presumably stores the printed width of the given substring in its
first argument - confirm against the macro definition. */
1598 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1599 PCHARS(post_start, cb->subject, cb->start_match,
1600 cb->current_position - cb->start_match, f);
1601
1602 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1603
1604 PCHARSV(cb->subject, cb->current_position,
1605 cb->subject_length - cb->current_position, f);
1606
1607 if (f != NULL) fprintf(f, "\n");
1608
1609 /* Always print appropriate indicators, with callout number if not already
1610 shown. For automatic callouts, show the pattern offset. */
1611
1612 if (cb->callout_number == 255)
1613 {
1614 fprintf(outfile, "%+3d ", cb->pattern_position);
1615 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1616 }
1617 else
1618 {
1619 if (callout_extra) fprintf(outfile, " ");
1620 else fprintf(outfile, "%3d ", cb->callout_number);
1621 }
1622
/* Put a caret under the start of the match and, when the current position is
beyond it, a second caret post_start columns further on. */
1623 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1624 fprintf(outfile, "^");
1625
1626 if (post_start > 0)
1627 {
1628 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1629 fprintf(outfile, "^");
1630 }
1631
/* Pad out to beyond the end of the printed subject, then show the next
pattern item, or a single character when next_item_length is zero. */
1632 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1633 fprintf(outfile, " ");
1634
1635 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1636 pbuffer + cb->pattern_position);
1637
1638 fprintf(outfile, "\n");
1639 first_callout = 0;
1640
/* Report the mark only when the pointer differs from the one seen at the
previous callout. */
1641 if (cb->mark != last_callout_mark)
1642 {
1643 if (cb->mark == NULL)
1644 fprintf(outfile, "Latest Mark: <unset>\n");
1645 else
1646 {
1647 fprintf(outfile, "Latest Mark: ");
1648 PCHARSV(cb->mark, 0, -1, outfile);
1649 putc('\n', outfile);
1650 }
1651 last_callout_mark = cb->mark;
1652 }
1653
/* Non-zero callout data is printed and becomes the return value. */
1654 if (cb->callout_data != NULL)
1655 {
1656 int callout_data = *((int *)(cb->callout_data));
1657 if (callout_data != 0)
1658 {
1659 fprintf(outfile, "Callout data = %d\n", callout_data);
1660 return callout_data;
1661 }
1662 }
1663
/* Only callouts numbered callout_fail_id are counted; the yield becomes 1 once
the count reaches callout_fail_count. */
1664 return (cb->callout_number != callout_fail_id)? 0 :
1665 (++callout_count >= callout_fail_count)? 1 : 0;
1666 }
1667
1668
1669 /*************************************************
1670 * Local malloc functions *
1671 *************************************************/
1672
1673 /* Alternative malloc function, to test functionality and save the size of a
1674 compiled re, which is the first store request that pcre_compile() makes. The
1675 show_malloc variable is set only during matching. */
1676
1677 static void *new_malloc(size_t size)
1678 {
1679 void *block = malloc(size);
1680 gotten_store = size;
1681 if (first_gotten_store == 0) first_gotten_store = size;
1682 if (show_malloc)
1683 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1684 return block;
1685 }
1686
1687 static void new_free(void *block)
1688 {
1689 if (show_malloc)
1690 fprintf(outfile, "free %p\n", block);
1691 free(block);
1692 }
1693
1694 /* For recursion malloc/free, to test stacking calls */
1695
1696 static void *stack_malloc(size_t size)
1697 {
1698 void *block = malloc(size);
1699 if (show_malloc)
1700 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1701 return block;
1702 }
1703
1704 static void stack_free(void *block)
1705 {
1706 if (show_malloc)
1707 fprintf(outfile, "stack_free %p\n", block);
1708 free(block);
1709 }
1710
1711
1712 /*************************************************
1713 * Call pcre_fullinfo() *
1714 *************************************************/
1715
1716 /* Get one piece of information from the pcre_fullinfo() function. When only
1717 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1718 value, but the code is defensive.
1719
1720 Arguments:
1721 re compiled regex
1722 study study data
1723 option PCRE_INFO_xxx option
1724 ptr where to put the data
1725
1726 Returns: 0 when OK, < 0 on error
1727 */
1728
1729 static int
1730 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1731 {
1732 int rc;
1733
1734 if (use_pcre16)
1735 #ifdef SUPPORT_PCRE16
1736 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1737 #else
1738 rc = PCRE_ERROR_BADMODE;
1739 #endif
1740 else
1741 #ifdef SUPPORT_PCRE8
1742 rc = pcre_fullinfo(re, study, option, ptr);
1743 #else
1744 rc = PCRE_ERROR_BADMODE;
1745 #endif
1746
1747 if (rc < 0)
1748 {
1749 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1750 use_pcre16? "16" : "", option);
1751 if (rc == PCRE_ERROR_BADMODE)
1752 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1753 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1754 }
1755
1756 return rc;
1757 }
1758
1759
1760
1761 /*************************************************
1762 * Swap byte functions *
1763 *************************************************/
1764
1765 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1766 value, respectively.
1767
1768 Arguments:
1769 value any number
1770
1771 Returns: the byte swapped value
1772 */
1773
1774 static pcre_uint32
1775 swap_uint32(pcre_uint32 value)
1776 {
1777 return ((value & 0x000000ff) << 24) |
1778 ((value & 0x0000ff00) << 8) |
1779 ((value & 0x00ff0000) >> 8) |
1780 (value >> 24);
1781 }
1782
1783 static pcre_uint16
1784 swap_uint16(pcre_uint16 value)
1785 {
1786 return (value >> 8) | (value << 8);
1787 }
1788
1789
1790
1791 /*************************************************
1792 * Flip bytes in a compiled pattern *
1793 *************************************************/
1794
1795 /* This function is called if the 'F' option was present on a pattern that is
1796 to be written to a file. We flip the bytes of all the integer fields in the
1797 regex data block and the study block. In 16-bit mode this also flips relevant
1798 bytes in the pattern itself. This is to make it possible to test PCRE's
1799 ability to reload byte-flipped patterns, e.g. those compiled on a different
1800 architecture. */
1801
/* Byte-flip a compiled pattern in place.

Arguments:
  ere        the compiled pattern
  extra      the extra block whose study data is also flipped, or NULL

The header fields (and the study data fields, when extra is not NULL) are
always flipped. The walk through the name table and the opcodes is compiled
only with SUPPORT_PCRE16 and is skipped at run time unless use_pcre16 is set. */
1802 static void
1803 regexflip(pcre *ere, pcre_extra *extra)
1804 {
1805 REAL_PCRE *re = (REAL_PCRE *)ere;
1806 #ifdef SUPPORT_PCRE16
1807 int op;
/* ptr and length are set up here, from header fields that have not yet been
flipped, so that the first pass of the swap loop below covers the name
table. */
1808 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1809 int length = re->name_count * re->name_entry_size;
1810 #ifdef SUPPORT_UTF
1811 BOOL utf = (re->options & PCRE_UTF16) != 0;
1812 BOOL utf16_char = FALSE;
1813 #endif /* SUPPORT_UTF */
1814 #endif /* SUPPORT_PCRE16 */
1815
1816 /* Always flip the bytes in the main data block and study blocks. */
1817
1818 re->magic_number = REVERSED_MAGIC_NUMBER;
1819 re->size = swap_uint32(re->size);
1820 re->options = swap_uint32(re->options);
1821 re->flags = swap_uint16(re->flags);
1822 re->top_bracket = swap_uint16(re->top_bracket);
1823 re->top_backref = swap_uint16(re->top_backref);
1824 re->first_char = swap_uint16(re->first_char);
1825 re->req_char = swap_uint16(re->req_char);
1826 re->name_table_offset = swap_uint16(re->name_table_offset);
1827 re->name_entry_size = swap_uint16(re->name_entry_size);
1828 re->name_count = swap_uint16(re->name_count);
1829
1830 if (extra != NULL)
1831 {
1832 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1833 rsd->size = swap_uint32(rsd->size);
1834 rsd->flags = swap_uint32(rsd->flags);
1835 rsd->minlength = swap_uint32(rsd->minlength);
1836 }
1837
1838 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1839 in the name table, if present, and then in the pattern itself. */
1840
1841 #ifdef SUPPORT_PCRE16
1842 if (!use_pcre16) return;
1843
/* Each pass swaps the "length" units left pending by the previous pass (the
name table on the first pass, opcode arguments afterwards), then reads, swaps
and classifies the next opcode. The loop is left only at OP_END. */
1844 while(TRUE)
1845 {
1846 /* Swap previous characters. */
1847 while (length-- > 0)
1848 {
1849 *ptr = swap_uint16(*ptr);
1850 ptr++;
1851 }
1852 #ifdef SUPPORT_UTF
1853 if (utf16_char)
1854 {
/* NOTE(review): ptr[-1] has already been byte-swapped by the loop above, so
this surrogate test is applied to the swapped value - confirm that this is
what is intended. */
1855 if ((ptr[-1] & 0xfc00) == 0xd800)
1856 {
1857 /* We know that there is only one extra character in UTF-16. */
1858 *ptr = swap_uint16(*ptr);
1859 ptr++;
1860 }
1861 }
1862 utf16_char = FALSE;
1863 #endif /* SUPPORT_UTF */
1864
1865 /* Get next opcode. */
1866
1867 length = 0;
1868 op = *ptr;
1869 *ptr++ = swap_uint16(op);
1870
1871 switch (op)
1872 {
1873 case OP_END:
1874 return;
1875
/* For the opcodes listed here, utf16_char is set in UTF mode so that the next
pass checks for one extra unit after the opcode's fixed-length arguments. */
1876 #ifdef SUPPORT_UTF
1877 case OP_CHAR:
1878 case OP_CHARI:
1879 case OP_NOT:
1880 case OP_NOTI:
1881 case OP_STAR:
1882 case OP_MINSTAR:
1883 case OP_PLUS:
1884 case OP_MINPLUS:
1885 case OP_QUERY:
1886 case OP_MINQUERY:
1887 case OP_UPTO:
1888 case OP_MINUPTO:
1889 case OP_EXACT:
1890 case OP_POSSTAR:
1891 case OP_POSPLUS:
1892 case OP_POSQUERY:
1893 case OP_POSUPTO:
1894 case OP_STARI:
1895 case OP_MINSTARI:
1896 case OP_PLUSI:
1897 case OP_MINPLUSI:
1898 case OP_QUERYI:
1899 case OP_MINQUERYI:
1900 case OP_UPTOI:
1901 case OP_MINUPTOI:
1902 case OP_EXACTI:
1903 case OP_POSSTARI:
1904 case OP_POSPLUSI:
1905 case OP_POSQUERYI:
1906 case OP_POSUPTOI:
1907 case OP_NOTSTAR:
1908 case OP_NOTMINSTAR:
1909 case OP_NOTPLUS:
1910 case OP_NOTMINPLUS:
1911 case OP_NOTQUERY:
1912 case OP_NOTMINQUERY:
1913 case OP_NOTUPTO:
1914 case OP_NOTMINUPTO:
1915 case OP_NOTEXACT:
1916 case OP_NOTPOSSTAR:
1917 case OP_NOTPOSPLUS:
1918 case OP_NOTPOSQUERY:
1919 case OP_NOTPOSUPTO:
1920 case OP_NOTSTARI:
1921 case OP_NOTMINSTARI:
1922 case OP_NOTPLUSI:
1923 case OP_NOTMINPLUSI:
1924 case OP_NOTQUERYI:
1925 case OP_NOTMINQUERYI:
1926 case OP_NOTUPTOI:
1927 case OP_NOTMINUPTOI:
1928 case OP_NOTEXACTI:
1929 case OP_NOTPOSSTARI:
1930 case OP_NOTPOSPLUSI:
1931 case OP_NOTPOSQUERYI:
1932 case OP_NOTPOSUPTOI:
1933 if (utf) utf16_char = TRUE;
1934 #endif
1935 /* Fall through. */
1936
/* The opcode unit itself has been handled, so one less than the table length
remains to be swapped on the next pass. */
1937 default:
1938 length = OP_lengths16[op] - 1;
1939 break;
1940
1941 case OP_CLASS:
1942 case OP_NCLASS:
1943 /* Skip the character bit map. */
1944 ptr += 32/sizeof(pcre_uint16);
1945 length = 0;
1946 break;
1947
1948 case OP_XCLASS:
1949 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
/* The size is read before it is flipped; the opcode unit, the size field and
the following unit are taken off because they are handled separately here. */
1950 if (LINK_SIZE > 1)
1951 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1952 - (1 + LINK_SIZE + 1));
1953 else
1954 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1955
1956 /* Reverse the size of the XCLASS instance. */
1957 *ptr = swap_uint16(*ptr);
1958 ptr++;
1959 if (LINK_SIZE > 1)
1960 {
1961 *ptr = swap_uint16(*ptr);
1962 ptr++;
1963 }
1964
/* op is re-used for the unit that follows the size; it is tested against
XCL_MAP to see whether a bit map comes next. */
1965 op = *ptr;
1966 *ptr = swap_uint16(op);
1967 ptr++;
1968 if ((op & XCL_MAP) != 0)
1969 {
1970 /* Skip the character bit map. */
1971 ptr += 32/sizeof(pcre_uint16);
1972 length -= 32/sizeof(pcre_uint16);
1973 }
1974 break;
1975 }
1976 }
1977 /* Control should never reach here in 16 bit mode. */
1978 #endif /* SUPPORT_PCRE16 */
1979 }
1980
1981
1982
1983 /*************************************************
1984 * Check match or recursion limit *
1985 *************************************************/
1986
1987 static int
1988 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1989 int start_offset, int options, int *use_offsets, int use_size_offsets,
1990 int flag, unsigned long int *limit, int errnumber, const char *msg)
1991 {
1992 int count;
1993 int min = 0;
1994 int mid = 64;
1995 int max = -1;
1996
1997 extra->flags |= flag;
1998
1999 for (;;)
2000 {
2001 *limit = mid;
2002
2003 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2004 use_offsets, use_size_offsets);
2005
2006 if (count == errnumber)
2007 {
2008 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2009 min = mid;
2010 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2011 }
2012
2013 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2014 count == PCRE_ERROR_PARTIAL)
2015 {
2016 if (mid == min + 1)
2017 {
2018 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2019 break;
2020 }
2021 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2022 max = mid;
2023 mid = (min + mid)/2;
2024 }
2025 else break; /* Some other error */
2026 }
2027
2028 extra->flags &= ~flag;
2029 return count;
2030 }
2031
2032
2033
2034 /*************************************************
2035 * Case-independent strncmp() function *
2036 *************************************************/
2037
/* Compare two strings without regard to case, looking at no more than n
characters. Like strncmp(), the comparison also stops when both strings end
at the same position, so equal strings shorter than n are never read beyond
their terminating zero. A zero or negative n compares nothing.

Arguments:
  s           first string
  t           second string
  n           maximum number of characters to compare

Returns:      < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
while (n-- > 0)
  {
  pcre_uint8 a = *s++;
  pcre_uint8 b = *t++;
  int c = tolower(a) - tolower(b);
  if (c != 0) return c;

  /* The characters matched; if that character was the terminator, both
  strings have ended and there is nothing more to compare. */

  if (a == 0) break;
  }
return 0;
}
2057
2058
2059
2060 /*************************************************
2061 * Check newline indicator *
2062 *************************************************/
2063
2064 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2065 a message and return 0 if there is no match.
2066
2067 Arguments:
2068 p points after the leading '<'
2069 f file for error message
2070
2071 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2072 */
2073
2074 static int
2075 check_newline(pcre_uint8 *p, FILE *f)
2076 {
2077 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2078 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2079 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2080 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2081 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2082 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2083 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2084 fprintf(f, "Unknown newline type at: <%s\n", p);
2085 return 0;
2086 }
2087
2088
2089
2090 /*************************************************
2091 * Usage function *
2092 *************************************************/
2093
/* Write the command-line help text to stdout. The text is held as a table of
lines; entries for features that can be compiled out (readline, the 16-bit
library, DFA matching, the POSIX interface) are selected by the same
preprocessor symbols that control those features. None of the lines contains
a '%' character, so they are written verbatim with fputs(). */

static void
usage(void)
{
static const char *const text[] =
  {
  "Usage: pcretest [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#ifdef SUPPORT_LIBREADLINE
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcretest is not linked with readline().\n",
#endif
  "\nOptions:\n",
#ifdef SUPPORT_PCRE16
  " -16 use the 16-bit library\n",
#endif
  " -b show compiled code\n",
  " -C show PCRE compile-time options and exit\n",
  " -C arg show a specific compile-time option\n",
  " and exit with its value. The arg can be:\n",
  " linksize internal link size [2, 3, 4]\n",
  " pcre8 8 bit library support enabled [0, 1]\n",
  " pcre16 16 bit library support enabled [0, 1]\n",
  " utf Unicode Transformation Format supported [0, 1]\n",
  " ucp Unicode Properties supported [0, 1]\n",
  " jit Just-in-time compiler supported [0, 1]\n",
  " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n",
  " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
  " -dfa force DFA matching for all subjects\n",
#endif
  " -help show usage information\n",
  " -i show information about compiled patterns\n",
  " -M find MATCH_LIMIT minimum for each subject\n",
  " -m output memory used information\n",
  " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
  " -p use POSIX interface\n",
#endif
  " -q quiet: do not output PCRE version number at start\n",
  " -S <n> set stack size to <n> megabytes\n",
  " -s force each pattern to be studied at basic level\n",
  " -s+ force each pattern to be studied, using JIT if available\n",
  " -s++ ditto, verifying when JIT was actually used\n",
  " -t time compilation and execution\n",
  " -t <n> time compilation and execution, repeating <n> times\n",
  " -tm time execution (matching) only\n",
  " -tm <n> time execution (matching) only, repeating <n> times\n"
  };
unsigned int i;

for (i = 0; i < sizeof(text)/sizeof(text[0]); i++)
  fputs(text[i], stdout);
}
2141
2142
2143
2144 /*************************************************
2145 * Main Program *
2146 *************************************************/
2147
2148 /* Read lines from named file or stdin and write to named file or stdout; lines
2149 consist of a regular expression, in delimiters and optionally followed by
2150 options, followed by a set of test data, terminated by an empty line. */
2151
2152 int main(int argc, char **argv)
2153 {
2154 FILE *infile = stdin;
2155 const char *version;
2156 int options = 0;
2157 int study_options = 0;
2158 int default_find_match_limit = FALSE;
2159 int op = 1;
2160 int timeit = 0;
2161 int timeitm = 0;
2162 int showinfo = 0;
2163 int showstore = 0;
2164 int force_study = -1;
2165 int force_study_options = 0;
2166 int quiet = 0;
2167 int size_offsets = 45;
2168 int size_offsets_max;
2169 int *offsets = NULL;
2170 #if !defined NOPOSIX
2171 int posix = 0;
2172 #endif
2173 int debug = 0;
2174 int done = 0;
2175 int all_use_dfa = 0;
2176 int verify_jit = 0;
2177 int yield = 0;
2178 int stack_size;
2179
2180 pcre_jit_stack *jit_stack = NULL;
2181
2182 /* These vectors store, end-to-end, a list of zero-terminated captured
2183 substring names, each list itself being terminated by an empty name. Assume
2184 that 1024 is plenty long enough for the few names we'll be testing. It is
2185 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2186 for the actual memory, to ensure alignment. */
2187
2188 pcre_uint16 copynames[1024];
2189 pcre_uint16 getnames[1024];
2190
2191 #ifdef SUPPORT_PCRE16
2192 pcre_uint16 *cn16ptr;
2193 pcre_uint16 *gn16ptr;
2194 #endif
2195
2196 #ifdef SUPPORT_PCRE8
2197 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2198 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2199 pcre_uint8 *cn8ptr;
2200 pcre_uint8 *gn8ptr;
2201 #endif
2202
2203 /* Get buffers from malloc() so that valgrind will check their misuse when
2204 debugging. They grow automatically when very long lines are read. The 16-bit
2205 buffer (buffer16) is obtained only if needed. */
2206
2207 buffer = (pcre_uint8 *)malloc(buffer_size);
2208 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2209 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2210
2211 /* The outfile variable is static so that new_malloc can use it. */
2212
2213 outfile = stdout;
2214
2215 /* The following _setmode() stuff is some Windows magic that tells its runtime
2216 library to translate CRLF into a single LF character. At least, that's what
2217 I've been told: never having used Windows I take this all on trust. Originally
2218 it set 0x8000, but then I was advised that _O_BINARY was better. */
2219
2220 #if defined(_WIN32) || defined(WIN32)
2221 _setmode( _fileno( stdout ), _O_BINARY );
2222 #endif
2223
2224 /* Get the version number: both pcre_version() and pcre16_version() give the
2225 same answer. We just need to ensure that we call one that is available. */
2226
2227 #ifdef SUPPORT_PCRE8
2228 version = pcre_version();
2229 #else
2230 version = pcre16_version();
2231 #endif
2232
2233 /* Scan options */
2234
2235 while (argc > 1 && argv[op][0] == '-')
2236 {
2237 pcre_uint8 *endptr;
2238 char *arg = argv[op];
2239
2240 if (strcmp(arg, "-m") == 0) showstore = 1;
2241 else if (strcmp(arg, "-s") == 0) force_study = 0;
2242
2243 else if (strncmp(arg, "-s+", 3) == 0)
2244 {
2245 arg += 3;
2246 if (*arg == '+') { arg++; verify_jit = TRUE; }
2247
2248 if (*arg != 0) goto BAD_ARG;
2249
2250 force_study = 1;
2251 force_study_options = PCRE_STUDY_JIT_COMPILE
2252 | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
2253 | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
2254 }
2255 else if (strcmp(arg, "-16") == 0)
2256 {
2257 #ifdef SUPPORT_PCRE16
2258 use_pcre16 = 1;
2259 #else
2260 printf("** This version of PCRE was built without 16-bit support\n");
2261 exit(1);
2262 #endif
2263 }
2264 else if (strcmp(arg, "-q") == 0) quiet = 1;
2265 else if (strcmp(arg, "-b") == 0) debug = 1;
2266 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2267 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2268 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2269 #if !defined NODFA
2270 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2271 #endif
2272 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2273 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2274 *endptr == 0))
2275 {
2276 op++;
2277 argc--;
2278 }
2279 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2280 {
2281 int both = arg[2] == 0;
2282 int temp;
2283 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2284 *endptr == 0))
2285 {
2286 timeitm = temp;
2287 op++;
2288 argc--;
2289 }
2290 else timeitm = LOOPREPEAT;
2291 if (both) timeit = timeitm;
2292 }
2293 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2294 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2295 *endptr == 0))
2296 {
2297 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2298 printf("PCRE: -S not supported on this OS\n");
2299 exit(1);
2300 #else
2301 int rc;
2302 struct rlimit rlim;
2303 getrlimit(RLIMIT_STACK, &rlim);
2304 rlim.rlim_cur = stack_size * 1024 * 1024;
2305 rc = setrlimit(RLIMIT_STACK, &rlim);
2306 if (rc != 0)
2307 {
2308 printf("PCRE: setrlimit() failed with error %d\n", rc);
2309 exit(1);
2310 }
2311 op++;
2312 argc--;
2313 #endif
2314 }
2315 #if !defined NOPOSIX
2316 else if (strcmp(arg, "-p") == 0) posix = 1;
2317 #endif
2318 else if (strcmp(arg, "-C") == 0)
2319 {
2320 int rc;
2321 unsigned long int lrc;
2322
2323 if (argc > 2)
2324 {
2325 if (strcmp(argv[op + 1], "linksize") == 0)
2326 {
2327 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2328 printf("%d\n", rc);
2329 yield = rc;
2330 goto EXIT;
2331 }
2332 if (strcmp(argv[op + 1], "pcre8") == 0)
2333 {
2334 #ifdef SUPPORT_PCRE8
2335 printf("1\n");
2336 yield = 1;
2337 #else
2338 printf("0\n");
2339 yield = 0;
2340 #endif
2341 goto EXIT;
2342 }
2343 if (strcmp(argv[op + 1], "pcre16") == 0)
2344 {
2345 #ifdef SUPPORT_PCRE16
2346 printf("1\n");
2347 yield = 1;
2348 #else
2349 printf("0\n");
2350 yield = 0;
2351 #endif
2352 goto EXIT;
2353 }
2354 if (strcmp(argv[op + 1], "utf") == 0)
2355 {
2356 #ifdef SUPPORT_PCRE8
2357 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2358 printf("%d\n", rc);
2359 yield = rc;
2360 #else
2361 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2362 printf("%d\n", rc);
2363 yield = rc;
2364 #endif
2365 goto EXIT;
2366 }
2367 if (strcmp(argv[op + 1], "ucp") == 0)
2368 {
2369 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2370 printf("%d\n", rc);
2371 yield = rc;
2372 goto EXIT;
2373 }
2374 if (strcmp(argv[op + 1], "jit") == 0)
2375 {
2376 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2377 printf("%d\n", rc);
2378 yield = rc;
2379 goto EXIT;
2380 }
2381 if (strcmp(argv[op + 1], "newline") == 0)
2382 {
2383 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2384 /* Note that these values are always the ASCII values, even
2385 in EBCDIC environments. CR is 13 and NL is 10. */
2386 printf("%s\n", (rc == 13)? "CR" :
2387 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2388 (rc == -2)? "ANYCRLF" :
2389 (rc == -1)? "ANY" : "???");
2390 goto EXIT;
2391 }
2392 printf("Unknown -C option: %s\n", argv[op + 1]);
2393 goto EXIT;
2394 }
2395
2396 printf("PCRE version %s\n", version);
2397 printf("Compiled with\n");
2398
2399 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2400 are set, either both UTFs are supported or both are not supported. */
2401
2402 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2403 printf(" 8-bit and 16-bit support\n");
2404 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2405 if (rc)
2406 printf(" UTF-8 and UTF-16 support\n");
2407 else
2408 printf(" No UTF-8 or UTF-16 support\n");
2409 #elif defined SUPPORT_PCRE8
2410 printf(" 8-bit support only\n");
2411 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2412 printf(" %sUTF-8 support\n", rc? "" : "No ");
2413 #else
2414 printf(" 16-bit support only\n");
2415 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2416 printf(" %sUTF-16 support\n", rc? "" : "No ");
2417 #endif
2418
2419 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2420 printf(" %sUnicode properties support\n", rc? "" : "No ");
2421 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2422 if (rc)
2423 {
2424 const char *arch;
2425 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2426 printf(" Just-in-time compiler support: %s\n", arch);
2427 }
2428 else
2429 printf(" No just-in-time compiler support\n");
2430 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2431 /* Note that these values are always the ASCII values, even
2432 in EBCDIC environments. CR is 13 and NL is 10. */
2433 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2434 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2435 (rc == -2)? "ANYCRLF" :
2436 (rc == -1)? "ANY" : "???");
2437 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2438 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2439 "all Unicode newlines");
2440 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2441 printf(" Internal link size = %d\n", rc);
2442 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2443 printf(" POSIX malloc threshold = %d\n", rc);
2444 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2445 printf(" Default match limit = %ld\n", lrc);
2446 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2447 printf(" Default recursion depth limit = %ld\n", lrc);
2448 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2449 printf(" Match recursion uses %s", rc? "stack" : "heap");
2450 if (showstore)
2451 {
2452 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2453 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2454 }
2455 printf("\n");
2456 goto EXIT;
2457 }
2458 else if (strcmp(arg, "-help") == 0 ||
2459 strcmp(arg, "--help") == 0)
2460 {
2461 usage();
2462 goto EXIT;
2463 }
2464 else
2465 {
2466 BAD_ARG:
2467 printf("** Unknown or malformed option %s\n", arg);
2468 usage();
2469 yield = 1;
2470 goto EXIT;
2471 }
2472 op++;
2473 argc--;
2474 }
2475
2476 /* Get the store for the offsets vector, and remember what it was */
2477
2478 size_offsets_max = size_offsets;
2479 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2480 if (offsets == NULL)
2481 {
2482 printf("** Failed to get %d bytes of memory for offsets vector\n",
2483 (int)(size_offsets_max * sizeof(int)));
2484 yield = 1;
2485 goto EXIT;
2486 }
2487
2488 /* Sort out the input and output files */
2489
2490 if (argc > 1)
2491 {
2492 infile = fopen(argv[op], INPUT_MODE);
2493 if (infile == NULL)
2494 {
2495 printf("** Failed to open %s\n", argv[op]);
2496 yield = 1;
2497 goto EXIT;
2498 }
2499 }
2500
2501 if (argc > 2)
2502 {
2503 outfile = fopen(argv[op+1], OUTPUT_MODE);
2504 if (outfile == NULL)
2505 {
2506 printf("** Failed to open %s\n", argv[op+1]);
2507 yield = 1;
2508 goto EXIT;
2509 }
2510 }
2511
2512 /* Set alternative malloc function */
2513
2514 #ifdef SUPPORT_PCRE8
2515 pcre_malloc = new_malloc;
2516 pcre_free = new_free;
2517 pcre_stack_malloc = stack_malloc;
2518 pcre_stack_free = stack_free;
2519 #endif
2520
2521 #ifdef SUPPORT_PCRE16
2522 pcre16_malloc = new_malloc;
2523 pcre16_free = new_free;
2524 pcre16_stack_malloc = stack_malloc;
2525 pcre16_stack_free = stack_free;
2526 #endif
2527
2528 /* Heading line unless quiet, then prompt for first regex if stdin */
2529
2530 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2531
2532 /* Main loop */
2533
2534 while (!done)
2535 {
2536 pcre *re = NULL;
2537 pcre_extra *extra = NULL;
2538
2539 #if !defined NOPOSIX /* There are still compilers that require no indent */
2540 regex_t preg;
2541 int do_posix = 0;
2542 #endif
2543
2544 const char *error;
2545 pcre_uint8 *markptr;
2546 pcre_uint8 *p, *pp, *ppp;
2547 pcre_uint8 *to_file = NULL;
2548 const pcre_uint8 *tables = NULL;
2549 unsigned long int get_options;
2550 unsigned long int true_size, true_study_size = 0;
2551 size_t size, regex_gotten_store;
2552 int do_allcaps = 0;
2553 int do_mark = 0;
2554 int do_study = 0;
2555 int no_force_study = 0;
2556 int do_debug = debug;
2557 int do_G = 0;
2558 int do_g = 0;
2559 int do_showinfo = showinfo;
2560 int do_showrest = 0;
2561 int do_showcaprest = 0;
2562 int do_flip = 0;
2563 int erroroffset, len, delimiter, poffset;
2564
2565 use_utf = 0;
2566 debug_lengths = 1;
2567
2568 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2569 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2570 fflush(outfile);
2571
2572 p = buffer;
2573 while (isspace(*p)) p++;
2574 if (*p == 0) continue;
2575
2576 /* See if the pattern is to be loaded pre-compiled from a file. */
2577
2578 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2579 {
2580 pcre_uint32 magic;
2581 pcre_uint8 sbuf[8];
2582 FILE *f;
2583
2584 p++;
2585 if (*p == '!')
2586 {
2587 do_debug = TRUE;
2588 do_showinfo = TRUE;
2589 p++;
2590 }
2591
2592 pp = p + (int)strlen((char *)p);
2593 while (isspace(pp[-1])) pp--;
2594 *pp = 0;
2595
2596 f = fopen((char *)p, "rb");
2597 if (f == NULL)
2598 {
2599 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2600 continue;
2601 }
2602
2603 first_gotten_store = 0;
2604 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2605
2606 true_size =
2607 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2608 true_study_size =
2609 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2610
2611 re = (pcre *)new_malloc(true_size);
2612 regex_gotten_store = first_gotten_store;
2613
2614 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2615
2616 magic = ((REAL_PCRE *)re)->magic_number;
2617 if (magic != MAGIC_NUMBER)
2618 {
2619 if (swap_uint32(magic) == MAGIC_NUMBER)
2620 {
2621 do_flip = 1;
2622 }
2623 else
2624 {
2625 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2626 fclose(f);
2627 continue;
2628 }
2629 }
2630
2631 /* We hide the byte-invert info for little and big endian tests. */
2632 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2633 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2634
2635 /* Now see if there is any following study data. */
2636
2637 if (true_study_size != 0)
2638 {
2639 pcre_study_data *psd;
2640
2641 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2642 extra->flags = PCRE_EXTRA_STUDY_DATA;
2643
2644 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2645 extra->study_data = psd;
2646
2647 if (fread(psd, 1, true_study_size, f) != true_study_size)
2648 {
2649 FAIL_READ:
2650 fprintf(outfile, "Failed to read data from %s\n", p);
2651 if (extra != NULL)
2652 {
2653 PCRE_FREE_STUDY(extra);
2654 }
2655 if (re != NULL) new_free(re);
2656 fclose(f);
2657 continue;
2658 }
2659 fprintf(outfile, "Study data loaded from %s\n", p);
2660 do_study = 1; /* To get the data output if requested */
2661 }
2662 else fprintf(outfile, "No study data\n");
2663
2664 /* Flip the necessary bytes. */
2665 if (do_flip)
2666 {
2667 int rc;
2668 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2669 if (rc == PCRE_ERROR_BADMODE)
2670 {
2671 /* Simulate the result of the function call below. */
2672 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2673 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2674 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2675 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2676 continue;
2677 }
2678 }
2679
2680 /* Need to know if UTF-8 for printing data strings. */
2681
2682 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2683 use_utf = (get_options & PCRE_UTF8) != 0;
2684
2685 fclose(f);
2686 goto SHOW_INFO;
2687 }
2688
2689 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2690 the pattern; if it isn't complete, read more. */
2691
2692 delimiter = *p++;
2693
2694 if (isalnum(delimiter) || delimiter == '\\')
2695 {
2696 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2697 goto SKIP_DATA;
2698 }
2699
2700 pp = p;
2701 poffset = (int)(p - buffer);
2702
2703 for(;;)
2704 {
2705 while (*pp != 0)
2706 {
2707 if (*pp == '\\' && pp[1] != 0) pp++;
2708 else if (*pp == delimiter) break;
2709 pp++;
2710 }
2711 if (*pp != 0) break;
2712 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2713 {
2714 fprintf(outfile, "** Unexpected EOF\n");
2715 done = 1;
2716 goto CONTINUE;
2717 }
2718 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2719 }
2720
2721 /* The buffer may have moved while being extended; reset the start of data
2722 pointer to the correct relative point in the buffer. */
2723
2724 p = buffer + poffset;
2725
2726 /* If the first character after the delimiter is backslash, make
2727 the pattern end with backslash. This is purely to provide a way
2728 of testing for the error message when a pattern ends with backslash. */
2729
2730 if (pp[1] == '\\') *pp++ = '\\';
2731
2732 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2733 for callouts. */
2734
2735 *pp++ = 0;
2736 strcpy((char *)pbuffer, (char *)p);
2737
2738 /* Look for options after final delimiter */
2739
2740 options = 0;
2741 study_options = 0;
2742 log_store = showstore; /* default from command line */
2743
2744 while (*pp != 0)
2745 {
2746 switch (*pp++)
2747 {
2748 case 'f': options |= PCRE_FIRSTLINE; break;
2749 case 'g': do_g = 1; break;
2750 case 'i': options |= PCRE_CASELESS; break;
2751 case 'm': options |= PCRE_MULTILINE; break;
2752 case 's': options |= PCRE_DOTALL; break;
2753 case 'x': options |= PCRE_EXTENDED; break;
2754
2755 case '+':
2756 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2757 break;
2758
2759 case '=': do_allcaps = 1; break;
2760 case 'A': options |= PCRE_ANCHORED; break;
2761 case 'B': do_debug = 1; break;
2762 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2763 case 'D': do_debug = do_showinfo = 1; break;
2764 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2765 case 'F': do_flip = 1; break;
2766 case 'G': do_G = 1; break;
2767 case 'I': do_showinfo = 1; break;
2768 case 'J': options |= PCRE_DUPNAMES; break;
2769 case 'K': do_mark = 1; break;
2770 case 'M': log_store = 1; break;
2771 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2772
2773 #if !defined NOPOSIX
2774 case 'P': do_posix = 1; break;
2775 #endif
2776
2777 case 'S':
2778 if (do_study == 0)
2779 {
2780 do_study = 1;
2781 if (*pp == '+')
2782 {
2783 if (*(++pp) == '+')
2784 {
2785 verify_jit = TRUE;
2786 pp++;
2787 }
2788 study_options |= PCRE_STUDY_JIT_COMPILE
2789 | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
2790 | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
2791 }
2792 }
2793 else
2794 {
2795 do_study = 0;
2796 no_force_study = 1;
2797 }
2798 break;
2799
2800 case 'U': options |= PCRE_UNGREEDY; break;
2801 case 'W': options |= PCRE_UCP; break;
2802 case 'X': options |= PCRE_EXTRA; break;
2803 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2804 case 'Z': debug_lengths = 0; break;
2805 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2806 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2807
2808 case 'T':
2809 switch (*pp++)
2810 {
2811 case '0': tables = tables0; break;
2812 case '1': tables = tables1; break;
2813
2814 case '\r':
2815 case '\n':
2816 case ' ':
2817 case 0:
2818 fprintf(outfile, "** Missing table number after /T\n");
2819 goto SKIP_DATA;
2820
2821 default:
2822 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2823 goto SKIP_DATA;
2824 }
2825 break;
2826
2827 case 'L':
2828 ppp = pp;
2829 /* The '\r' test here is so that it works on Windows. */
2830 /* The '0' test is just in case this is an unterminated line. */
2831 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2832 *ppp = 0;
2833 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2834 {
2835 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2836 goto SKIP_DATA;
2837 }
2838 locale_set = 1;
2839 tables = PCRE_MAKETABLES;
2840 pp = ppp;
2841 break;
2842
2843 case '>':
2844 to_file = pp;
2845 while (*pp != 0) pp++;
2846 while (isspace(pp[-1])) pp--;
2847 *pp = 0;
2848 break;
2849
2850 case '<':
2851 {
2852 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2853 {
2854 options |= PCRE_JAVASCRIPT_COMPAT;
2855 pp += 3;
2856 }
2857 else
2858 {
2859 int x = check_newline(pp, outfile);
2860 if (x == 0) goto SKIP_DATA;
2861 options |= x;
2862 while (*pp++ != '>');
2863 }
2864 }
2865 break;
2866
2867 case '\r': /* So that it works in Windows */
2868 case '\n':
2869 case ' ':
2870 break;
2871
2872 default:
2873 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2874 goto SKIP_DATA;
2875 }
2876 }
2877
2878 /* Handle compiling via the POSIX interface, which doesn't support the
2879 timing, showing, or debugging options, nor the ability to pass over
2880 local character tables. Neither does it have 16-bit support. */
2881
2882 #if !defined NOPOSIX
2883 if (posix || do_posix)
2884 {
2885 int rc;
2886 int cflags = 0;
2887
2888 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2889 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2890 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2891 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2892 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2893 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2894 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2895
2896 first_gotten_store = 0;
2897 rc = regcomp(&preg, (char *)p, cflags);
2898
2899 /* Compilation failed; go back for another re, skipping to blank line
2900 if non-interactive. */
2901
2902 if (rc != 0)
2903 {
2904 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2905 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2906 goto SKIP_DATA;
2907 }
2908 }
2909
2910 /* Handle compiling via the native interface */
2911
2912 else
2913 #endif /* !defined NOPOSIX */
2914
2915 {
2916 /* In 16-bit mode, convert the input. */
2917
2918 #ifdef SUPPORT_PCRE16
2919 if (use_pcre16)
2920 {
2921 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2922 {
2923 case -1:
2924 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2925 "converted to UTF-16\n");
2926 goto SKIP_DATA;
2927
2928 case -2:
2929 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2930 "cannot be converted to UTF-16\n");
2931 goto SKIP_DATA;
2932
2933 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2934 fprintf(outfile, "**Failed: character value greater than 0xffff "
2935 "cannot be converted to 16-bit in non-UTF mode\n");
2936 goto SKIP_DATA;
2937
2938 default:
2939 break;
2940 }
2941 p = (pcre_uint8 *)buffer16;
2942 }
2943 #endif
2944
2945 /* Compile many times when timing */
2946
2947 if (timeit > 0)
2948 {
2949 register int i;
2950 clock_t time_taken;
2951 clock_t start_time = clock();
2952 for (i = 0; i < timeit; i++)
2953 {
2954 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2955 if (re != NULL) free(re);
2956 }
2957 time_taken = clock() - start_time;
2958 fprintf(outfile, "Compile time %.4f milliseconds\n",
2959 (((double)time_taken * 1000.0) / (double)timeit) /
2960 (double)CLOCKS_PER_SEC);
2961 }
2962
2963 first_gotten_store = 0;
2964 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2965
2966 /* Compilation failed; go back for another re, skipping to blank line
2967 if non-interactive. */
2968
2969 if (re == NULL)
2970 {
2971 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2972 SKIP_DATA:
2973 if (infile != stdin)
2974 {
2975 for (;;)
2976 {
2977 if (extend_inputline(infile, buffer, NULL) == NULL)
2978 {
2979 done = 1;
2980 goto CONTINUE;
2981 }
2982 len = (int)strlen((char *)buffer);
2983 while (len > 0 && isspace(buffer[len-1])) len--;
2984 if (len == 0) break;
2985 }
2986 fprintf(outfile, "\n");
2987 }
2988 goto CONTINUE;
2989 }
2990
2991 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2992 within the regex; check for this so that we know how to process the data
2993 lines. */
2994
2995 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2996 goto SKIP_DATA;
2997 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2998
2999 /* Extract the size for possible writing before possibly flipping it,
3000 and remember the store that was got. */
3001
3002 true_size = ((REAL_PCRE *)re)->size;
3003 regex_gotten_store = first_gotten_store;
3004
3005 /* Output code size information if requested */
3006
3007 if (log_store)
3008 fprintf(outfile, "Memory allocation (code space): %d\n",
3009 (int)(first_gotten_store -
3010 sizeof(REAL_PCRE) -
3011 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3012
3013 /* If -s or /S was present, study the regex to generate additional info to
3014 help with the matching, unless the pattern has the SS option, which
3015 suppresses the effect of /S (used for a few test patterns where studying is
3016 never sensible). */
3017
3018 if (do_study || (force_study >= 0 && !no_force_study))
3019 {
3020 if (timeit > 0)
3021 {
3022 register int i;
3023 clock_t time_taken;
3024 clock_t start_time = clock();
3025 for (i = 0; i < timeit; i++)
3026 {
3027 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3028 }
3029 time_taken = clock() - start_time;
3030 if (extra != NULL)
3031 {
3032 PCRE_FREE_STUDY(extra);
3033 }
3034 fprintf(outfile, " Study time %.4f milliseconds\n",
3035 (((double)time_taken * 1000.0) / (double)timeit) /
3036 (double)CLOCKS_PER_SEC);
3037 }
3038 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3039 if (error != NULL)
3040 fprintf(outfile, "Failed to study: %s\n", error);
3041 else if (extra != NULL)
3042 {
3043 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3044 if (log_store)
3045 {
3046 size_t jitsize;
3047 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3048 jitsize != 0)
3049 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3050 }
3051 }
3052 }
3053
3054 /* If /K was present, we set up for handling MARK data. */
3055
3056 if (do_mark)
3057 {
3058 if (extra == NULL)
3059 {
3060 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3061 extra->flags = 0;
3062 }
3063 extra->mark = &markptr;
3064 extra->flags |= PCRE_EXTRA_MARK;
3065 }
3066
3067 /* Extract and display information from the compiled data if required. */
3068
3069 SHOW_INFO:
3070
3071 if (do_debug)
3072 {
3073 fprintf(outfile, "------------------------------------------------------------------\n");
3074 PCRE_PRINTINT(re, outfile, debug_lengths);
3075 }
3076
3077 /* We already have the options in get_options (see above) */
3078
3079 if (do_showinfo)
3080 {
3081 unsigned long int all_options;
3082 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3083 hascrorlf;
3084 int nameentrysize, namecount;
3085 const pcre_uint8 *nametable;
3086
3087 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3088 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3089 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3090 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3091 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3092 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3093 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3094 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3095 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3096 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3097 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3098 != 0)
3099 goto SKIP_DATA;
3100
3101 if (size != regex_gotten_store) fprintf(outfile,
3102 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3103 (int)size, (int)regex_gotten_store);
3104
3105 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3106 if (backrefmax > 0)
3107 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3108
3109 if (namecount > 0)
3110 {
3111 fprintf(outfile, "Named capturing subpatterns:\n");
3112 while (namecount-- > 0)
3113 {
3114 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3115 int imm2_size = use_pcre16 ? 1 : 2;
3116 #else
3117 int imm2_size = IMM2_SIZE;
3118 #endif
3119 int length = (int)STRLEN(nametable + imm2_size);
3120 fprintf(outfile, " ");
3121 PCHARSV(nametable, imm2_size, length, outfile);
3122 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3123 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3124 fprintf(outfile, "%3d\n", use_pcre16?
3125 (int)(((PCRE_SPTR16)nametable)[0])
3126 :((int)nametable[0] << 8) | (int)nametable[1]);
3127 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3128 #else
3129 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3130 #ifdef SUPPORT_PCRE8
3131 nametable += nameentrysize;
3132 #else
3133 nametable += nameentrysize * 2;
3134 #endif
3135 #endif
3136 }
3137 }
3138
3139 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3140 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3141
3142 all_options = ((REAL_PCRE *)re)->options;
3143 if (do_flip) all_options = swap_uint32(all_options);
3144
3145 if (get_options == 0) fprintf(outfile, "No options\n");
3146 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3147 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3148 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3149 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3150 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3151 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3152 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3153 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3154 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3155 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3156 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3157 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3158 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3159 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3160 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3161 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3162 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3163 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3164
3165 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3166
3167 switch (get_options & PCRE_NEWLINE_BITS)
3168 {
3169 case PCRE_NEWLINE_CR:
3170 fprintf(outfile, "Forced newline sequence: CR\n");
3171 break;
3172
3173 case PCRE_NEWLINE_LF:
3174 fprintf(outfile, "Forced newline sequence: LF\n");
3175 break;
3176
3177 case PCRE_NEWLINE_CRLF:
3178 fprintf(outfile, "Forced newline sequence: CRLF\n");
3179 break;
3180
3181 case PCRE_NEWLINE_ANYCRLF:
3182 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3183 break;
3184
3185 case PCRE_NEWLINE_ANY:
3186 fprintf(outfile, "Forced newline sequence: ANY\n");
3187 break;
3188
3189 default:
3190 break;
3191 }
3192
3193 if (first_char == -1)
3194 {
3195 fprintf(outfile, "First char at start or follows newline\n");
3196 }
3197 else if (first_char < 0)
3198 {
3199 fprintf(outfile, "No first char\n");
3200 }
3201 else
3202 {
3203 const char *caseless =
3204 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3205 "" : " (caseless)";
3206
3207 if (PRINTOK(first_char))
3208 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3209 else
3210 {
3211 fprintf(outfile, "First char = ");
3212 pchar(first_char, outfile);
3213 fprintf(outfile, "%s\n", caseless);
3214 }
3215 }
3216
3217 if (need_char < 0)
3218 {
3219 fprintf(outfile, "No need char\n");
3220 }
3221 else
3222 {
3223 const char *caseless =
3224 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3225 "" : " (caseless)";
3226
3227 if (PRINTOK(need_char))
3228 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3229 else
3230 {
3231 fprintf(outfile, "Need char = ");
3232 pchar(need_char, outfile);
3233 fprintf(outfile, "%s\n", caseless);
3234 }
3235 }
3236
3237 /* Don't output study size; at present it is in any case a fixed
3238 value, but it varies, depending on the computer architecture, and
3239 so messes up the test suite. (And with the /F option, it might be
3240 flipped.) If study was forced by an external -s, don't show this
3241 information unless -i or -d was also present. This means that, except
3242 when auto-callouts are involved, the output from runs with and without
3243 -s should be identical. */
3244
3245 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3246 {
3247 if (extra == NULL)
3248 fprintf(outfile, "Study returned NULL\n");
3249 else
3250 {
3251 pcre_uint8 *start_bits = NULL;
3252 int minlength;
3253
3254 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3255 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3256
3257 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3258 {
3259 if (start_bits == NULL)
3260 fprintf(outfile, "No set of starting bytes\n");
3261 else
3262 {
3263 int i;
3264 int c = 24;
3265 fprintf(outfile, "Starting byte set: ");
3266 for (i = 0; i < 256; i++)
3267 {
3268 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3269 {
3270 if (c > 75)
3271 {
3272 fprintf(outfile, "\n ");
3273 c = 2;
3274 }
3275 if (PRINTOK(i) && i != ' ')
3276 {
3277 fprintf(outfile, "%c ", i);
3278 c += 2;
3279 }
3280 else
3281 {
3282 fprintf(outfile, "\\x%02x ", i);
3283 c += 5;
3284 }
3285 }
3286 }
3287 fprintf(outfile, "\n");
3288 }
3289 }
3290 }
3291
3292 /* Show this only if the JIT was set by /S, not by -s. */
3293
3294 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3295 {
3296 int jit;
3297 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3298 {
3299 if (jit)
3300 fprintf(outfile, "JIT study was successful\n");
3301 else
3302 #ifdef SUPPORT_JIT
3303 fprintf(outfile, "JIT study was not successful\n");
3304 #else
3305 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3306 #endif
3307 }
3308 }
3309 }
3310 }
3311
3312 /* If the '>' option was present, we write out the regex to a file, and
3313 that is all. The first 8 bytes of the file are the regex length and then
3314 the study length, in big-endian order. */
3315
3316 if (to_file != NULL)
3317 {
3318 FILE *f = fopen((char *)to_file, "wb");
3319 if (f == NULL)
3320 {
3321 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3322 }
3323 else
3324 {
3325 pcre_uint8 sbuf[8];
3326
3327 if (do_flip) regexflip(re, extra);
3328 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3329 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3330 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3331 sbuf[3] = (pcre_uint8)((true_size) & 255);
3332 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3333 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3334 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3335 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3336
3337 if (fwrite(sbuf, 1, 8, f) < 8 ||
3338 fwrite(re, 1, true_size, f) < true_size)
3339 {
3340 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3341 }
3342 else
3343 {
3344 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3345
3346 /* If there is study data, write it. */
3347
3348 if (extra != NULL)
3349 {
3350 if (fwrite(extra->study_data, 1, true_study_size, f) <
3351 true_study_size)
3352 {
3353 fprintf(outfile, "Write error on %s: %s\n", to_file,
3354 strerror(errno));
3355 }
3356 else fprintf(outfile, "Study data written to %s\n", to_file);
3357 }
3358 }
3359 fclose(f);
3360 }
3361
3362 new_free(re);
3363 if (extra != NULL)
3364 {
3365 PCRE_FREE_STUDY(extra);
3366 }
3367 if (locale_set)
3368 {
3369 new_free((void *)tables);
3370 setlocale(LC_CTYPE, "C");
3371 locale_set = 0;
3372 }
3373 continue; /* With next regex */
3374 }
3375 } /* End of non-POSIX compile */
3376
3377 /* Read data lines and test them */
3378
3379 for (;;)
3380 {
3381 pcre_uint8 *q;
3382 pcre_uint8 *bptr;
3383 int *use_offsets = offsets;
3384 int use_size_offsets = size_offsets;
3385 int callout_data = 0;
3386 int callout_data_set = 0;
3387 int count, c;
3388 int copystrings = 0;
3389 int find_match_limit = default_find_match_limit;
3390 int getstrings = 0;
3391 int getlist = 0;
3392 int gmatched = 0;
3393 int start_offset = 0;
3394 int start_offset_sign = 1;
3395 int g_notempty = 0;
3396 int use_dfa = 0;
3397 int jit_was_used = 0;
3398
3399 *copynames = 0;
3400 *getnames = 0;
3401
3402 #ifdef SUPPORT_PCRE16
3403 cn16ptr = copynames;
3404 gn16ptr = getnames;
3405 #endif
3406 #ifdef SUPPORT_PCRE8
3407 cn8ptr = copynames8;
3408 gn8ptr = getnames8;
3409 #endif
3410
3411 SET_PCRE_CALLOUT(callout);
3412 first_callout = 1;
3413 last_callout_mark = NULL;
3414 callout_extra = 0;
3415 callout_count = 0;
3416 callout_fail_count = 999999;
3417 callout_fail_id = -1;
3418 show_malloc = 0;
3419 options = 0;
3420
3421 if (extra != NULL) extra->flags &=
3422 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3423
3424 len = 0;
3425 for (;;)
3426 {
3427 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3428 {
3429 if (len > 0) /* Reached EOF without hitting a newline */
3430 {
3431 fprintf(outfile, "\n");
3432 break;
3433 }
3434 done = 1;
3435 goto CONTINUE;
3436 }
3437 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3438 len = (int)strlen((char *)buffer);
3439 if (buffer[len-1] == '\n') break;
3440 }
3441
3442 while (len > 0 && isspace(buffer[len-1])) len--;
3443 buffer[len] = 0;
3444 if (len == 0) break;
3445
3446 p = buffer;
3447 while (isspace(*p)) p++;
3448
3449 bptr = q = dbuffer;
3450 while ((c = *p++) != 0)
3451 {
3452 int i = 0;
3453 int n = 0;
3454
3455 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3456 In non-UTF mode, allow the value of the byte to fall through to later,
3457 where values greater than 127 are turned into UTF-8 when running in
3458 16-bit mode. */
3459
3460 if (c != '\\')
3461 {
3462 if (use_utf)
3463 {
3464 *q++ = c;
3465 continue;
3466 }
3467 }
3468
3469 /* Handle backslash escapes */
3470
3471 else switch ((c = *p++))
3472 {
3473 case 'a': c = 7; break;
3474 case 'b': c = '\b'; break;
3475 case 'e': c = 27; break;
3476 case 'f': c = '\f'; break;
3477 case 'n': c = '\n'; break;
3478 case 'r': c = '\r'; break;
3479 case 't': c = '\t'; break;
3480 case 'v': c = '\v'; break;
3481
3482 case '0': case '1': case '2': case '3':
3483 case '4': case '5': case '6': case '7':
3484 c -= '0';
3485 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3486 c = c * 8 + *p++ - '0';
3487 break;
3488
3489 case 'x':
3490 if (*p == '{')
3491 {
3492 pcre_uint8 *pt = p;
3493 c = 0;
3494
3495 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3496 when isxdigit() is a macro that refers to its argument more than
3497 once. This is banned by the C Standard, but apparently happens in at
3498 least one MacOS environment. */
3499
3500 for (pt++; isxdigit(*pt); pt++)
3501 {
3502 if (++i == 9)
3503 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3504 "using only the first eight.\n");
3505 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3506 }
3507 if (*pt == '}')
3508 {
3509 p = pt + 1;
3510 break;
3511 }
3512 /* Not correct form for \x{...}; fall through */
3513 }
3514
3515 /* \x without {} always defines just one byte in 8-bit mode. This
3516 allows UTF-8 characters to be constructed byte by byte, and also allows
3517 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3518 Otherwise, pass it down to later code so that it can be turned into
3519 UTF-8 when running in 16-bit mode. */
3520
3521 c = 0;
3522 while (i++ < 2 && isxdigit(*p))
3523 {
3524 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3525 p++;
3526 }
3527 if (use_utf)
3528 {
3529 *q++ = c;
3530 continue;
3531 }
3532 break;
3533
3534 case 0: /* \ followed by EOF allows for an empty line */
3535 p--;
3536 continue;
3537
3538 case '>':
3539 if (*p == '-')
3540 {
3541 start_offset_sign = -1;
3542 p++;
3543 }
3544 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3545 start_offset *= start_offset_sign;
3546 continue;
3547
3548 case 'A': /* Option setting */
3549 options |= PCRE_ANCHORED;
3550 continue;
3551
3552 case 'B':
3553 options |= PCRE_NOTBOL;
3554 continue;
3555
3556 case 'C':
3557 if (isdigit(*p)) /* Set copy string */
3558 {
3559 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3560 copystrings |= 1 << n;
3561 }
3562 else if (isalnum(*p))
3563 {
3564 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3565 }
3566 else if (*p == '+')
3567 {
3568 callout_extra = 1;
3569 p++;
3570 }
3571 else if (*p == '-')
3572 {
3573 SET_PCRE_CALLOUT(NULL);
3574 p++;
3575 }
3576 else if (*p == '!')
3577 {
3578 callout_fail_id = 0;
3579 p++;
3580 while(isdigit(*p))
3581 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3582 callout_fail_count = 0;
3583 if (*p == '!')
3584 {
3585 p++;
3586 while(isdigit(*p))
3587 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3588 }
3589 }
3590 else if (*p == '*')
3591 {
3592 int sign = 1;
3593 callout_data = 0;
3594 if (*(++p) == '-') { sign = -1; p++; }
3595 while(isdigit(*p))
3596 callout_data = callout_data * 10 + *p++ - '0';
3597 callout_data *= sign;
3598 callout_data_set = 1;
3599 }
3600 continue;
3601
3602 #if !defined NODFA
3603 case 'D':
3604 #if !defined NOPOSIX
3605 if (posix || do_posix)
3606 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3607 else
3608 #endif
3609 use_dfa = 1;
3610 continue;
3611 #endif
3612
3613 #if !defined NODFA
3614 case 'F':
3615 options |= PCRE_DFA_SHORTEST;
3616 continue;
3617 #endif
3618
3619 case 'G':
3620 if (isdigit(*p))
3621 {
3622 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3623 getstrings |= 1 << n;
3624 }
3625 else if (isalnum(*p))
3626 {
3627 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3628 }
3629 continue;
3630
3631 case 'J':
3632 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3633 if (extra != NULL
3634 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3635 && extra->executable_jit != NULL)
3636 {
3637 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3638 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3639 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3640 }
3641 continue;
3642
3643 case 'L':
3644 getlist = 1;
3645 continue;
3646
3647 case 'M':
3648 find_match_limit = 1;
3649 continue;
3650
3651 case 'N':
3652 if ((options & PCRE_NOTEMPTY) != 0)
3653 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3654 else
3655 options |= PCRE_NOTEMPTY;
3656 continue;
3657
3658 case 'O':
3659 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3660 if (n > size_offsets_max)
3661 {
3662 size_offsets_max = n;
3663 free(offsets);
3664 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3665 if (offsets == NULL)
3666 {
3667 printf("** Failed to get %d bytes of memory for offsets vector\n",
3668 (int)(size_offsets_max * sizeof(int)));
3669 yield = 1;
3670 goto EXIT;
3671 }
3672 }
3673 use_size_offsets = n;
3674 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3675 continue;
3676
3677 case 'P':
3678 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3679 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3680 continue;
3681
3682 case 'Q':
3683 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3684 if (extra == NULL)
3685 {
3686 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3687 extra->flags = 0;
3688 }
3689 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3690 extra->match_limit_recursion = n;
3691 continue;
3692
3693 case 'q':
3694 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3695 if (extra == NULL)
3696 {
3697 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3698 extra->flags = 0;
3699 }
3700 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3701 extra->match_limit = n;
3702 continue;
3703
3704 #if !defined NODFA
3705 case 'R':
3706 options |= PCRE_DFA_RESTART;
3707 continue;
3708 #endif
3709
3710 case 'S':
3711 show_malloc = 1;
3712 continue;
3713
3714 case 'Y':
3715 options |= PCRE_NO_START_OPTIMIZE;
3716 continue;
3717
3718 case 'Z':
3719 options |= PCRE_NOTEOL;
3720 continue;
3721
3722 case '?':
3723 options |= PCRE_NO_UTF8_CHECK;
3724 continue;
3725
3726 case '<':
3727 {
3728 int x = check_newline(p, outfile);
3729 if (x == 0) goto NEXT_DATA;
3730 options |= x;
3731 while (*p++ != '>');
3732 }
3733 continue;
3734 }
3735
3736 /* We now have a character value in c that may be greater than 255. In
3737 16-bit mode, we always convert characters to UTF-8 so that values greater
3738 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3739 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3740 mode must have come from \x{...} or octal constructs because values from
3741 \x.. get this far only in non-UTF mode. */
3742
3743 #if !defined NOUTF || defined SUPPORT_PCRE16
3744 if (use_pcre16 || use_utf)
3745 {
3746 pcre_uint8 buff8[8];
3747 int ii, utn;
3748 utn = ord2utf8(c, buff8);
3749 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3750 }
3751 else
3752 #endif
3753 {
3754 if (c > 255)
3755 {
3756 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3757 "and UTF-8 mode is not enabled.\n", c);
3758 fprintf(outfile, "** Truncation will probably give the wrong "
3759 "result.\n");
3760 }
3761 *q++ = c;
3762 }
3763 }
3764
3765 /* Reached end of subject string */
3766
3767 *q = 0;
3768 len = (int)(q - dbuffer);
3769
3770 /* Move the data to the end of the buffer so that a read over the end of
3771 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3772 we are using the POSIX interface, we must include the terminating zero. */
3773
3774 #if !defined NOPOSIX
3775 if (posix || do_posix)
3776 {
3777 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3778 bptr += buffer_size - len - 1;
3779 }
3780 else
3781 #endif
3782 {
3783 memmove(bptr + buffer_size - len, bptr, len);
3784 bptr += buffer_size - len;
3785 }
3786
3787 if ((all_use_dfa || use_dfa) && find_match_limit)
3788 {
3789 printf("**Match limit not relevant for DFA matching: ignored\n");
3790 find_match_limit = 0;
3791 }
3792
3793 /* Handle matching via the POSIX interface, which does not
3794 support timing or playing with the match limit or callout data. */
3795
3796 #if !defined NOPOSIX
3797 if (posix || do_posix)
3798 {
3799 int rc;
3800 int eflags = 0;
3801 regmatch_t *pmatch = NULL;
3802 if (use_size_offsets > 0)
3803 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3804 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3805 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3806 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3807
3808 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3809
3810 if (rc != 0)
3811 {
3812 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3813 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3814 }
3815 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3816 != 0)
3817 {
3818 fprintf(outfile, "Matched with REG_NOSUB\n");
3819 }
3820 else
3821 {
3822 size_t i;
3823 for (i = 0; i < (size_t)use_size_offsets; i++)
3824 {
3825 if (pmatch[i].rm_so >= 0)
3826 {
3827 fprintf(outfile, "%2d: ", (int)i);
3828 PCHARSV(dbuffer, pmatch[i].rm_so,
3829 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3830 fprintf(outfile, "\n");
3831 if (do_showcaprest || (i == 0 && do_showrest))
3832 {
3833 fprintf(outfile, "%2d+ ", (int)i);
3834 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3835 outfile);
3836 fprintf(outfile, "\n");
3837 }
3838 }
3839 }
3840 }
3841 free(pmatch);
3842 goto NEXT_DATA;
3843 }
3844
3845 #endif /* !defined NOPOSIX */
3846
3847 /* Handle matching via the native interface - repeats for /g and /G */
3848
3849 #ifdef SUPPORT_PCRE16
3850 if (use_pcre16)
3851 {
3852 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3853 switch(len)
3854 {
3855 case -1:
3856 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3857 "converted to UTF-16\n");
3858 goto NEXT_DATA;
3859
3860 case -2:
3861 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3862 "cannot be converted to UTF-16\n");
3863 goto NEXT_DATA;
3864
3865 case -3:
3866 fprintf(outfile, "**Failed: character value greater than 0xffff "
3867 "cannot be converted to 16-bit in non-UTF mode\n");
3868 goto NEXT_DATA;
3869
3870 default:
3871 break;
3872 }
3873 bptr = (pcre_uint8 *)buffer16;
3874 }
3875 #endif
3876
3877 for (;; gmatched++) /* Loop for /g or /G */
3878 {
3879 markptr = NULL;
3880
3881 if (timeitm > 0)
3882 {
3883 register int i;
3884 clock_t time_taken;
3885 clock_t start_time = clock();
3886
3887 #if !defined NODFA
3888 if (all_use_dfa || use_dfa)
3889 {
3890 int workspace[1000];
3891 for (i = 0; i < timeitm; i++)
3892 {
3893 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3894 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3895 (sizeof(workspace)/sizeof(int)));
3896 }
3897 }
3898 else
3899 #endif
3900
3901 for (i = 0; i < timeitm; i++)
3902 {
3903 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3904 (options | g_notempty), use_offsets, use_size_offsets);
3905 }
3906 time_taken = clock() - start_time;
3907 fprintf(outfile, "Execute time %.4f milliseconds\n",
3908 (((double)time_taken * 1000.0) / (double)timeitm) /
3909 (double)CLOCKS_PER_SEC);
3910 }
3911
3912 /* If find_match_limit is set, we want to do repeated matches with
3913 varying limits in order to find the minimum value for the match limit and
3914 for the recursion limit. The match limits are relevant only to the normal
3915 running of pcre_exec(), so disable the JIT optimization. This makes it
3916 possible to run the same set of tests with and without JIT externally
3917 requested. */
3918
3919 if (find_match_limit)
3920 {
3921 if (extra == NULL)
3922 {
3923 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3924 extra->flags = 0;
3925 }
3926 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3927
3928 (void)check_match_limit(re, extra, bptr, len, start_offset,
3929 options|g_notempty, use_offsets, use_size_offsets,
3930 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3931 PCRE_ERROR_MATCHLIMIT, "match()");
3932
3933 count = check_match_limit(re, extra, bptr, len, start_offset,
3934 options|g_notempty, use_offsets, use_size_offsets,
3935 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3936 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3937 }
3938
3939 /* If callout_data is set, use the interface with additional data */
3940
3941 else if (callout_data_set)
3942 {
3943 if (extra == NULL)
3944 {
3945 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3946 extra->flags = 0;
3947 }
3948 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3949 extra->callout_data = &callout_data;
3950 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3951 options | g_notempty, use_offsets, use_size_offsets);
3952 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3953 }
3954
3955 /* The normal case is just to do the match once, with the default
3956 value of match_limit. */
3957
3958 #if !defined NODFA
3959 else if (all_use_dfa || use_dfa)
3960 {
3961 int workspace[1000];
3962 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3963 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3964 (sizeof(workspace)/sizeof(int)));
3965 if (count == 0)
3966 {
3967 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3968 count = use_size_offsets/2;
3969 }
3970 }
3971 #endif
3972
3973 else
3974 {
3975 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3976 options | g_notempty, use_offsets, use_size_offsets);
3977 if (count == 0)
3978 {
3979 fprintf(outfile, "Matched, but too many substrings\n");
3980 count = use_size_offsets/3;
3981 }
3982 }
3983
3984 if (extra != NULL && (extra->flags & PCRE_EXTRA_USED_JIT) != 0)
3985 jit_was_used = TRUE;
3986
3987 /* Matched */
3988
3989 if (count >= 0)
3990 {
3991 int i, maxcount;
3992 void *cnptr, *gnptr;
3993
3994 #if !defined NODFA
3995 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3996 #endif
3997 maxcount = use_size_offsets/3;
3998
3999 /* This is a check against a lunatic return value. */
4000
4001 if (count > maxcount)
4002 {
4003 fprintf(outfile,
4004 "** PCRE error: returned count %d is too big for offset size %d\n",
4005 count, use_size_offsets);
4006 count = use_size_offsets/3;
4007 if (do_g || do_G)
4008 {
4009 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4010 do_g = do_G = FALSE; /* Break g/G loop */
4011 }
4012 }
4013
4014 /* do_allcaps requests showing of all captures in the pattern, to check
4015 unset ones at the end. */
4016
4017 if (do_allcaps)
4018 {
4019 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4020 goto SKIP_DATA;
4021 count++; /* Allow for full match */
4022 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4023 }
4024
4025 /* Output the captured substrings */
4026
4027 for (i = 0; i < count * 2; i += 2)
4028 {
4029 if (use_offsets[i] < 0)
4030 {
4031 if (use_offsets[i] != -1)
4032 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4033 use_offsets[i], i);
4034 if (use_offsets[i+1] != -1)
4035 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4036 use_offsets[i+1], i+1);
4037 fprintf(outfile, "%2d: <unset>\n", i/2);
4038 }
4039 else
4040 {
4041 fprintf(outfile, "%2d: ", i/2);
4042 PCHARSV(bptr, use_offsets[i],
4043 use_offsets[i+1] - use_offsets[i], outfile);
4044 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4045 fprintf(outfile, "\n");
4046 if (do_showcaprest || (i == 0 && do_showrest))
4047 {
4048 fprintf(outfile, "%2d+ ", i/2);
4049 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4050 outfile);
4051 fprintf(outfile, "\n");
4052 }
4053 }
4054 }
4055
4056 if (markptr != NULL)
4057 {
4058 fprintf(outfile, "MK: ");
4059 PCHARSV(markptr, 0, -1, outfile);
4060 fprintf(outfile, "\n");
4061 }
4062
4063 for (i = 0; i < 32; i++)
4064 {
4065 if ((copystrings & (1 << i)) != 0)
4066 {
4067 int rc;
4068 char copybuffer[256];
4069 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4070 copybuffer, sizeof(copybuffer));
4071 if (rc < 0)
4072 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4073 else
4074 {
4075 fprintf(outfile, "%2dC ", i);
4076 PCHARSV(copybuffer, 0, rc, outfile);
4077 fprintf(outfile, " (%d)\n", rc);
4078 }
4079 }
4080 }
4081
4082 cnptr = copynames;
4083 for (;;)
4084 {
4085 int rc;
4086 char copybuffer[256];
4087
4088 if (use_pcre16)
4089 {
4090 if (*(pcre_uint16 *)cnptr == 0) break;
4091 }
4092 else
4093 {
4094 if (*(pcre_uint8 *)cnptr == 0) break;
4095 }
4096
4097 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4098 cnptr, copybuffer, sizeof(copybuffer));
4099
4100 if (rc < 0)
4101 {
4102 fprintf(outfile, "copy substring ");
4103 PCHARSV(cnptr, 0, -1, outfile);
4104 fprintf(outfile, " failed %d\n", rc);
4105 }
4106 else
4107 {
4108 fprintf(outfile, " C ");
4109 PCHARSV(copybuffer, 0, rc, outfile);
4110 fprintf(outfile, " (%d) ", rc);
4111 PCHARSV(cnptr, 0, -1, outfile);
4112 putc('\n', outfile);
4113 }
4114
4115 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4116 }
4117
4118 for (i = 0; i < 32; i++)
4119 {
4120 if ((getstrings & (1 << i)) != 0)
4121 {
4122 int rc;
4123 const char *substring;
4124 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4125 if (rc < 0)
4126 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4127 else
4128 {
4129 fprintf(outfile, "%2dG ", i);
4130 PCHARSV(substring, 0, rc, outfile);
4131 fprintf(outfile, " (%d)\n", rc);
4132 PCRE_FREE_SUBSTRING(substring);
4133 }
4134 }
4135 }
4136
4137 gnptr = getnames;
4138 for (;;)
4139 {
4140 int rc;
4141 const char *substring;
4142
4143 if (use_pcre16)
4144 {
4145 if (*(pcre_uint16 *)gnptr == 0) break;
4146 }
4147 else
4148 {
4149 if (*(pcre_uint8 *)gnptr == 0) break;
4150 }
4151
4152 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4153 gnptr, &substring);
4154 if (rc < 0)
4155 {
4156 fprintf(outfile, "get substring ");
4157 PCHARSV(gnptr, 0, -1, outfile);
4158 fprintf(outfile, " failed %d\n", rc);
4159 }
4160 else
4161 {
4162 fprintf(outfile, " G ");
4163 PCHARSV(substring, 0, rc, outfile);
4164 fprintf(outfile, " (%d) ", rc);
4165 PCHARSV(gnptr, 0, -1, outfile);
4166 PCRE_FREE_SUBSTRING(substring);
4167 putc('\n', outfile);
4168 }
4169
4170 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4171 }
4172
4173 if (getlist)
4174 {
4175 int rc;
4176 const char **stringlist;
4177 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4178 if (rc < 0)
4179 fprintf(outfile, "get substring list failed %d\n", rc);
4180 else
4181 {
4182 for (i = 0; i < count; i++)
4183 {
4184 fprintf(outfile, "%2dL ", i);
4185 PCHARSV(stringlist[i], 0, -1, outfile);
4186 putc('\n', outfile);
4187 }
4188 if (stringlist[i] != NULL)
4189 fprintf(outfile, "string list not terminated by NULL\n");
4190 PCRE_FREE_SUBSTRING_LIST(stringlist);
4191 }
4192 }
4193 }
4194
4195 /* There was a partial match */
4196
4197 else if (count == PCRE_ERROR_PARTIAL)
4198 {
4199 if (markptr == NULL) fprintf(outfile, "Partial match");
4200 else
4201 {
4202 fprintf(outfile, "Partial match, mark=");
4203 PCHARSV(markptr, 0, -1, outfile);
4204 }
4205 if (use_size_offsets > 1)
4206 {
4207 fprintf(outfile, ": ");
4208 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4209 outfile);
4210 }
4211 fprintf(outfile, "\n");
4212 break; /* Out of the /g loop */
4213 }
4214
4215 /* Failed to match. If this is a /g or /G loop and we previously set
4216 g_notempty after a null match, this is not necessarily the end. We want
4217 to advance the start offset, and continue. We won't be at the end of the
4218 string - that was checked before setting g_notempty.
4219
4220 Complication arises in the case when the newline convention is "any",
4221 "crlf", or "anycrlf". If the previous match was at the end of a line
4222 terminated by CRLF, an advance of one character just passes the \r,
4223 whereas we should prefer the longer newline sequence, as does the code in
4224 pcre_exec(). Fudge the offset value to achieve this. We check for a
4225 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4226 find the default.
4227
4228 Otherwise, in the case of UTF-8 matching, the advance must be one
4229 character, not one byte. */
4230
4231 else
4232 {
4233 if (g_notempty != 0)
4234 {
4235 int onechar = 1;
4236 unsigned int obits = ((REAL_PCRE *)re)->options;
4237 use_offsets[0] = start_offset;
4238 if ((obits & PCRE_NEWLINE_BITS) == 0)
4239 {
4240 int d;
4241 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4242 /* Note that these values are always the ASCII ones, even in
4243 EBCDIC environments. CR = 13, NL = 10. */
4244 obits = (d == 13)? PCRE_NEWLINE_CR :
4245 (d == 10)? PCRE_NEWLINE_LF :
4246 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4247 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4248 (d == -1)? PCRE_NEWLINE_ANY : 0;
4249 }
4250 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4251 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4252 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4253 &&
4254 start_offset < len - 1 &&
4255 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4256 (use_pcre16?
4257 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4258 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4259 :
4260 bptr[start_offset] == '\r'
4261 && bptr[start_offset + 1] == '\n')
4262 #elif defined SUPPORT_PCRE16
4263 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4264 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4265 #else
4266 bptr[start_offset] == '\r'
4267 && bptr[start_offset + 1] == '\n'
4268 #endif
4269 )
4270 onechar++;
4271 else if (use_utf)
4272 {
4273 while (start_offset + onechar < len)
4274 {
4275 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4276 onechar++;
4277 }
4278 }
4279 use_offsets[1] = start_offset + onechar;
4280 }
4281 else
4282 {
4283 switch(count)
4284 {
4285 case PCRE_ERROR_NOMATCH:
4286 if (gmatched == 0)
4287 {
4288 if (markptr == NULL)
4289 {
4290 fprintf(outfile, "No match");
4291 }
4292 else
4293 {
4294 fprintf(outfile, "No match, mark = ");
4295 PCHARSV(markptr, 0, -1, outfile);
4296 }
4297 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4298 putc('\n', outfile);
4299 }
4300 break;
4301
4302 case PCRE_ERROR_BADUTF8:
4303 case PCRE_ERROR_SHORTUTF8:
4304 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4305 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4306 use_pcre16? "16" : "8");
4307 if (use_size_offsets >= 2)
4308 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4309 use_offsets[1]);
4310 fprintf(outfile, "\n");
4311 break;
4312
4313 case PCRE_ERROR_BADUTF8_OFFSET:
4314 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4315 use_pcre16? "16" : "8");
4316 break;
4317
4318 default:
4319 if (count < 0 &&
4320 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4321 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4322 else
4323 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4324 break;
4325 }
4326
4327 break; /* Out of the /g loop */
4328 }
4329 }
4330
4331 /* If not /g or /G we are done */
4332
4333 if (!do_g && !do_G) break;
4334
4335 /* If we have matched an empty string, first check to see if we are at
4336 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4337 Perl's /g option does. This turns out to be rather cunning. First we set
4338 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4339 same point. If this fails (picked up above) we advance to the next
4340 character. */
4341
4342 g_notempty = 0;
4343
4344 if (use_offsets[0] == use_offsets[1])
4345 {
4346 if (use_offsets[0] == len) break;
4347 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4348 }
4349
4350 /* For /g, update the start offset, leaving the rest alone */
4351
4352 if (do_g) start_offset = use_offsets[1];
4353
4354 /* For /G, update the pointer and length */
4355
4356 else
4357 {
4358 bptr += use_offsets[1] * CHAR_SIZE;
4359 len -= use_offsets[1];
4360 }
4361 } /* End of loop for /g and /G */
4362
4363 NEXT_DATA: continue;
4364 } /* End of loop for data lines */
4365
4366 CONTINUE:
4367
4368 #if !defined NOPOSIX
4369 if (posix || do_posix) regfree(&preg);
4370 #endif
4371
4372 if (re != NULL) new_free(re);
4373 if (extra != NULL)
4374 {
4375 PCRE_FREE_STUDY(extra);
4376 }
4377 if (locale_set)
4378 {
4379 new_free((void *)tables);
4380 setlocale(LC_CTYPE, "C");
4381 locale_set = 0;
4382 }
4383 if (jit_stack != NULL)
4384 {
4385 PCRE_JIT_STACK_FREE(jit_stack);
4386 jit_stack = NULL;
4387 }
4388 }
4389
4390 if (infile == stdin) fprintf(outfile, "\n");
4391
4392 EXIT:
4393
4394 if (infile != NULL && infile != stdin) fclose(infile);
4395 if (outfile != NULL && outfile != stdout) fclose(outfile);
4396
4397 free(buffer);
4398 free(dbuffer);
4399 free(pbuffer);
4400 free(offsets);
4401
4402 #ifdef SUPPORT_PCRE16
4403 if (buffer16 != NULL) free(buffer16);
4404 #endif
4405
4406 return yield;
4407 }
4408
4409 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5