/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1046 - (show annotations)
Tue Sep 25 16:27:58 2012 UTC (6 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 135284 byte(s)
Error occurred while calculating annotation data.
All the remaining changes for handling characters with more than one other 
case.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
/* Platform-dependent setup: the headers each platform needs, plus the
open-mode strings for test input (INPUT_MODE) and output (OUTPUT_MODE). */

#if defined _WIN32 || defined WIN32

/* Windows: no "b" on the input mode, "b" on the output mode. */

#include <io.h>       /* For _setmode() */
#include <fcntl.h>    /* For _O_BINARY */
#define INPUT_MODE    "r"
#define OUTPUT_MODE   "wb"

/* isatty and fileno are what Windows calls _isatty and _fileno, though some
environments already define the plain names, hence the guards. */

#if !defined isatty
#define isatty _isatty
#endif

#if !defined fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#if defined __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#else  /* Not Windows */

#include <sys/time.h>       /* These two includes are needed */
#include <sys/resource.h>   /* for setrlimit(). */

#if !defined NATIVE_ZOS
#define INPUT_MODE    "rb"
#define OUTPUT_MODE   "wb"
#else                       /* z/OS uses non-binary I/O */
#define INPUT_MODE    "r"
#define OUTPUT_MODE   "w"
#endif

#endif  /* Windows / not Windows */
123
/* PRIV(x) expands to plain x, i.e. no prefix is added to the name. It is
defined before the PCRE sources are included below; presumably those sources
wrap their private symbol names in PRIV() - confirm in pcre_internal.h. */
#define PRIV(ident) ident
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136
137 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
138 /* Configure internal macros to 16 bit mode. */
139 #define COMPILE_PCRE16
140 #endif
141
142 #include "pcre_internal.h"
143
144 /* The pcre_printint() function, which prints the internal form of a compiled
145 regex, is held in a separate file so that (a) it can be compiled in either
146 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
147 when that is compiled in debug mode. */
148
/* Prototypes for the compiled-pattern printers, one per supported library
width. Both take the compiled pattern, the stream to print to, and a flag
named print_lengths. Note that the 16-bit variant is declared here with a
plain (pcre *) first argument, not (pcre16 *). */

#if defined SUPPORT_PCRE8
extern void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#if defined SUPPORT_PCRE16
extern void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
155
156 /* We need access to some of the data tables that PCRE uses. So as not to have
157 to keep two copies, we include the source files here, changing the names of the
158 external symbols to prevent clashes. */
159
160 #define PCRE_INCLUDED
161
162 #include "pcre_tables.c"
163 #include "pcre_ucd.c"
164
165 /* The definition of the macro PRINTABLE, which determines whether to print an
166 output character as-is or as a hex value when showing compiled patterns, is
167 the same as in the printint.src file. We use it here in cases when the locale
168 has not been explicitly changed, so as to get consistent output from systems
169 that differ in their output from isprint() even in the "C" locale. */
170
/* PRINTABLE(c) is a fixed, locale-independent range test: 64..254 in an
EBCDIC build, 32..126 otherwise. The argument may be evaluated twice, so it
must not have side effects. */

#if defined EBCDIC
#define PRINTABLE(c) (!((c) < 64 || (c) >= 255))
#else
#define PRINTABLE(c) (!((c) < 32 || (c) >= 127))
#endif

/* PRINTOK(c) uses isprint() once locale_set is non-zero, and the fixed
PRINTABLE() range until then. */

#define PRINTOK(c) (!locale_set? PRINTABLE(c) : isprint(c))
178
179 /* Posix support is disabled in 16 bit only mode. */
/* A 16-bit-only build (SUPPORT_PCRE16 without SUPPORT_PCRE8) forces NOPOSIX,
unless it has been defined already. */
#if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
#ifndef NOPOSIX
#define NOPOSIX
#endif
#endif
183
184 /* It is possible to compile this test program without including support for
185 testing the POSIX interface, though this is not available via the standard
186 Makefile. */
187
188 #if !defined NOPOSIX
189 #include "pcreposix.h"
190 #endif
191
192 /* It is also possible, originally for the benefit of a version that was
193 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
194 NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
195 automatically cut out the UTF support if PCRE is built without it. */
196
/* When the library is built without SUPPORT_UTF, make sure NOUTF is defined
(leaving an existing definition alone). */
#if !defined SUPPORT_UTF && !defined NOUTF
#define NOUTF
#endif
202
203 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
204 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
205 only from one place and is handled differently). I couldn't dream up any way of
206 using a single macro to do this in a generic way, because of the many different
207 argument requirements. We know that at least one of SUPPORT_PCRE8 and
208 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
209 use these in the definitions of generic macros.
210
211 **** Special note about the PCHARSxxx macros: the address of the string to be
212 printed is always given as two arguments: a base address followed by an offset.
213 The base address is cast to the correct data size for 8 or 16 bit data; the
214 offset is in units of this size. If the string were given as base+offset in one
215 argument, the casting might be incorrectly applied. */
216
217 #ifdef SUPPORT_PCRE8
218
/* Hand a run of 8-bit code units to pchars(). The string is addressed as a
base pointer plus an offset: the base is cast to (pcre_uint8 *) first and the
offset is added afterwards, so the offset is counted in 8-bit units. The
offset is parenthesized so that a compound expression (for example
"a ? b : c") is added as a whole instead of being split by operator
precedence.

  lv      lvalue that is assigned the value returned by pchars()
  p       base address of the string
  offset  offset from p, in 8-bit units
  len     length, passed through to pchars() unchanged
  f       passed through to pchars() unchanged (presumably the output FILE *)

PCHARSV8 makes the same call and explicitly discards the result. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)
224
/* 8-bit wrappers, first group. Each macro forwards to the function of the
same name in the 8-bit library (or to a helper in this file), casting
string-like arguments to (char *). Where the first macro parameter is an
lvalue (p, code, rc) the result of the call is assigned to it. */

/* Reads a capture name via read_capture_name8(); the 16-bit name argument
is accepted but not used. */
#define READ_CAPTURE_NAME8(p, name8, name16, code) \
  p = read_capture_name8(p, name8, code)

/* Length of a zero-terminated 8-bit string, as an int. */
#define STRLEN8(str) ((int)strlen((char *)str))

/* Installs fn as the library's pcre_callout hook. */
#define SET_PCRE_CALLOUT8(fn) \
  pcre_callout = fn

#define PCRE_ASSIGN_JIT_STACK8(study, cb, cbdata) \
  pcre_assign_jit_stack(study, cb, cbdata)

#define PCRE_COMPILE8(code, pattern, opts, errptr, erroff, tbl) \
  code = pcre_compile((char *)pattern, opts, errptr, erroff, tbl)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, code, subject, ovector, nsub, \
    name, out, outsize) \
  rc = pcre_copy_named_substring(code, (char *)subject, ovector, nsub, \
    (char *)name, out, outsize)

#define PCRE_COPY_SUBSTRING8(rc, subject, ovector, nsub, idx, out, outsize) \
  rc = pcre_copy_substring((char *)subject, ovector, nsub, idx, out, outsize)

#define PCRE_DFA_EXEC8(rc, code, study, subject, length, start, opts, \
    ovector, ovecsize, wspace, wssize) \
  rc = pcre_dfa_exec(code, study, (char *)subject, length, start, opts, \
    ovector, ovecsize, wspace, wssize)

#define PCRE_EXEC8(rc, code, study, subject, length, start, opts, \
    ovector, ovecsize) \
  rc = pcre_exec(code, study, (char *)subject, length, start, opts, \
    ovector, ovecsize)
256
/* 8-bit wrappers, second group: freeing, substring extraction, byte-order
fixing, pattern printing, studying and JIT stack handling. Each forwards to
the 8-bit library function of the same name; macros with a leading lvalue
parameter (rc, n, study) assign the call's result to it. */

#define PCRE_FREE_STUDY8(study) \
  pcre_free_study(study)

#define PCRE_FREE_SUBSTRING8(str) \
  pcre_free_substring(str)

#define PCRE_FREE_SUBSTRING_LIST8(list) \
  pcre_free_substring_list(list)

#define PCRE_GET_NAMED_SUBSTRING8(rc, code, subject, ovector, nsub, \
    name, outptr) \
  rc = pcre_get_named_substring(code, (char *)subject, ovector, nsub, \
    (char *)name, outptr)

/* NOTE(review): the second parameter (rc) is never referenced; the expansion
uses whatever identifier "re" is in scope at the point of use. Kept exactly
as it was because the callers are not visible here. */
#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, subject, ovector, nsub, idx, outptr) \
  rc = pcre_get_substring((char *)subject, ovector, nsub, idx, outptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, subject, ovector, nsub, list) \
  rc = pcre_get_substring_list((const char *)subject, ovector, nsub, list)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, code, study, tbl) \
  rc = pcre_pattern_to_host_byte_order(code, study, tbl)

#define PCRE_PRINTINT8(code, out, lengths) \
  pcre_printint(code, out, lengths)

#define PCRE_STUDY8(study, code, opts, errptr) \
  study = pcre_study(code, opts, errptr)

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(jstack) \
  pcre_jit_stack_free(jstack)
294
295 #endif /* SUPPORT_PCRE8 */
296
297 /* -----------------------------------------------------------*/
298
299 #ifdef SUPPORT_PCRE16
300
/* Hand a run of 16-bit code units to pchars16(). The string is addressed as
a base pointer plus an offset: the base is cast to PCRE_SPTR16 first and the
offset is added afterwards, so the offset is counted in 16-bit units. The
offset is parenthesized so that a compound expression (for example
"a ? b : c") is added as a whole instead of being split by operator
precedence.

  lv      lvalue that is assigned the value returned by pchars16()
  p       base address of the string
  offset  offset from p, in 16-bit units
  len     length, passed through to pchars16() unchanged
  f       passed through to pchars16() unchanged (presumably the output
          FILE *)

PCHARSV16 makes the same call and explicitly discards the result. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)
306
/* 16-bit wrappers, first group. The rest of the program works with the
8-bit types (pcre, pcre_extra, ...), so these macros cast arguments to the
16-bit types on the way in and cast results back on the way out. */

/* Reads a capture name via read_capture_name16(); the 8-bit name argument
is accepted but not used. */
#define READ_CAPTURE_NAME16(p, name8, name16, code) \
  p = read_capture_name16(p, name16, code)

/* Length of a 16-bit string as reported by strlen16(), as an int. */
#define STRLEN16(str) ((int)strlen16((PCRE_SPTR16)str))

/* Installs fn as the library's pcre16_callout hook, after casting it to the
16-bit callout function type. */
#define SET_PCRE_CALLOUT16(fn) \
  pcre16_callout = (int (*)(pcre16_callout_block *))fn

#define PCRE_ASSIGN_JIT_STACK16(study, cb, cbdata) \
  pcre16_assign_jit_stack((pcre16_extra *)study, \
    (pcre16_jit_callback)cb, cbdata)

#define PCRE_COMPILE16(code, pattern, opts, errptr, erroff, tbl) \
  code = (pcre *)pcre16_compile((PCRE_SPTR16)pattern, opts, errptr, \
    erroff, tbl)
322
/* 16-bit substring copying. The output buffer is cast to (PCRE_UCHAR16 *)
and the size handed to the library is half the size supplied by the caller
(presumably a byte count being converted to a count of 16-bit units). The
size argument is parenthesized before halving so that a compound expression
such as "a + b" is halved as a whole rather than becoming "a + b/2".

  rc        lvalue that is assigned the library function's return value
  re        compiled pattern (named variant only), cast to (pcre16 *)
  bptr      subject string, cast to PCRE_SPTR16
  offsets   offsets vector, passed through unchanged
  count     passed through unchanged
  namesptr  substring name (named variant), cast to PCRE_SPTR16
  i         substring number (numbered variant)
  cbuffer   output buffer
  size      size of cbuffer as supplied by the caller; halved for the call */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, (size)/2)
331
/* 16-bit wrappers, second group: matching, freeing, substring extraction,
byte-order fixing, pattern printing, studying and JIT stack handling.
Pattern and study blocks are cast to (pcre16 *) and (pcre16_extra *),
strings to PCRE_SPTR16; results that are pointers are cast back to the 8-bit
types. Macros with a leading lvalue parameter assign the call's result to
it. */

#define PCRE_DFA_EXEC16(rc, code, study, subject, length, start, opts, \
    ovector, ovecsize, wspace, wssize) \
  rc = pcre16_dfa_exec((pcre16 *)code, (pcre16_extra *)study, \
    (PCRE_SPTR16)subject, length, start, opts, ovector, ovecsize, \
    wspace, wssize)

#define PCRE_EXEC16(rc, code, study, subject, length, start, opts, \
    ovector, ovecsize) \
  rc = pcre16_exec((pcre16 *)code, (pcre16_extra *)study, \
    (PCRE_SPTR16)subject, length, start, opts, ovector, ovecsize)

#define PCRE_FREE_STUDY16(study) \
  pcre16_free_study((pcre16_extra *)study)

#define PCRE_FREE_SUBSTRING16(str) \
  pcre16_free_substring((PCRE_SPTR16)str)

#define PCRE_FREE_SUBSTRING_LIST16(list) \
  pcre16_free_substring_list((PCRE_SPTR16 *)list)

/* The output pointer goes through (void *) before being cast to the 16-bit
pointer-to-pointer type; the same is done in the two GET_SUBSTRING macros
below. */
#define PCRE_GET_NAMED_SUBSTRING16(rc, code, subject, ovector, nsub, \
    name, outptr) \
  rc = pcre16_get_named_substring((pcre16 *)code, (PCRE_SPTR16)subject, \
    ovector, nsub, (PCRE_SPTR16)name, (PCRE_SPTR16 *)(void*)outptr)

/* NOTE(review): the second parameter (rc) is never referenced; the expansion
uses whatever identifier "re" is in scope at the point of use, and passes it
without the (pcre16 *) cast that the other macros apply. Kept exactly as it
was because the callers are not visible here. */
#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, subject, ovector, nsub, idx, outptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)subject, ovector, nsub, idx, \
    (PCRE_SPTR16 *)(void*)outptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, subject, ovector, nsub, list) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)subject, ovector, nsub, \
    (PCRE_SPTR16 **)(void*)list)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, code, study, tbl) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)code, \
    (pcre16_extra *)study, tbl)

#define PCRE_PRINTINT16(code, out, lengths) \
  pcre16_printint(code, out, lengths)

#define PCRE_STUDY16(study, code, opts, errptr) \
  study = (pcre_extra *)pcre16_study((pcre16 *)code, opts, errptr)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(jstack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)jstack)
383
384 #endif /* SUPPORT_PCRE16 */
385
386
387 /* ----- Both modes are supported; a runtime test is needed, except for
388 pcre_config(), and the JIT stack functions, when it doesn't matter which
389 version is called. ----- */
390
391 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
392
/* Generic macros for a build that supports both widths: each one tests
use_pcre16 at run time and expands to the 8-bit or the 16-bit variant. The
statement-like macros expand to a complete if/else with no trailing
semicolon, so they are meant to be used as a statement followed by ";". */

/* Size of one code unit in bytes. */
#define CHAR_SIZE (!use_pcre16? 1:2)

#define PCHARS(lv, p, offset, len, f) \
  if (!use_pcre16) \
    PCHARS8(lv, p, offset, len, f); \
  else \
    PCHARS16(lv, p, offset, len, f)

#define PCHARSV(p, offset, len, f) \
  if (!use_pcre16) \
    PCHARSV8(p, offset, len, f); \
  else \
    PCHARSV16(p, offset, len, f)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  if (!use_pcre16) \
    READ_CAPTURE_NAME8(p, cn8, cn16, re); \
  else \
    READ_CAPTURE_NAME16(p, cn8, cn16, re)

#define SET_PCRE_CALLOUT(callout) \
  if (!use_pcre16) \
    SET_PCRE_CALLOUT8(callout); \
  else \
    SET_PCRE_CALLOUT16(callout)

/* Expression form, so it can be used wherever a value is needed. */
#define STRLEN(p) (!use_pcre16? STRLEN8(p) : STRLEN16(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  if (!use_pcre16) \
    PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); \
  else \
    PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  if (!use_pcre16) \
    PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
  else \
    PCRE_COMPILE16(re, pat, options, error, erroffset, tables)
432
/* Generic (run-time selected) macros, continued. Apart from PCRE_CONFIG,
which always names the 8-bit function, each one tests use_pcre16 and expands
to the 8-bit or the 16-bit variant with the same arguments. */

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  if (!use_pcre16) \
    PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else \
    PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  if (!use_pcre16) \
    PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
  else \
    PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  if (!use_pcre16) \
    PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else \
    PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  if (!use_pcre16) \
    PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else \
    PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets)

#define PCRE_FREE_STUDY(extra) \
  if (!use_pcre16) \
    PCRE_FREE_STUDY8(extra); \
  else \
    PCRE_FREE_STUDY16(extra)

#define PCRE_FREE_SUBSTRING(substring) \
  if (!use_pcre16) \
    PCRE_FREE_SUBSTRING8(substring); \
  else \
    PCRE_FREE_SUBSTRING16(substring)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  if (!use_pcre16) \
    PCRE_FREE_SUBSTRING_LIST8(listptr); \
  else \
    PCRE_FREE_SUBSTRING_LIST16(listptr)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  if (!use_pcre16) \
    PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else \
    PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  if (!use_pcre16) \
    PCRE_GET_STRINGNUMBER8(n, rc, ptr); \
  else \
    PCRE_GET_STRINGNUMBER16(n, rc, ptr)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  if (!use_pcre16) \
    PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
  else \
    PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  if (!use_pcre16) \
    PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
  else \
    PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr)
512
/* Generic (run-time selected) macros, final group. PCRE_JIT_STACK_ALLOC and
PCRE_MAKETABLES are expressions that yield a value; the others are
statement-like if/else expansions keyed on use_pcre16. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (!use_pcre16 ? \
    PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
    :PCRE_JIT_STACK_ALLOC16(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  if (!use_pcre16) \
    PCRE_JIT_STACK_FREE8(stack); \
  else \
    PCRE_JIT_STACK_FREE16(stack)

#define PCRE_MAKETABLES \
  (!use_pcre16? pcre_maketables() : pcre16_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  if (!use_pcre16) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); \
  else \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  if (!use_pcre16) \
    PCRE_PRINTINT8(re, outfile, debug_lengths); \
  else \
    PCRE_PRINTINT16(re, outfile, debug_lengths)

#define PCRE_STUDY(extra, re, options, error) \
  if (!use_pcre16) \
    PCRE_STUDY8(extra, re, options, error); \
  else \
    PCRE_STUDY16(extra, re, options, error)
544
545 /* ----- Only 8-bit mode is supported ----- */
546
547 #elif defined SUPPORT_PCRE8
/* 8-bit-only build: every generic name is a plain alias for its 8-bit
variant, so no run-time test is involved. */

/* Code unit size and local helpers */
#define CHAR_SIZE                        1
#define PCHARS                           PCHARS8
#define PCHARSV                          PCHARSV8
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT8
#define STRLEN                           STRLEN8

/* Library calls */
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE                     PCRE_COMPILE8
#define PCRE_CONFIG                      pcre_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC8
#define PCRE_EXEC                        PCRE_EXEC8
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE8
#define PCRE_MAKETABLES                  pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT                    PCRE_PRINTINT8
#define PCRE_STUDY                       PCRE_STUDY8
574
575 /* ----- Only 16-bit mode is supported ----- */
576
577 #else
/* 16-bit-only build: every generic name is a plain alias for its 16-bit
variant, so no run-time test is involved. */

/* Code unit size and local helpers */
#define CHAR_SIZE                        2
#define PCHARS                           PCHARS16
#define PCHARSV                          PCHARSV16
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT16
#define STRLEN                           STRLEN16

/* Library calls */
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE                     PCRE_COMPILE16
#define PCRE_CONFIG                      pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC16
#define PCRE_EXEC                        PCRE_EXEC16
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE16
#define PCRE_MAKETABLES                  pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT                    PCRE_PRINTINT16
#define PCRE_STUDY                       PCRE_STUDY16
604 #endif
605
606 /* ----- End of mode-specific function call macros ----- */
607
608
609 /* Other parameters */
610
/* Fallback for systems whose <time.h> does not supply CLOCKS_PER_SEC: use
CLK_TCK when that exists, otherwise assume 100. */

#if !defined CLOCKS_PER_SEC && defined CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#elif !defined CLOCKS_PER_SEC
#define CLOCKS_PER_SEC 100
#endif

/* Only defined when the DFA interface is compiled in (NODFA not set);
presumably the dimension of the DFA workspace - confirm at the point of
use. */

#ifndef NODFA
#define DFA_WS_DIMENSION 1000
#endif

/* Default number of repetitions of the loop used for timing. */

#define LOOPREPEAT 500000
626
627 /* Static variables */
628
/* File-scope state. Only locale_set is used within this part of the file;
the groupings below follow the variable names, and the exact roles should be
confirmed where the variables are used. */

static FILE *outfile;
static int log_store = 0;

/* Tested by PRINTOK: non-zero selects isprint(), zero the fixed PRINTABLE()
range. */
static int locale_set = 0;

/* Callout-related state. */
static int callout_count, callout_extra;
static int callout_fail_count, callout_fail_id;
static int first_callout;
static const unsigned char *last_callout_mark = NULL;

/* Miscellaneous settings and indicators. */
static int debug_lengths;
static int jit_was_used;
static int show_malloc;
static int use_utf;

/* Memory-size bookkeeping. */
static size_t gotten_store;
static size_t first_gotten_store = 0;
644
645 /* The buffers grow automatically if very long input lines are encountered. */
646
647 static int buffer_size = 50000;
648 static pcre_uint8 *buffer = NULL;
649 static pcre_uint8 *dbuffer = NULL;
650 static pcre_uint8 *pbuffer = NULL;
651
652 /* Another buffer is needed for translation to 16-bit character strings. It will
653 be obtained and extended as required. */
654
#if defined SUPPORT_PCRE16

/* Buffer for 16-bit strings together with its current size; both start out
empty. */
static pcre_uint16 *buffer16 = NULL;
static int buffer16_size = 0;

#if defined SUPPORT_PCRE8

/* With both widths supported, this file is compiled with the 8-bit settings
in force. OP_LENGTHS is a macro, so the 16-bit operator length table can
still be built here, provided LINK_SIZE and IMM2_SIZE are first re-expressed
in 16-bit units: a LINK_SIZE of 2 becomes 1, 3 or 4 becomes 2, and IMM2_SIZE
becomes 1. The table is needed for swapping bytes in a 16-bit pattern when
testing saving and reloading. As a safety check, COMPILE_PCRE16 must not be
set in this configuration. */

#if defined COMPILE_PCRE16
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

#if LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#elif LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif

#undef IMM2_SIZE
#define IMM2_SIZE 1

#endif /* SUPPORT_PCRE8 */

/* Operator lengths for 16-bit compiled patterns, expanded from OP_LENGTHS
with whatever LINK_SIZE and IMM2_SIZE are in force at this point. */
static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };

#endif /* SUPPORT_PCRE16 */
688
689 /* If we have 8-bit support, default use_pcre16 to false; if there is also
690 16-bit support, it can be changed by an option. If there is no 8-bit support,
691 there must be 16-bit support, so default it to 1. */
692
/* Run-time width selector tested by the generic macros: starts at 0 (8-bit)
whenever the 8-bit library is supported, otherwise at 1 (16-bit). */
#if !defined SUPPORT_PCRE8
static int use_pcre16 = 1;
#else
static int use_pcre16 = 0;
#endif
698
699 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
700
701 static int jit_study_bits[] =
702 {
703 PCRE_STUDY_JIT_COMPILE,
704 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
705 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
706 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
707 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
708 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
709 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
710 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
711 };
712
713 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
714 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
715
716 /* Textual explanations for runtime error codes */
717
/* Messages for run-time error codes, one slot per code; presumably indexed
by the negated PCRE_ERROR_xxx value - confirm at the point of use. A NULL
slot marks a code that has no message here, for the reason noted beside
it. */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8/16 is handled specially */
  /* 11 */ NULL,  /* BADUTF8/16 offset is handled specially */
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8/16 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached",
  /* 28 */ "pattern compiled in wrong mode: 8-bit/16-bit error",
  /* 29 */ "pattern compiled with other endianness",
  /* 30 */ "invalid data in workspace for DFA restart"
};
751
752
753 /*************************************************
754 * Alternate character tables *
755 *************************************************/
756
757 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
758 using the default tables of the library. However, the T option can be used to
759 select alternate sets of tables, for different kinds of testing. Note also that
760 the L (locale) option also adjusts the tables. */
761
762 /* This is the set of tables distributed as default with PCRE. It recognizes
763 only ASCII characters. */
764
765 static const pcre_uint8 tables0[] = {
766
767 /* This table is a lower casing table. */
768
769 0, 1, 2, 3, 4, 5, 6, 7,
770 8, 9, 10, 11, 12, 13, 14, 15,
771 16, 17, 18, 19, 20, 21, 22, 23,
772 24, 25, 26, 27, 28, 29, 30, 31,
773 32, 33, 34, 35, 36, 37, 38, 39,
774 40, 41, 42, 43, 44, 45, 46, 47,
775 48, 49, 50, 51, 52, 53, 54, 55,
776 56, 57, 58, 59, 60, 61, 62, 63,
777 64, 97, 98, 99,100,101,102,103,
778 104,105,106,107,108,109,110,111,
779 112,113,114,115,116,117,118,119,
780 120,121,122, 91, 92, 93, 94, 95,
781 96, 97, 98, 99,100,101,102,103,
782 104,105,106,107,108,109,110,111,
783 112,113,114,115,116,117,118,119,
784 120,121,122,123,124,125,126,127,
785 128,129,130,131,132,133,134,135,
786 136,137,138,139,140,141,142,143,
787 144,145,146,147,148,149,150,151,
788 152,153,154,155,156,157,158,159,
789 160,161,162,163,164,165,166,167,
790 168,169,170,171,172,173,174,175,
791 176,177,178,179,180,181,182,183,
792 184,185,186,187,188,189,190,191,
793 192,193,194,195,196,197,198,199,
794 200,201,202,203,204,205,206,207,
795 208,209,210,211,212,213,214,215,
796 216,217,218,219,220,221,222,223,
797 224,225,226,227,228,229,230,231,
798 232,233,234,235,236,237,238,239,
799 240,241,242,243,244,245,246,247,
800 248,249,250,251,252,253,254,255,
801
802 /* This table is a case flipping table. */
803
804 0, 1, 2, 3, 4, 5, 6, 7,
805 8, 9, 10, 11, 12, 13, 14, 15,
806 16, 17, 18, 19, 20, 21, 22, 23,
807 24, 25, 26, 27, 28, 29, 30, 31,
808 32, 33, 34, 35, 36, 37, 38, 39,
809 40, 41, 42, 43, 44, 45, 46, 47,
810 48, 49, 50, 51, 52, 53, 54, 55,
811 56, 57, 58, 59, 60, 61, 62, 63,
812 64, 97, 98, 99,100,101,102,103,
813 104,105,106,107,108,109,110,111,
814 112,113,114,115,116,117,118,119,
815 120,121,122, 91, 92, 93, 94, 95,
816 96, 65, 66, 67, 68, 69, 70, 71,
817 72, 73, 74, 75, 76, 77, 78, 79,
818 80, 81, 82, 83, 84, 85, 86, 87,
819 88, 89, 90,123,124,125,126,127,
820 128,129,130,131,132,133,134,135,
821 136,137,138,139,140,141,142,143,
822 144,145,146,147,148,149,150,151,
823 152,153,154,155,156,157,158,159,
824 160,161,162,163,164,165,166,167,
825 168,169,170,171,172,173,174,175,
826 176,177,178,179,180,181,182,183,
827 184,185,186,187,188,189,190,191,
828 192,193,194,195,196,197,198,199,
829 200,201,202,203,204,205,206,207,
830 208,209,210,211,212,213,214,215,
831 216,217,218,219,220,221,222,223,
832 224,225,226,227,228,229,230,231,
833 232,233,234,235,236,237,238,239,
834 240,241,242,243,244,245,246,247,
835 248,249,250,251,252,253,254,255,
836
837 /* This table contains bit maps for various character classes. Each map is 32
838 bytes long and the bits run from the least significant end of each byte. The
839 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
840 graph, print, punct, and cntrl. Other classes are built from combinations. */
841
842 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
843 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846
847 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
848 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851
852 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
853 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
855 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
856
857 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
858 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
859 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
860 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
861
862 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
863 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
864 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
865 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
866
867 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
868 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
869 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
870 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
871
872 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
873 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
874 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
875 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
876
877 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
878 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
880 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
881
882 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
883 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
884 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
885 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
886
887 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
888 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
889 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
890 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
891
892 /* This table identifies various classes of character by individual bits:
893 0x01 white space character
894 0x02 letter
895 0x04 decimal digit
896 0x08 hexadecimal digit
897 0x10 alphanumeric or '_'
898 0x80 regular expression metacharacter or binary zero
899 */
900
901 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
902 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
903 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
904 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
905 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
906 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
907 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
908 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
909 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
910 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
911 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
912 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
913 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
914 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
915 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
916 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
917 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
918 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
919 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
920 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
921 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
922 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
923 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
924 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
925 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
926 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
927 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
928 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
929 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
930 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
931 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
932 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
933
/* This is a set of tables that came originally from a Windows user. It seems to
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc. */
937
/* Alternative character tables: 1088 bytes, laid out as four consecutive
sections (256 + 256 + 320 + 256). The section headings below follow the
commented layout of the built-in table that precedes this one in the file;
NOTE(review): confirm the section meanings against pcre_maketables(). */

static const pcre_uint8 tables1[] = {

/* Section 1 (256 bytes): looks like the lower-casing table - 'A'-'Z' (65-90)
map to 97-122, and 192-214 and 216-222 map to the values 32 higher. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Section 2 (256 bytes): looks like the case-flipping table - upper and lower
case letters map to each other, including the pairs in the range 192-254. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Section 3 (320 bytes): presumably ten character-class bit maps of 32 bytes
each, in the same order as in the built-in table (space, xdigit, digit, upper,
lower, word, graph, print, punct, cntrl). Unlike the built-in maps, several of
these have bits set for characters above 127. */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Section 4 (256 bytes): presumably the per-character type flags, using the
same bit meanings as the built-in table (0x01 space, 0x02 letter, 0x04 digit,
0x08 hex digit, 0x10 word character, 0x80 metacharacter or binary zero). */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1076
1077
1078
1079
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that have no strerror() of their own (SunOS4 is one
example). The message is looked up in the sys_errlist[] table, which is treated
as holding sys_nerr entries.

Argument:
  n          the error number

Returns:     the table entry for n, or a fixed "unknown" text when n is
             negative or not below sys_nerr
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (0 <= n && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
#endif /* HAVE_STRERROR */
1099
1100
1101
1102 /*************************************************
1103 * Print newline configuration *
1104 *************************************************/
1105
1106 /*
1107 Arguments:
1108 rc the return code from PCRE_CONFIG_NEWLINE
1109 isc TRUE if called from "-C newline"
1110 Returns: nothing
1111 */
1112
1113 static void
1114 print_newline_config(int rc, BOOL isc)
1115 {
1116 const char *s = NULL;
1117 if (!isc) printf(" Newline sequence is ");
1118 switch(rc)
1119 {
1120 case CHAR_CR: s = "CR"; break;
1121 case CHAR_LF: s = "LF"; break;
1122 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1123 case -1: s = "ANY"; break;
1124 case -2: s = "ANYCRLF"; break;
1125
1126 default:
1127 printf("a non-standard value: 0x%04x\n", rc);
1128 return;
1129 }
1130
1131 printf("%s\n", s);
1132 }
1133
1134
1135
1136 /*************************************************
1137 * JIT memory callback *
1138 *************************************************/
1139
1140 static pcre_jit_stack* jit_callback(void *arg)
1141 {
1142 jit_was_used = TRUE;
1143 return (pcre_jit_stack *)arg;
1144 }
1145
1146
1147 #if !defined NOUTF || defined SUPPORT_PCRE16
1148 /*************************************************
1149 * Convert UTF-8 string to value *
1150 *************************************************/
1151
1152 /* This function takes one or more bytes that represents a UTF-8 character,
1153 and returns the value of the character.
1154
1155 Argument:
1156 utf8bytes a pointer to the byte vector
1157 vptr a pointer to an int to receive the value
1158
1159 Returns: > 0 => the number of bytes consumed
1160 -6 to 0 => malformed UTF-8 character at offset = (-return)
1161 */
1162
1163 static int
1164 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1165 {
1166 int c = *utf8bytes++;
1167 int d = c;
1168 int i, j, s;
1169
1170 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1171 {
1172 if ((d & 0x80) == 0) break;
1173 d <<= 1;
1174 }
1175
1176 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1177 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1178
1179 /* i now has a value in the range 1-5 */
1180
1181 s = 6*i;
1182 d = (c & utf8_table3[i]) << s;
1183
1184 for (j = 0; j < i; j++)
1185 {
1186 c = *utf8bytes++;
1187 if ((c & 0xc0) != 0x80) return -(j+1);
1188 s -= 6;
1189 d |= (c & 0x3f) << s;
1190 }
1191
1192 /* Check that encoding was the correct unique one */
1193
1194 for (j = 0; j < utf8_table1_size; j++)
1195 if (d <= utf8_table1[j]) break;
1196 if (j != i) return -(i+1);
1197
1198 /* Valid value */
1199
1200 *vptr = d;
1201 return i+1;
1202 }
1203 #endif /* NOUTF || SUPPORT_PCRE16 */
1204
1205
1206
1207 #if !defined NOUTF || defined SUPPORT_PCRE16
1208 /*************************************************
1209 * Convert character value to UTF-8 *
1210 *************************************************/
1211
1212 /* This function takes an integer value in the range 0 - 0x7fffffff
1213 and encodes it as a UTF-8 character in 0 to 6 bytes.
1214
1215 Arguments:
1216 cvalue the character value
1217 utf8bytes pointer to buffer for result - at least 6 bytes long
1218
1219 Returns: number of characters placed in the buffer
1220 */
1221
1222 static int
1223 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1224 {
1225 register int i, j;
1226 for (i = 0; i < utf8_table1_size; i++)
1227 if (cvalue <= utf8_table1[i]) break;
1228 utf8bytes += i;
1229 for (j = i; j > 0; j--)
1230 {
1231 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1232 cvalue >>= 6;
1233 }
1234 *utf8bytes = utf8_table2[i] | cvalue;
1235 return i + 1;
1236 }
1237 #endif
1238
1239
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is replaced by a larger block
when it is smaller than 2*len + 2 bytes. In non-UTF mode the space needed for
a 16-bit string is exactly double the 8-bit size. For a UTF-8 string the size
needed for UTF-16 is no more than double: values up to 0xffff use at most 3
bytes in UTF-8 and 2 in UTF-16, and higher values use 4 bytes in both.

Surrogate values are deliberately not rejected, so that invalid UTF-16 strings
can be constructed for testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode

If memory cannot be obtained, a message is written to stderr and the program
exits.
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;

if (buffer16_size < needed)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  int c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;

    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which is allowed only in UTF mode. */

      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: each byte simply widens to one 16-bit unit. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1318
1319
1320 /*************************************************
1321 * Read or extend an input line *
1322 *************************************************/
1323
1324 /* Input lines are read into buffer, but both patterns and data lines can be
1325 continued over multiple input lines. In addition, if the buffer fills up, we
1326 want to automatically expand it so as to be able to handle extremely large
1327 lines that are needed for certain stress tests. When the input buffer is
1328 expanded, the other two buffers must also be expanded likewise, and the
1329 contents of pbuffer, which are a copy of the input for callouts, must be
1330 preserved (for when expansion happens for a data line). This is not the most
1331 optimal way of handling this, but hey, this is just a test program!
1332
1333 Arguments:
1334 f the file to read
1335 start where in buffer to start (this *must* be within buffer)
1336 prompt for stdin or readline()
1337
1338 Returns: pointer to the start of new data
1339 could be a copy of start, or could be moved
1340 NULL if no data read and EOF reached
1341 */
1342
1343 static pcre_uint8 *
1344 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1345 {
1346 pcre_uint8 *here = start;
1347
1348 for (;;)
1349 {
1350 size_t rlen = (size_t)(buffer_size - (here - buffer));
1351
1352 if (rlen > 1000)
1353 {
1354 int dlen;
1355
1356 /* If libreadline or libedit support is required, use readline() to read a
1357 line if the input is a terminal. Note that readline() removes the trailing
1358 newline, so we must put it back again, to be compatible with fgets(). */
1359
1360 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1361 if (isatty(fileno(f)))
1362 {
1363 size_t len;
1364 char *s = readline(prompt);
1365 if (s == NULL) return (here == start)? NULL : start;
1366 len = strlen(s);
1367 if (len > 0) add_history(s);
1368 if (len > rlen - 1) len = rlen - 1;
1369 memcpy(here, s, len);
1370 here[len] = '\n';
1371 here[len+1] = 0;
1372 free(s);
1373 }
1374 else
1375 #endif
1376
1377 /* Read the next line by normal means, prompting if the file is stdin. */
1378
1379 {
1380 if (f == stdin) printf("%s", prompt);
1381 if (fgets((char *)here, rlen, f) == NULL)
1382 return (here == start)? NULL : start;
1383 }
1384
1385 dlen = (int)strlen((char *)here);
1386 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1387 here += dlen;
1388 }
1389
1390 else
1391 {
1392 int new_buffer_size = 2*buffer_size;
1393 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1394 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1395 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1396
1397 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1398 {
1399 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1400 exit(1);
1401 }
1402
1403 memcpy(new_buffer, buffer, buffer_size);
1404 memcpy(new_pbuffer, pbuffer, buffer_size);
1405
1406 buffer_size = new_buffer_size;
1407
1408 start = new_buffer + (start - buffer);
1409 here = new_buffer + (here - buffer);
1410
1411 free(buffer);
1412 free(dbuffer);
1413 free(pbuffer);
1414
1415 buffer = new_buffer;
1416 dbuffer = new_dbuffer;
1417 pbuffer = new_pbuffer;
1418 }
1419 }
1420
1421 return NULL; /* Control never gets here */
1422 }
1423
1424
1425
1426 /*************************************************
1427 * Read number from string *
1428 *************************************************/
1429
1430 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1431 around with conditional compilation, just do the job by hand. It is only used
1432 for unpicking arguments, so just keep it simple.
1433
1434 Arguments:
1435 str string to be converted
1436 endptr where to put the end pointer
1437
1438 Returns: the unsigned long
1439 */
1440
1441 static int
1442 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1443 {
1444 int result = 0;
1445 while(*str != 0 && isspace(*str)) str++;
1446 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1447 *endptr = str;
1448 return(result);
1449 }
1450
1451
1452
1453 /*************************************************
1454 * Print one character *
1455 *************************************************/
1456
1457 /* Print a single character either literally, or as a hex escape. */
1458
1459 static int pchar(int c, FILE *f)
1460 {
1461 if (PRINTOK(c))
1462 {
1463 if (f != NULL) fprintf(f, "%c", c);
1464 return 1;
1465 }
1466
1467 if (c < 0x100)
1468 {
1469 if (use_utf)
1470 {
1471 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1472 return 6;
1473 }
1474 else
1475 {
1476 if (f != NULL) fprintf(f, "\\x%02x", c);
1477 return 4;
1478 }
1479 }
1480
1481 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1482 return (c <= 0x000000ff)? 6 :
1483 (c <= 0x00000fff)? 7 :
1484 (c <= 0x0000ffff)? 8 :
1485 (c <= 0x000fffff)? 9 : 10;
1486 }
1487
1488
1489
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing.

Arguments:
  p          the string
  length     its length in bytes; a negative value means "use strlen()"
  f          the output file, or NULL

Returns:     the sum of the values returned by pchar() for each character
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int c = 0;
int yield = 0;

if (length < 0)
  length = strlen((char *)p);

while (length > 0)
  {
  int used = 0;               /* bytes taken by a valid UTF-8 character */

#if !defined NOUTF
  if (use_utf)
    {
    int rc = utf82ord(p, &c);
    if (rc > 0 && rc <= length) used = rc;   /* Mustn't run over the end */
    }
#endif

  /* Not in UTF mode, or no usable UTF-8 character here: take one byte. */

  if (used == 0)
    {
    c = *p;
    used = 1;
    }

  p += used;
  length -= used;
  yield += pchar(c, f);
  }

return yield;
}
#endif
1528
1529
1530
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Counts the 16-bit units that precede the terminating zero unit.

Argument:
  p          the string

Returns:     the number of units, not counting the terminator
*/

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1543
1544
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing.

In utf mode a high surrogate (0xD800-0xDBFF) that is immediately followed by a
low surrogate (0xDC00-0xDFFF) is combined with it into a single value above
0xffff. A surrogate that is not part of such a pair is printed as it stands.

Arguments:
  p          the string
  length     its length in 16-bit units; a negative value means that the
             string is zero-terminated
  f          the output file, or NULL

Returns:     the sum of the values returned by pchar() for each character
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The low surrogate range includes 0xDFFF itself, so the upper limit
    has to be tested with <=. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1581
1582
1583
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy a run of alphanumeric characters from the input to the name store,
follow it with two zero bytes, and look the name up in the compiled pattern.
If the pattern has no such name, a message is written to outfile; the name is
stored in either case.

Arguments:
  p          points to the start of the name in the input
  pp         points to the pointer to where the name is to be stored; on
             return the pointer has been moved on to the second of the two
             zero bytes, which is where a following name would start
  re         the compiled pattern

Returns:     the updated input pointer (the first non-alphanumeric character)
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1607
1608
1609
#ifdef SUPPORT_PCRE16
/*************************************************
*     Read a capture name (16-bit) and check it  *
*************************************************/

/* Note that the text being read is 8-bit. Each alphanumeric input byte is
widened to one 16-bit unit in the name store, two zero units are added, and
the name is looked up in the compiled pattern. If the pattern has no such
name, a message is written to outfile; the name is stored in either case.

Arguments:
  p          points to the start of the name in the (8-bit) input
  pp         points to the pointer to where the name is to be stored; on
             return the pointer has been moved on to the second of the two
             zero units, which is where a following name would start
  re         the compiled pattern

Returns:     the updated input pointer (the first non-alphanumeric character)
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1634
1635
1636
/*************************************************
*              Callout function                  *
*************************************************/

/* Called from PCRE as a result of the (?C) item. We print out where we are in
the match. Yield zero unless more callouts than the fail count, or the callout
data is not zero.

Argument:
  cb         the callout block supplied by PCRE

Returns:     the callout data value, if callout data was supplied and is
               not zero
             1 if this callout's number is callout_fail_id and the running
               count of such callouts has reached callout_fail_count
             0 otherwise
*/

static int callout(pcre_callout_block *cb)
{
/* The subject is echoed only for the first callout of a match, or always when
extra information was requested; otherwise f is NULL. */

FILE *f = (first_callout | callout_extra)? outfile : NULL;
int i, pre_start, post_start, subject_length;

/* With callout_extra set, f is outfile, so it is safe to print through it
here. Show each capturing pair that has been set so far. */

if (callout_extra)
  {
  fprintf(f, "Callout %d: last capture = %d\n",
    cb->callout_number, cb->capture_last);

  for (i = 0; i < cb->capture_top * 2; i += 2)
    {
    if (cb->offset_vector[i] < 0)
      fprintf(f, "%2d: <unset>\n", i/2);
    else
      {
      fprintf(f, "%2d: ", i/2);
      PCHARSV(cb->subject, cb->offset_vector[i],
        cb->offset_vector[i+1] - cb->offset_vector[i], f);
      fprintf(f, "\n");
      }
    }
  }

/* Re-print the subject in canonical form, the first time or if giving full
details. On subsequent calls in the same match, we use pchars just to find the
printed lengths of the substrings. */

if (f != NULL) fprintf(f, "--->");

/* NOTE(review): PCHARS presumably stores the printed width in its first
argument, and PCHARSV discards it - confirm against the macro definitions.
pre_start is for the text before the match start, post_start for the text
matched so far, and subject_length for the whole subject. */

PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
PCHARS(post_start, cb->subject, cb->start_match,
  cb->current_position - cb->start_match, f);

PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);

PCHARSV(cb->subject, cb->current_position,
  cb->subject_length - cb->current_position, f);

if (f != NULL) fprintf(f, "\n");

/* Always print appropriate indicators, with callout number if not already
shown. For automatic callouts, show the pattern offset. */

if (cb->callout_number == 255)
  {
  fprintf(outfile, "%+3d ", cb->pattern_position);
  if (cb->pattern_position > 99) fprintf(outfile, "\n ");
  }
else
  {
  if (callout_extra) fprintf(outfile, " ");
    else fprintf(outfile, "%3d ", cb->callout_number);
  }

/* Put one circumflex under the start of the match and, if anything has been
matched, a second one under the current position. */

for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
fprintf(outfile, "^");

if (post_start > 0)
  {
  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
  fprintf(outfile, "^");
  }

/* Pad past the end of the subject, then show the next pattern item, taken
from the copy of the pattern in pbuffer (one character if the item length is
given as zero). */

for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
  fprintf(outfile, " ");

fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
  pbuffer + cb->pattern_position);

fprintf(outfile, "\n");
first_callout = 0;

/* Report the mark only when its pointer differs from the one seen at the
previous callout. */

if (cb->mark != last_callout_mark)
  {
  if (cb->mark == NULL)
    fprintf(outfile, "Latest Mark: <unset>\n");
  else
    {
    fprintf(outfile, "Latest Mark: ");
    PCHARSV(cb->mark, 0, -1, outfile);
    putc('\n', outfile);
    }
  last_callout_mark = cb->mark;
  }

/* Non-zero callout data is reported and becomes the return value. */

if (cb->callout_data != NULL)
  {
  int callout_data = *((int *)(cb->callout_data));
  if (callout_data != 0)
    {
    fprintf(outfile, "Callout data = %d\n", callout_data);
    return callout_data;
    }
  }

/* Only callouts whose number is callout_fail_id are counted towards the fail
count. */

return (cb->callout_number != callout_fail_id)? 0 :
       (++callout_count >= callout_fail_count)? 1 : 0;
}
1744
1745
1746 /*************************************************
1747 * Local malloc functions *
1748 *************************************************/
1749
1750 /* Alternative malloc function, to test functionality and save the size of a
1751 compiled re, which is the first store request that pcre_compile() makes. The
1752 show_malloc variable is set only during matching. */
1753
1754 static void *new_malloc(size_t size)
1755 {
1756 void *block = malloc(size);
1757 gotten_store = size;
1758 if (first_gotten_store == 0) first_gotten_store = size;
1759 if (show_malloc)
1760 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1761 return block;
1762 }
1763
1764 static void new_free(void *block)
1765 {
1766 if (show_malloc)
1767 fprintf(outfile, "free %p\n", block);
1768 free(block);
1769 }
1770
1771 /* For recursion malloc/free, to test stacking calls */
1772
1773 static void *stack_malloc(size_t size)
1774 {
1775 void *block = malloc(size);
1776 if (show_malloc)
1777 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1778 return block;
1779 }
1780
1781 static void stack_free(void *block)
1782 {
1783 if (show_malloc)
1784 fprintf(outfile, "stack_free %p\n", block);
1785 free(block);
1786 }
1787
1788
1789 /*************************************************
1790 * Call pcre_fullinfo() *
1791 *************************************************/
1792
1793 /* Get one piece of information from the pcre_fullinfo() function. When only
1794 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1795 value, but the code is defensive.
1796
1797 Arguments:
1798 re compiled regex
1799 study study data
1800 option PCRE_INFO_xxx option
1801 ptr where to put the data
1802
1803 Returns: 0 when OK, < 0 on error
1804 */
1805
1806 static int
1807 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1808 {
1809 int rc;
1810
1811 if (use_pcre16)
1812 #ifdef SUPPORT_PCRE16
1813 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1814 #else
1815 rc = PCRE_ERROR_BADMODE;
1816 #endif
1817 else
1818 #ifdef SUPPORT_PCRE8
1819 rc = pcre_fullinfo(re, study, option, ptr);
1820 #else
1821 rc = PCRE_ERROR_BADMODE;
1822 #endif
1823
1824 if (rc < 0)
1825 {
1826 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1827 use_pcre16? "16" : "", option);
1828 if (rc == PCRE_ERROR_BADMODE)
1829 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1830 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1831 }
1832
1833 return rc;
1834 }
1835
1836
1837
1838 /*************************************************
1839 * Swap byte functions *
1840 *************************************************/
1841
1842 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1843 value, respectively.
1844
1845 Arguments:
1846 value any number
1847
1848 Returns: the byte swapped value
1849 */
1850
1851 static pcre_uint32
1852 swap_uint32(pcre_uint32 value)
1853 {
1854 return ((value & 0x000000ff) << 24) |
1855 ((value & 0x0000ff00) << 8) |
1856 ((value & 0x00ff0000) >> 8) |
1857 (value >> 24);
1858 }
1859
1860 static pcre_uint16
1861 swap_uint16(pcre_uint16 value)
1862 {
1863 return (value >> 8) | (value << 8);
1864 }
1865
1866
1867
1868 /*************************************************
1869 * Flip bytes in a compiled pattern *
1870 *************************************************/
1871
1872 /* This function is called if the 'F' option was present on a pattern that is
1873 to be written to a file. We flip the bytes of all the integer fields in the
1874 regex data block and the study block. In 16-bit mode this also flips relevant
1875 bytes in the pattern itself. This is to make it possible to test PCRE's
1876 ability to reload byte-flipped patterns, e.g. those compiled on a different
1877 architecture. */
1878
1879 static void
1880 regexflip(pcre *ere, pcre_extra *extra)
1881 {
1882 REAL_PCRE *re = (REAL_PCRE *)ere;
1883 #ifdef SUPPORT_PCRE16
1884 int op;
1885 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1886 int length = re->name_count * re->name_entry_size;
1887 #ifdef SUPPORT_UTF
1888 BOOL utf = (re->options & PCRE_UTF16) != 0;
1889 BOOL utf16_char = FALSE;
1890 #endif /* SUPPORT_UTF */
1891 #endif /* SUPPORT_PCRE16 */
1892
1893 /* Always flip the bytes in the main data block and study blocks. */
1894
1895 re->magic_number = REVERSED_MAGIC_NUMBER;
1896 re->size = swap_uint32(re->size);
1897 re->options = swap_uint32(re->options);
1898 re->flags = swap_uint16(re->flags);
1899 re->top_bracket = swap_uint16(re->top_bracket);
1900 re->top_backref = swap_uint16(re->top_backref);
1901 re->first_char = swap_uint16(re->first_char);
1902 re->req_char = swap_uint16(re->req_char);
1903 re->name_table_offset = swap_uint16(re->name_table_offset);
1904 re->name_entry_size = swap_uint16(re->name_entry_size);
1905 re->name_count = swap_uint16(re->name_count);
1906
1907 if (extra != NULL)
1908 {
1909 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1910 rsd->size = swap_uint32(rsd->size);
1911 rsd->flags = swap_uint32(rsd->flags);
1912 rsd->minlength = swap_uint32(rsd->minlength);
1913 }
1914
1915 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1916 in the name table, if present, and then in the pattern itself. */
1917
1918 #ifdef SUPPORT_PCRE16
1919 if (!use_pcre16) return;
1920
1921 while(TRUE)
1922 {
1923 /* Swap previous characters. */
1924 while (length-- > 0)
1925 {
1926 *ptr = swap_uint16(*ptr);
1927 ptr++;
1928 }
1929 #ifdef SUPPORT_UTF
1930 if (utf16_char)
1931 {
1932 if ((ptr[-1] & 0xfc00) == 0xd800)
1933 {
1934 /* We know that there is only one extra character in UTF-16. */
1935 *ptr = swap_uint16(*ptr);
1936 ptr++;
1937 }
1938 }
1939 utf16_char = FALSE;
1940 #endif /* SUPPORT_UTF */
1941
1942 /* Get next opcode. */
1943
1944 length = 0;
1945 op = *ptr;
1946 *ptr++ = swap_uint16(op);
1947
1948 switch (op)
1949 {
1950 case OP_END:
1951 return;
1952
1953 #ifdef SUPPORT_UTF
1954 case OP_CHAR:
1955 case OP_CHARI:
1956 case OP_NOT:
1957 case OP_NOTI:
1958 case OP_STAR:
1959 case OP_MINSTAR:
1960 case OP_PLUS:
1961 case OP_MINPLUS:
1962 case OP_QUERY:
1963 case OP_MINQUERY:
1964 case OP_UPTO:
1965 case OP_MINUPTO:
1966 case OP_EXACT:
1967 case OP_POSSTAR:
1968 case OP_POSPLUS:
1969 case OP_POSQUERY:
1970 case OP_POSUPTO:
1971 case OP_STARI:
1972 case OP_MINSTARI:
1973 case OP_PLUSI:
1974 case OP_MINPLUSI:
1975 case OP_QUERYI:
1976 case OP_MINQUERYI:
1977 case OP_UPTOI:
1978 case OP_MINUPTOI:
1979 case OP_EXACTI:
1980 case OP_POSSTARI:
1981 case OP_POSPLUSI:
1982 case OP_POSQUERYI:
1983 case OP_POSUPTOI:
1984 case OP_NOTSTAR:
1985 case OP_NOTMINSTAR:
1986 case OP_NOTPLUS:
1987 case OP_NOTMINPLUS:
1988 case OP_NOTQUERY:
1989 case OP_NOTMINQUERY:
1990 case OP_NOTUPTO:
1991 case OP_NOTMINUPTO:
1992 case OP_NOTEXACT:
1993 case OP_NOTPOSSTAR:
1994 case OP_NOTPOSPLUS:
1995 case OP_NOTPOSQUERY:
1996 case OP_NOTPOSUPTO:
1997 case OP_NOTSTARI:
1998 case OP_NOTMINSTARI:
1999 case OP_NOTPLUSI:
2000 case OP_NOTMINPLUSI:
2001 case OP_NOTQUERYI:
2002 case OP_NOTMINQUERYI:
2003 case OP_NOTUPTOI:
2004 case OP_NOTMINUPTOI:
2005 case OP_NOTEXACTI:
2006 case OP_NOTPOSSTARI:
2007 case OP_NOTPOSPLUSI:
2008 case OP_NOTPOSQUERYI:
2009 case OP_NOTPOSUPTOI:
2010 if (utf) utf16_char = TRUE;
2011 #endif
2012 /* Fall through. */
2013
2014 default:
2015 length = OP_lengths16[op] - 1;
2016 break;
2017
2018 case OP_CLASS:
2019 case OP_NCLASS:
2020 /* Skip the character bit map. */
2021 ptr += 32/sizeof(pcre_uint16);
2022 length = 0;
2023 break;
2024
2025 case OP_XCLASS:
2026 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2027 if (LINK_SIZE > 1)
2028 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2029 - (1 + LINK_SIZE + 1));
2030 else
2031 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2032
2033 /* Reverse the size of the XCLASS instance. */
2034 *ptr = swap_uint16(*ptr);
2035 ptr++;
2036 if (LINK_SIZE > 1)
2037 {
2038 *ptr = swap_uint16(*ptr);
2039 ptr++;
2040 }
2041
2042 op = *ptr;
2043 *ptr = swap_uint16(op);
2044 ptr++;
2045 if ((op & XCL_MAP) != 0)
2046 {
2047 /* Skip the character bit map. */
2048 ptr += 32/sizeof(pcre_uint16);
2049 length -= 32/sizeof(pcre_uint16);
2050 }
2051 break;
2052 }
2053 }
2054 /* Control should never reach here in 16 bit mode. */
2055 #endif /* SUPPORT_PCRE16 */
2056 }
2057
2058
2059
2060 /*************************************************
2061 * Check match or recursion limit *
2062 *************************************************/
2063
2064 static int
2065 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2066 int start_offset, int options, int *use_offsets, int use_size_offsets,
2067 int flag, unsigned long int *limit, int errnumber, const char *msg)
2068 {
2069 int count;
2070 int min = 0;
2071 int mid = 64;
2072 int max = -1;
2073
2074 extra->flags |= flag;
2075
2076 for (;;)
2077 {
2078 *limit = mid;
2079
2080 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2081 use_offsets, use_size_offsets);
2082
2083 if (count == errnumber)
2084 {
2085 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2086 min = mid;
2087 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2088 }
2089
2090 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2091 count == PCRE_ERROR_PARTIAL)
2092 {
2093 if (mid == min + 1)
2094 {
2095 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2096 break;
2097 }
2098 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2099 max = mid;
2100 mid = (min + mid)/2;
2101 }
2102 else break; /* Some other error */
2103 }
2104
2105 extra->flags &= ~flag;
2106 return count;
2107 }
2108
2109
2110
2111 /*************************************************
2112 * Case-independent strncmp() function *
2113 *************************************************/
2114
2115 /*
2116 Arguments:
2117 s first string
2118 t second string
2119 n number of characters to compare
2120
2121 Returns: < 0, = 0, or > 0, according to the comparison
2122 */
2123
2124 static int
2125 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2126 {
2127 while (n--)
2128 {
2129 int c = tolower(*s++) - tolower(*t++);
2130 if (c) return c;
2131 }
2132 return 0;
2133 }
2134
2135
2136
2137 /*************************************************
2138 * Check newline indicator *
2139 *************************************************/
2140
2141 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2142 a message and return 0 if there is no match.
2143
2144 Arguments:
2145 p points after the leading '<'
2146 f file for error message
2147
2148 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2149 */
2150
2151 static int
2152 check_newline(pcre_uint8 *p, FILE *f)
2153 {
2154 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2155 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2156 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2157 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2158 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2159 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2160 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2161 fprintf(f, "Unknown newline type at: <%s\n", p);
2162 return 0;
2163 }
2164
2165
2166
2167 /*************************************************
2168 * Usage function *
2169 *************************************************/
2170
/* Write the command line synopsis and the list of options to stdout. Options
that exist only in some builds (-16, -dfa, -p) are listed only when the
corresponding support is compiled in. The list of -C arguments includes
"ebcdic" and "ebcdic-nl", which the option scanner in main() accepts. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
printf(" -b show compiled code\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option\n");
printf(" and exit with its value. The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
printf(" ebcdic compiled for an EBCDIC environment [0, 1]\n");
printf(" ebcdic-nl NL code if compiled for EBCDIC, otherwise 0\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -s++ ditto, verifying when JIT was actually used\n"
       " -s+n force each pattern to be studied, using JIT if available,\n"
       " where 1 <= n <= 7 selects JIT options\n"
       " -s++n ditto, verifying when JIT was actually used\n"
       " -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
}
2221
2222
2223
2224 /*************************************************
2225 * Main Program *
2226 *************************************************/
2227
2228 /* Read lines from named file or stdin and write to named file or stdout; lines
2229 consist of a regular expression, in delimiters and optionally followed by
2230 options, followed by a set of test data, terminated by an empty line. */
2231
2232 int main(int argc, char **argv)
2233 {
2234 FILE *infile = stdin;
2235 const char *version;
2236 int options = 0;
2237 int study_options = 0;
2238 int default_find_match_limit = FALSE;
2239 int op = 1;
2240 int timeit = 0;
2241 int timeitm = 0;
2242 int showinfo = 0;
2243 int showstore = 0;
2244 int force_study = -1;
2245 int force_study_options = 0;
2246 int quiet = 0;
2247 int size_offsets = 45;
2248 int size_offsets_max;
2249 int *offsets = NULL;
2250 int debug = 0;
2251 int done = 0;
2252 int all_use_dfa = 0;
2253 int verify_jit = 0;
2254 int yield = 0;
2255 int stack_size;
2256
2257 #if !defined NOPOSIX
2258 int posix = 0;
2259 #endif
2260 #if !defined NODFA
2261 int *dfa_workspace = NULL;
2262 #endif
2263
2264 pcre_jit_stack *jit_stack = NULL;
2265
2266 /* These vectors store, end-to-end, a list of zero-terminated captured
2267 substring names, each list itself being terminated by an empty name. Assume
2268 that 1024 is plenty long enough for the few names we'll be testing. It is
2269 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2270 for the actual memory, to ensure alignment. */
2271
2272 pcre_uint16 copynames[1024];
2273 pcre_uint16 getnames[1024];
2274
2275 #ifdef SUPPORT_PCRE16
2276 pcre_uint16 *cn16ptr;
2277 pcre_uint16 *gn16ptr;
2278 #endif
2279
2280 #ifdef SUPPORT_PCRE8
2281 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2282 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2283 pcre_uint8 *cn8ptr;
2284 pcre_uint8 *gn8ptr;
2285 #endif
2286
2287 /* Get buffers from malloc() so that valgrind will check their misuse when
2288 debugging. They grow automatically when very long lines are read. The 16-bit
2289 buffer (buffer16) is obtained only if needed. */
2290
2291 buffer = (pcre_uint8 *)malloc(buffer_size);
2292 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2293 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2294
2295 /* The outfile variable is static so that new_malloc can use it. */
2296
2297 outfile = stdout;
2298
2299 /* The following _setmode() stuff is some Windows magic that tells its runtime
2300 library to translate CRLF into a single LF character. At least, that's what
2301 I've been told: never having used Windows I take this all on trust. Originally
2302 it set 0x8000, but then I was advised that _O_BINARY was better. */
2303
2304 #if defined(_WIN32) || defined(WIN32)
2305 _setmode( _fileno( stdout ), _O_BINARY );
2306 #endif
2307
2308 /* Get the version number: both pcre_version() and pcre16_version() give the
2309 same answer. We just need to ensure that we call one that is available. */
2310
2311 #ifdef SUPPORT_PCRE8
2312 version = pcre_version();
2313 #else
2314 version = pcre16_version();
2315 #endif
2316
2317 /* Scan options */
2318
2319 while (argc > 1 && argv[op][0] == '-')
2320 {
2321 pcre_uint8 *endptr;
2322 char *arg = argv[op];
2323
2324 if (strcmp(arg, "-m") == 0) showstore = 1;
2325 else if (strcmp(arg, "-s") == 0) force_study = 0;
2326
2327 else if (strncmp(arg, "-s+", 3) == 0)
2328 {
2329 arg += 3;
2330 if (*arg == '+') { arg++; verify_jit = TRUE; }
2331 force_study = 1;
2332 if (*arg == 0)
2333 force_study_options = jit_study_bits[6];
2334 else if (*arg >= '1' && *arg <= '7')
2335 force_study_options = jit_study_bits[*arg - '1'];
2336 else goto BAD_ARG;
2337 }
2338 else if (strcmp(arg, "-16") == 0)
2339 {
2340 #ifdef SUPPORT_PCRE16
2341 use_pcre16 = 1;
2342 #else
2343 printf("** This version of PCRE was built without 16-bit support\n");
2344 exit(1);
2345 #endif
2346 }
2347 else if (strcmp(arg, "-q") == 0) quiet = 1;
2348 else if (strcmp(arg, "-b") == 0) debug = 1;
2349 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2350 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2351 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2352 #if !defined NODFA
2353 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2354 #endif
2355 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2356 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2357 *endptr == 0))
2358 {
2359 op++;
2360 argc--;
2361 }
2362 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2363 {
2364 int both = arg[2] == 0;
2365 int temp;
2366 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2367 *endptr == 0))
2368 {
2369 timeitm = temp;
2370 op++;
2371 argc--;
2372 }
2373 else timeitm = LOOPREPEAT;
2374 if (both) timeit = timeitm;
2375 }
2376 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2377 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2378 *endptr == 0))
2379 {
2380 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2381 printf("PCRE: -S not supported on this OS\n");
2382 exit(1);
2383 #else
2384 int rc;
2385 struct rlimit rlim;
2386 getrlimit(RLIMIT_STACK, &rlim);
2387 rlim.rlim_cur = stack_size * 1024 * 1024;
2388 rc = setrlimit(RLIMIT_STACK, &rlim);
2389 if (rc != 0)
2390 {
2391 printf("PCRE: setrlimit() failed with error %d\n", rc);
2392 exit(1);
2393 }
2394 op++;
2395 argc--;
2396 #endif
2397 }
2398 #if !defined NOPOSIX
2399 else if (strcmp(arg, "-p") == 0) posix = 1;
2400 #endif
2401 else if (strcmp(arg, "-C") == 0)
2402 {
2403 int rc;
2404 unsigned long int lrc;
2405
2406 if (argc > 2)
2407 {
2408 if (strcmp(argv[op + 1], "linksize") == 0)
2409 {
2410 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2411 printf("%d\n", rc);
2412 yield = rc;
2413 }
2414 else if (strcmp(argv[op + 1], "pcre8") == 0)
2415 {
2416 #ifdef SUPPORT_PCRE8
2417 printf("1\n");
2418 yield = 1;
2419 #else
2420 printf("0\n");
2421 yield = 0;
2422 #endif
2423 }
2424 else if (strcmp(argv[op + 1], "pcre16") == 0)
2425 {
2426 #ifdef SUPPORT_PCRE16
2427 printf("1\n");
2428 yield = 1;
2429 #else
2430 printf("0\n");
2431 yield = 0;
2432 #endif
2433 }
2434 else if (strcmp(argv[op + 1], "utf") == 0)
2435 {
2436 #ifdef SUPPORT_PCRE8
2437 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2438 printf("%d\n", rc);
2439 yield = rc;
2440 #else
2441 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2442 printf("%d\n", rc);
2443 yield = rc;
2444 #endif
2445 }
2446 else if (strcmp(argv[op + 1], "ucp") == 0)
2447 {
2448 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2449 printf("%d\n", rc);
2450 yield = rc;
2451 }
2452 else if (strcmp(argv[op + 1], "jit") == 0)
2453 {
2454 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2455 printf("%d\n", rc);
2456 yield = rc;
2457 }
2458 else if (strcmp(argv[op + 1], "newline") == 0)
2459 {
2460 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2461 print_newline_config(rc, TRUE);
2462 }
2463 else if (strcmp(argv[op + 1], "ebcdic") == 0)
2464 {
2465 #ifdef EBCDIC
2466 printf("1\n");
2467 yield = 1;
2468 #else
2469 printf("0\n");
2470 #endif
2471 }
2472 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2473 {
2474 #ifdef EBCDIC
2475 printf("0x%02x\n", CHAR_LF);
2476 #else
2477 printf("0\n");
2478 #endif
2479 }
2480 else
2481 {
2482 printf("Unknown -C option: %s\n", argv[op + 1]);
2483 }
2484 goto EXIT;
2485 }
2486
2487 /* No argument for -C: output all configuration information. */
2488
2489 printf("PCRE version %s\n", version);
2490 printf("Compiled with\n");
2491
2492 #ifdef EBCDIC
2493 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2494 #endif
2495
2496 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2497 are set, either both UTFs are supported or both are not supported. */
2498
2499 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2500 printf(" 8-bit and 16-bit support\n");
2501 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2502 if (rc)
2503 printf(" UTF-8 and UTF-16 support\n");
2504 else
2505 printf(" No UTF-8 or UTF-16 support\n");
2506 #elif defined SUPPORT_PCRE8
2507 printf(" 8-bit support only\n");
2508 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2509 printf(" %sUTF-8 support\n", rc? "" : "No ");
2510 #else
2511 printf(" 16-bit support only\n");
2512 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2513 printf(" %sUTF-16 support\n", rc? "" : "No ");
2514 #endif
2515
2516 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2517 printf(" %sUnicode properties support\n", rc? "" : "No ");
2518 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2519 if (rc)
2520 {
2521 const char *arch;
2522 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2523 printf(" Just-in-time compiler support: %s\n", arch);
2524 }
2525 else
2526 printf(" No just-in-time compiler support\n");
2527 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2528 print_newline_config(rc, FALSE);
2529 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2530 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2531 "all Unicode newlines");
2532 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2533 printf(" Internal link size = %d\n", rc);
2534 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2535 printf(" POSIX malloc threshold = %d\n", rc);
2536 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2537 printf(" Default match limit = %ld\n", lrc);
2538 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2539 printf(" Default recursion depth limit = %ld\n", lrc);
2540 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2541 printf(" Match recursion uses %s", rc? "stack" : "heap");
2542 if (showstore)
2543 {
2544 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2545 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2546 }
2547 printf("\n");
2548 goto EXIT;
2549 }
2550 else if (strcmp(arg, "-help") == 0 ||
2551 strcmp(arg, "--help") == 0)
2552 {
2553 usage();
2554 goto EXIT;
2555 }
2556 else
2557 {
2558 BAD_ARG:
2559 printf("** Unknown or malformed option %s\n", arg);
2560 usage();
2561 yield = 1;
2562 goto EXIT;
2563 }
2564 op++;
2565 argc--;
2566 }
2567
2568 /* Get the store for the offsets vector, and remember what it was */
2569
2570 size_offsets_max = size_offsets;
2571 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2572 if (offsets == NULL)
2573 {
2574 printf("** Failed to get %d bytes of memory for offsets vector\n",
2575 (int)(size_offsets_max * sizeof(int)));
2576 yield = 1;
2577 goto EXIT;
2578 }
2579
2580 /* Sort out the input and output files */
2581
2582 if (argc > 1)
2583 {
2584 infile = fopen(argv[op], INPUT_MODE);
2585 if (infile == NULL)
2586 {
2587 printf("** Failed to open %s\n", argv[op]);
2588 yield = 1;
2589 goto EXIT;
2590 }
2591 }
2592
2593 if (argc > 2)
2594 {
2595 outfile = fopen(argv[op+1], OUTPUT_MODE);
2596 if (outfile == NULL)
2597 {
2598 printf("** Failed to open %s\n", argv[op+1]);
2599 yield = 1;
2600 goto EXIT;
2601 }
2602 }
2603
2604 /* Set alternative malloc function */
2605
2606 #ifdef SUPPORT_PCRE8
2607 pcre_malloc = new_malloc;
2608 pcre_free = new_free;
2609 pcre_stack_malloc = stack_malloc;
2610 pcre_stack_free = stack_free;
2611 #endif
2612
2613 #ifdef SUPPORT_PCRE16
2614 pcre16_malloc = new_malloc;
2615 pcre16_free = new_free;
2616 pcre16_stack_malloc = stack_malloc;
2617 pcre16_stack_free = stack_free;
2618 #endif
2619
2620 /* Heading line unless quiet, then prompt for first regex if stdin */
2621
2622 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2623
2624 /* Main loop */
2625
2626 while (!done)
2627 {
2628 pcre *re = NULL;
2629 pcre_extra *extra = NULL;
2630
2631 #if !defined NOPOSIX /* There are still compilers that require no indent */
2632 regex_t preg;
2633 int do_posix = 0;
2634 #endif
2635
2636 const char *error;
2637 pcre_uint8 *markptr;
2638 pcre_uint8 *p, *pp, *ppp;
2639 pcre_uint8 *to_file = NULL;
2640 const pcre_uint8 *tables = NULL;
2641 unsigned long int get_options;
2642 unsigned long int true_size, true_study_size = 0;
2643 size_t size, regex_gotten_store;
2644 int do_allcaps = 0;
2645 int do_mark = 0;
2646 int do_study = 0;
2647 int no_force_study = 0;
2648 int do_debug = debug;
2649 int do_G = 0;
2650 int do_g = 0;
2651 int do_showinfo = showinfo;
2652 int do_showrest = 0;
2653 int do_showcaprest = 0;
2654 int do_flip = 0;
2655 int erroroffset, len, delimiter, poffset;
2656
2657 #if !defined NODFA
2658 int dfa_matched = 0;
2659 #endif
2660
2661 use_utf = 0;
2662 debug_lengths = 1;
2663
2664 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2665 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2666 fflush(outfile);
2667
2668 p = buffer;
2669 while (isspace(*p)) p++;
2670 if (*p == 0) continue;
2671
2672 /* See if the pattern is to be loaded pre-compiled from a file. */
2673
2674 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2675 {
2676 pcre_uint32 magic;
2677 pcre_uint8 sbuf[8];
2678 FILE *f;
2679
2680 p++;
2681 if (*p == '!')
2682 {
2683 do_debug = TRUE;
2684 do_showinfo = TRUE;
2685 p++;
2686 }
2687
2688 pp = p + (int)strlen((char *)p);
2689 while (isspace(pp[-1])) pp--;
2690 *pp = 0;
2691
2692 f = fopen((char *)p, "rb");
2693 if (f == NULL)
2694 {
2695 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2696 continue;
2697 }
2698
2699 first_gotten_store = 0;
2700 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2701
2702 true_size =
2703 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2704 true_study_size =
2705 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2706
2707 re = (pcre *)new_malloc(true_size);
2708 if (re == NULL)
2709 {
2710 printf("** Failed to get %d bytes of memory for pcre object\n",
2711 (int)true_size);
2712 yield = 1;
2713 goto EXIT;
2714 }
2715 regex_gotten_store = first_gotten_store;
2716
2717 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2718
2719 magic = ((REAL_PCRE *)re)->magic_number;
2720 if (magic != MAGIC_NUMBER)
2721 {
2722 if (swap_uint32(magic) == MAGIC_NUMBER)
2723 {
2724 do_flip = 1;
2725 }
2726 else
2727 {
2728 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2729 new_free(re);
2730 fclose(f);
2731 continue;
2732 }
2733 }
2734
2735 /* We hide the byte-invert info for little and big endian tests. */
2736 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2737 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2738
2739 /* Now see if there is any following study data. */
2740
2741 if (true_study_size != 0)
2742 {
2743 pcre_study_data *psd;
2744
2745 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2746 extra->flags = PCRE_EXTRA_STUDY_DATA;
2747
2748 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2749 extra->study_data = psd;
2750
2751 if (fread(psd, 1, true_study_size, f) != true_study_size)
2752 {
2753 FAIL_READ:
2754 fprintf(outfile, "Failed to read data from %s\n", p);
2755 if (extra != NULL)
2756 {
2757 PCRE_FREE_STUDY(extra);
2758 }
2759 new_free(re);
2760 fclose(f);
2761 continue;
2762 }
2763 fprintf(outfile, "Study data loaded from %s\n", p);
2764 do_study = 1; /* To get the data output if requested */
2765 }
2766 else fprintf(outfile, "No study data\n");
2767
2768 /* Flip the necessary bytes. */
2769 if (do_flip)
2770 {
2771 int rc;
2772 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2773 if (rc == PCRE_ERROR_BADMODE)
2774 {
2775 /* Simulate the result of the function call below. */
2776 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2777 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2778 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2779 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2780 new_free(re);
2781 fclose(f);
2782 continue;
2783 }
2784 }
2785
2786 /* Need to know if UTF-8 for printing data strings. */
2787
2788 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2789 {
2790 new_free(re);
2791 fclose(f);
2792 continue;
2793 }
2794 use_utf = (get_options & PCRE_UTF8) != 0;
2795
2796 fclose(f);
2797 goto SHOW_INFO;
2798 }
2799
2800 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2801 the pattern; if it isn't complete, read more. */
2802
2803 delimiter = *p++;
2804
2805 if (isalnum(delimiter) || delimiter == '\\')
2806 {
2807 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2808 goto SKIP_DATA;
2809 }
2810
2811 pp = p;
2812 poffset = (int)(p - buffer);
2813
2814 for(;;)
2815 {
2816 while (*pp != 0)
2817 {
2818 if (*pp == '\\' && pp[1] != 0) pp++;
2819 else if (*pp == delimiter) break;
2820 pp++;
2821 }
2822 if (*pp != 0) break;
2823 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2824 {
2825 fprintf(outfile, "** Unexpected EOF\n");
2826 done = 1;
2827 goto CONTINUE;
2828 }
2829 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2830 }
2831
2832 /* The buffer may have moved while being extended; reset the start of data
2833 pointer to the correct relative point in the buffer. */
2834
2835 p = buffer + poffset;
2836
2837 /* If the first character after the delimiter is backslash, make
2838 the pattern end with backslash. This is purely to provide a way
2839 of testing for the error message when a pattern ends with backslash. */
2840
2841 if (pp[1] == '\\') *pp++ = '\\';
2842
2843 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2844 for callouts. */
2845
2846 *pp++ = 0;
2847 strcpy((char *)pbuffer, (char *)p);
2848
2849 /* Look for options after final delimiter */
2850
2851 options = 0;
2852 study_options = force_study_options;
2853 log_store = showstore; /* default from command line */
2854
2855 while (*pp != 0)
2856 {
2857 switch (*pp++)
2858 {
2859 case 'f': options |= PCRE_FIRSTLINE; break;
2860 case 'g': do_g = 1; break;
2861 case 'i': options |= PCRE_CASELESS; break;
2862 case 'm': options |= PCRE_MULTILINE; break;
2863 case 's': options |= PCRE_DOTALL; break;
2864 case 'x': options |= PCRE_EXTENDED; break;
2865
2866 case '+':
2867 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2868 break;
2869
2870 case '=': do_allcaps = 1; break;
2871 case 'A': options |= PCRE_ANCHORED; break;
2872 case 'B': do_debug = 1; break;
2873 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2874 case 'D': do_debug = do_showinfo = 1; break;
2875 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2876 case 'F': do_flip = 1; break;
2877 case 'G': do_G = 1; break;
2878 case 'I': do_showinfo = 1; break;
2879 case 'J': options |= PCRE_DUPNAMES; break;
2880 case 'K': do_mark = 1; break;
2881 case 'M': log_store = 1; break;
2882 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2883
2884 #if !defined NOPOSIX
2885 case 'P': do_posix = 1; break;
2886 #endif
2887
2888 case 'S':
2889 do_study = 1;
2890 for (;;)
2891 {
2892 switch (*pp++)
2893 {
2894 case 'S':
2895 do_study = 0;
2896 no_force_study = 1;
2897 break;
2898
2899 case '!':
2900 study_options |= PCRE_STUDY_EXTRA_NEEDED;
2901 break;
2902
2903 case '+':
2904 if (*pp == '+')
2905 {
2906 verify_jit = TRUE;
2907 pp++;
2908 }
2909 if (*pp >= '1' && *pp <= '7')
2910 study_options |= jit_study_bits[*pp++ - '1'];
2911 else
2912 study_options |= jit_study_bits[6];
2913 break;
2914
2915 case '-':
2916 study_options &= ~PCRE_STUDY_ALLJIT;
2917 break;
2918
2919 default:
2920 pp--;
2921 goto ENDLOOP;
2922 }
2923 }
2924 ENDLOOP:
2925 break;
2926
2927 case 'U': options |= PCRE_UNGREEDY; break;
2928 case 'W': options |= PCRE_UCP; break;
2929 case 'X': options |= PCRE_EXTRA; break;
2930 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2931 case 'Z': debug_lengths = 0; break;
2932 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2933 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2934
2935 case 'T':
2936 switch (*pp++)
2937 {
2938 case '0': tables = tables0; break;
2939 case '1': tables = tables1; break;
2940
2941 case '\r':
2942 case '\n':
2943 case ' ':
2944 case 0:
2945 fprintf(outfile, "** Missing table number after /T\n");
2946 goto SKIP_DATA;
2947
2948 default:
2949 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2950 goto SKIP_DATA;
2951 }
2952 break;
2953
2954 case 'L':
2955 ppp = pp;
2956 /* The '\r' test here is so that it works on Windows. */
2957 /* The '0' test is just in case this is an unterminated line. */
2958 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2959 *ppp = 0;
2960 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2961 {
2962 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2963 goto SKIP_DATA;
2964 }
2965 locale_set = 1;
2966 tables = PCRE_MAKETABLES;
2967 pp = ppp;
2968 break;
2969
2970 case '>':
2971 to_file = pp;
2972 while (*pp != 0) pp++;
2973 while (isspace(pp[-1])) pp--;
2974 *pp = 0;
2975 break;
2976
2977 case '<':
2978 {
2979 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2980 {
2981 options |= PCRE_JAVASCRIPT_COMPAT;
2982 pp += 3;
2983 }
2984 else
2985 {
2986 int x = check_newline(pp, outfile);
2987 if (x == 0) goto SKIP_DATA;
2988 options |= x;
2989 while (*pp++ != '>');
2990 }
2991 }
2992 break;
2993
2994 case '\r': /* So that it works in Windows */
2995 case '\n':
2996 case ' ':
2997 break;
2998
2999 default:
3000 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3001 goto SKIP_DATA;
3002 }
3003 }
3004
3005 /* Handle compiling via the POSIX interface, which doesn't support the
3006 timing, showing, or debugging options, nor the ability to pass over
3007 local character tables. Neither does it have 16-bit support. */
3008
3009 #if !defined NOPOSIX
3010 if (posix || do_posix)
3011 {
3012 int rc;
3013 int cflags = 0;
3014
3015 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3016 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3017 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3018 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3019 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3020 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3021 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3022
3023 first_gotten_store = 0;
3024 rc = regcomp(&preg, (char *)p, cflags);
3025
3026 /* Compilation failed; go back for another re, skipping to blank line
3027 if non-interactive. */
3028
3029 if (rc != 0)
3030 {
3031 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3032 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3033 goto SKIP_DATA;
3034 }
3035 }
3036
3037 /* Handle compiling via the native interface */
3038
3039 else
3040 #endif /* !defined NOPOSIX */
3041
3042 {
3043 /* In 16-bit mode, convert the input. */
3044
3045 #ifdef SUPPORT_PCRE16
3046 if (use_pcre16)
3047 {
3048 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3049 {
3050 case -1:
3051 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3052 "converted to UTF-16\n");
3053 goto SKIP_DATA;
3054
3055 case -2:
3056 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3057 "cannot be converted to UTF-16\n");
3058 goto SKIP_DATA;
3059
3060 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3061 fprintf(outfile, "**Failed: character value greater than 0xffff "
3062 "cannot be converted to 16-bit in non-UTF mode\n");
3063 goto SKIP_DATA;
3064
3065 default:
3066 break;
3067 }
3068 p = (pcre_uint8 *)buffer16;
3069 }
3070 #endif
3071
3072 /* Compile many times when timing */
3073
3074 if (timeit > 0)
3075 {
3076 register int i;
3077 clock_t time_taken;
3078 clock_t start_time = clock();
3079 for (i = 0; i < timeit; i++)
3080 {
3081 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3082 if (re != NULL) free(re);
3083 }
3084 time_taken = clock() - start_time;
3085 fprintf(outfile, "Compile time %.4f milliseconds\n",
3086 (((double)time_taken * 1000.0) / (double)timeit) /
3087 (double)CLOCKS_PER_SEC);
3088 }
3089
3090 first_gotten_store = 0;
3091 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3092
3093 /* Compilation failed; go back for another re, skipping to blank line
3094 if non-interactive. */
3095
3096 if (re == NULL)
3097 {
3098 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3099 SKIP_DATA:
3100 if (infile != stdin)
3101 {
3102 for (;;)
3103 {
3104 if (extend_inputline(infile, buffer, NULL) == NULL)
3105 {
3106 done = 1;
3107 goto CONTINUE;
3108 }
3109 len = (int)strlen((char *)buffer);
3110 while (len > 0 && isspace(buffer[len-1])) len--;
3111 if (len == 0) break;
3112 }
3113 fprintf(outfile, "\n");
3114 }
3115 goto CONTINUE;
3116 }
3117
3118 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3119 within the regex; check for this so that we know how to process the data
3120 lines. */
3121
3122 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3123 goto SKIP_DATA;
3124 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3125
3126 /* Extract the size for possible writing before possibly flipping it,
3127 and remember the store that was got. */
3128
3129 true_size = ((REAL_PCRE *)re)->size;
3130 regex_gotten_store = first_gotten_store;
3131
3132 /* Output code size information if requested */
3133
3134 if (log_store)
3135 fprintf(outfile, "Memory allocation (code space): %d\n",
3136 (int)(first_gotten_store -
3137 sizeof(REAL_PCRE) -
3138 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3139
3140 /* If -s or /S was present, study the regex to generate additional info to
3141 help with the matching, unless the pattern has the SS option, which
3142 suppresses the effect of /S (used for a few test patterns where studying is
3143 never sensible). */
3144
3145 if (do_study || (force_study >= 0 && !no_force_study))
3146 {
3147 if (timeit > 0)
3148 {
3149 register int i;
3150 clock_t time_taken;
3151 clock_t start_time = clock();
3152 for (i = 0; i < timeit; i++)
3153 {
3154 PCRE_STUDY(extra, re, study_options, &error);
3155 }
3156 time_taken = clock() - start_time;
3157 if (extra != NULL)
3158 {
3159 PCRE_FREE_STUDY(extra);
3160 }
3161 fprintf(outfile, " Study time %.4f milliseconds\n",
3162 (((double)time_taken * 1000.0) / (double)timeit) /
3163 (double)CLOCKS_PER_SEC);
3164 }
3165 PCRE_STUDY(extra, re, study_options, &error);
3166 if (error != NULL)
3167 fprintf(outfile, "Failed to study: %s\n", error);
3168 else if (extra != NULL)
3169 {
3170 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3171 if (log_store)
3172 {
3173 size_t jitsize;
3174 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3175 jitsize != 0)
3176 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3177 }
3178 }
3179 }
3180
3181 /* If /K was present, we set up for handling MARK data. */
3182
3183 if (do_mark)
3184 {
3185 if (extra == NULL)
3186 {
3187 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3188 extra->flags = 0;
3189 }
3190 extra->mark = &markptr;
3191 extra->flags |= PCRE_EXTRA_MARK;
3192 }
3193
3194 /* Extract and display information from the compiled data if required. */
3195
3196 SHOW_INFO:
3197
3198 if (do_debug)
3199 {
3200 fprintf(outfile, "------------------------------------------------------------------\n");
3201 PCRE_PRINTINT(re, outfile, debug_lengths);
3202 }
3203
3204 /* We already have the options in get_options (see above) */
3205
3206 if (do_showinfo)
3207 {
3208 unsigned long int all_options;
3209 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3210 hascrorlf, maxlookbehind;
3211 int nameentrysize, namecount;
3212 const pcre_uint8 *nametable;
3213
3214 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3215 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3216 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3217 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3218 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3219 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3220 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3221 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3222 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3223 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3224 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3225 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3226 != 0)
3227 goto SKIP_DATA;
3228
3229 if (size != regex_gotten_store) fprintf(outfile,
3230 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3231 (int)size, (int)regex_gotten_store);
3232
3233 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3234 if (backrefmax > 0)
3235 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3236
3237 if (namecount > 0)
3238 {
3239 fprintf(outfile, "Named capturing subpatterns:\n");
3240 while (namecount-- > 0)
3241 {
3242 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3243 int imm2_size = use_pcre16 ? 1 : 2;
3244 #else
3245 int imm2_size = IMM2_SIZE;
3246 #endif
3247 int length = (int)STRLEN(nametable + imm2_size);
3248 fprintf(outfile, " ");
3249 PCHARSV(nametable, imm2_size, length, outfile);
3250 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3251 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3252 fprintf(outfile, "%3d\n", use_pcre16?
3253 (int)(((PCRE_SPTR16)nametable)[0])
3254 :((int)nametable[0] << 8) | (int)nametable[1]);
3255 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3256 #else
3257 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3258 #ifdef SUPPORT_PCRE8
3259 nametable += nameentrysize;
3260 #else
3261 nametable += nameentrysize * 2;
3262 #endif
3263 #endif
3264 }
3265 }
3266
3267 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3268 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3269
3270 all_options = ((REAL_PCRE *)re)->options;
3271 if (do_flip) all_options = swap_uint32(all_options);
3272
3273 if (get_options == 0) fprintf(outfile, "No options\n");
3274 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3275 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3276 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3277 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3278 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3279 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3280 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3281 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3282 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3283 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3284 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3285 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3286 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3287 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3288 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3289 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3290 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3291 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3292
3293 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3294
3295 switch (get_options & PCRE_NEWLINE_BITS)
3296 {
3297 case PCRE_NEWLINE_CR:
3298 fprintf(outfile, "Forced newline sequence: CR\n");
3299 break;
3300
3301 case PCRE_NEWLINE_LF:
3302 fprintf(outfile, "Forced newline sequence: LF\n");
3303 break;
3304
3305 case PCRE_NEWLINE_CRLF:
3306 fprintf(outfile, "Forced newline sequence: CRLF\n");
3307 break;
3308
3309 case PCRE_NEWLINE_ANYCRLF:
3310 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3311 break;
3312
3313 case PCRE_NEWLINE_ANY:
3314 fprintf(outfile, "Forced newline sequence: ANY\n");
3315 break;
3316
3317 default:
3318 break;
3319 }
3320
3321 if (first_char == -1)
3322 {
3323 fprintf(outfile, "First char at start or follows newline\n");
3324 }
3325 else if (first_char < 0)
3326 {
3327 fprintf(outfile, "No first char\n");
3328 }
3329 else
3330 {
3331 const char *caseless =
3332 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3333 "" : " (caseless)";
3334
3335 if (PRINTOK(first_char))
3336 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3337 else
3338 {
3339 fprintf(outfile, "First char = ");
3340 pchar(first_char, outfile);
3341 fprintf(outfile, "%s\n", caseless);
3342 }
3343 }
3344
3345 if (need_char < 0)
3346 {
3347 fprintf(outfile, "No need char\n");
3348 }
3349 else
3350 {
3351 const char *caseless =
3352 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3353 "" : " (caseless)";
3354
3355 if (PRINTOK(need_char))
3356 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3357 else
3358 {
3359 fprintf(outfile, "Need char = ");
3360 pchar(need_char, outfile);
3361 fprintf(outfile, "%s\n", caseless);
3362 }
3363 }
3364
3365 if (maxlookbehind > 0)
3366 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3367
3368 /* Don't output study size; at present it is in any case a fixed
3369 value, but it varies, depending on the computer architecture, and
3370 so messes up the test suite. (And with the /F option, it might be
3371 flipped.) If study was forced by an external -s, don't show this
3372 information unless -i or -d was also present. This means that, except
3373 when auto-callouts are involved, the output from runs with and without
3374 -s should be identical. */
3375
3376 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3377 {
3378 if (extra == NULL)
3379 fprintf(outfile, "Study returned NULL\n");
3380 else
3381 {
3382 pcre_uint8 *start_bits = NULL;
3383 int minlength;
3384
3385 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3386 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3387
3388 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3389 {
3390 if (start_bits == NULL)
3391 fprintf(outfile, "No set of starting bytes\n");
3392 else
3393 {
3394 int i;
3395 int c = 24;
3396 fprintf(outfile, "Starting byte set: ");
3397 for (i = 0; i < 256; i++)
3398 {
3399 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3400 {
3401 if (c > 75)
3402 {
3403 fprintf(outfile, "\n ");
3404 c = 2;
3405 }
3406 if (PRINTOK(i) && i != ' ')
3407 {
3408 fprintf(outfile, "%c ", i);
3409 c += 2;
3410 }
3411 else
3412 {
3413 fprintf(outfile, "\\x%02x ", i);
3414 c += 5;
3415 }
3416 }
3417 }
3418 fprintf(outfile, "\n");
3419 }
3420 }
3421 }
3422
3423 /* Show this only if the JIT was set by /S, not by -s. */
3424
3425 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3426 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3427 {
3428 int jit;
3429 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3430 {
3431 if (jit)
3432 fprintf(outfile, "JIT study was successful\n");
3433 else
3434 #ifdef SUPPORT_JIT
3435 fprintf(outfile, "JIT study was not successful\n");
3436 #else
3437 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3438 #endif
3439 }
3440 }
3441 }
3442 }
3443
3444 /* If the '>' option was present, we write out the regex to a file, and
3445 that is all. The first 8 bytes of the file are the regex length and then
3446 the study length, in big-endian order. */
3447
3448 if (to_file != NULL)
3449 {
3450 FILE *f = fopen((char *)to_file, "wb");
3451 if (f == NULL)
3452 {
3453 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3454 }
3455 else
3456 {
3457 pcre_uint8 sbuf[8];
3458
3459 if (do_flip) regexflip(re, extra);
3460 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3461 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3462 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3463 sbuf[3] = (pcre_uint8)((true_size) & 255);
3464 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3465 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3466 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3467 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3468
3469 if (fwrite(sbuf, 1, 8, f) < 8 ||
3470 fwrite(re, 1, true_size, f) < true_size)
3471 {
3472 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3473 }
3474 else
3475 {
3476 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3477
3478 /* If there is study data, write it. */
3479
3480 if (extra != NULL)
3481 {
3482 if (fwrite(extra->study_data, 1, true_study_size, f) <
3483 true_study_size)
3484 {
3485 fprintf(outfile, "Write error on %s: %s\n", to_file,
3486 strerror(errno));
3487 }
3488 else fprintf(outfile, "Study data written to %s\n", to_file);
3489 }
3490 }
3491 fclose(f);
3492 }
3493
3494 new_free(re);
3495 if (extra != NULL)
3496 {
3497 PCRE_FREE_STUDY(extra);
3498 }
3499 if (locale_set)
3500 {
3501 new_free((void *)tables);
3502 setlocale(LC_CTYPE, "C");
3503 locale_set = 0;
3504 }
3505 continue; /* With next regex */
3506 }
3507 } /* End of non-POSIX compile */
3508
3509 /* Read data lines and test them */
3510
3511 for (;;)
3512 {
3513 pcre_uint8 *q;
3514 pcre_uint8 *bptr;
3515 int *use_offsets = offsets;
3516 int use_size_offsets = size_offsets;
3517 int callout_data = 0;
3518 int callout_data_set = 0;
3519 int count, c;
3520 int copystrings = 0;
3521 int find_match_limit = default_find_match_limit;
3522 int getstrings = 0;
3523 int getlist = 0;
3524 int gmatched = 0;
3525 int start_offset = 0;
3526 int start_offset_sign = 1;
3527 int g_notempty = 0;
3528 int use_dfa = 0;
3529
3530 *copynames = 0;
3531 *getnames = 0;
3532
3533 #ifdef SUPPORT_PCRE16
3534 cn16ptr = copynames;
3535 gn16ptr = getnames;
3536 #endif
3537 #ifdef SUPPORT_PCRE8
3538 cn8ptr = copynames8;
3539 gn8ptr = getnames8;
3540 #endif
3541
3542 SET_PCRE_CALLOUT(callout);
3543 first_callout = 1;
3544 last_callout_mark = NULL;
3545 callout_extra = 0;
3546 callout_count = 0;
3547 callout_fail_count = 999999;
3548 callout_fail_id = -1;
3549 show_malloc = 0;
3550 options = 0;
3551
3552 if (extra != NULL) extra->flags &=
3553 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3554
3555 len = 0;
3556 for (;;)
3557 {
3558 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3559 {
3560 if (len > 0) /* Reached EOF without hitting a newline */
3561 {
3562 fprintf(outfile, "\n");
3563 break;
3564 }
3565 done = 1;
3566 goto CONTINUE;
3567 }
3568 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3569 len = (int)strlen((char *)buffer);
3570 if (buffer[len-1] == '\n') break;
3571 }
3572
3573 while (len > 0 && isspace(buffer[len-1])) len--;
3574 buffer[len] = 0;
3575 if (len == 0) break;
3576
3577 p = buffer;
3578 while (isspace(*p)) p++;
3579
3580 bptr = q = dbuffer;
3581 while ((c = *p++) != 0)
3582 {
3583 int i = 0;
3584 int n = 0;
3585
3586 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3587 In non-UTF mode, allow the value of the byte to fall through to later,
3588 where values greater than 127 are turned into UTF-8 when running in
3589 16-bit mode. */
3590
3591 if (c != '\\')
3592 {
3593 if (use_utf)
3594 {
3595 *q++ = c;
3596 continue;
3597 }
3598 }
3599
3600 /* Handle backslash escapes */
3601
3602 else switch ((c = *p++))
3603 {
3604 case 'a': c = 7; break;
3605 case 'b': c = '\b'; break;
3606 case 'e': c = 27; break;
3607 case 'f': c = '\f'; break;
3608 case 'n': c = '\n'; break;
3609 case 'r': c = '\r'; break;
3610 case 't': c = '\t'; break;
3611 case 'v': c = '\v'; break;
3612
3613 case '0': case '1': case '2': case '3':
3614 case '4': case '5': case '6': case '7':
3615 c -= '0';
3616 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3617 c = c * 8 + *p++ - '0';
3618 break;
3619
3620 case 'x':
3621 if (*p == '{')
3622 {
3623 pcre_uint8 *pt = p;
3624 c = 0;
3625
3626 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3627 when isxdigit() is a macro that refers to its argument more than
3628 once. This is banned by the C Standard, but apparently happens in at
3629 least one MacOS environment. */
3630
3631 for (pt++; isxdigit(*pt); pt++)
3632 {
3633 if (++i == 9)
3634 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3635 "using only the first eight.\n");
3636 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3637 }
3638 if (*pt == '}')
3639 {
3640 p = pt + 1;
3641 break;
3642 }
3643 /* Not correct form for \x{...}; fall through */
3644 }
3645
3646 /* \x without {} always defines just one byte in 8-bit mode. This
3647 allows UTF-8 characters to be constructed byte by byte, and also allows
3648 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3649 Otherwise, pass it down to later code so that it can be turned into
3650 UTF-8 when running in 16-bit mode. */
3651
3652 c = 0;
3653 while (i++ < 2 && isxdigit(*p))
3654 {
3655 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3656 p++;
3657 }
3658 if (use_utf)
3659 {
3660 *q++ = c;
3661 continue;
3662 }
3663 break;
3664
3665 case 0: /* \ followed by end of line allows for an empty data line */
3666 p--;
3667 continue;
3668
3669 case '>':
3670 if (*p == '-')
3671 {
3672 start_offset_sign = -1;
3673 p++;
3674 }
3675 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3676 start_offset *= start_offset_sign;
3677 continue;
3678
3679 case 'A': /* Option setting */
3680 options |= PCRE_ANCHORED;
3681 continue;
3682
3683 case 'B':
3684 options |= PCRE_NOTBOL;
3685 continue;
3686
3687 case 'C':
3688 if (isdigit(*p)) /* Set copy string */
3689 {
3690 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3691 copystrings |= 1 << n;
3692 }
3693 else if (isalnum(*p))
3694 {
3695 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3696 }
3697 else if (*p == '+')
3698 {
3699 callout_extra = 1;
3700 p++;
3701 }
3702 else if (*p == '-')
3703 {
3704 SET_PCRE_CALLOUT(NULL);
3705 p++;
3706 }
3707 else if (*p == '!')
3708 {
3709 callout_fail_id = 0;
3710 p++;
3711 while(isdigit(*p))
3712 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3713 callout_fail_count = 0;
3714 if (*p == '!')
3715 {
3716 p++;
3717 while(isdigit(*p))
3718 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3719 }
3720 }
3721 else if (*p == '*')
3722 {
3723 int sign = 1;
3724 callout_data = 0;
3725 if (*(++p) == '-') { sign = -1; p++; }
3726 while(isdigit(*p))
3727 callout_data = callout_data * 10 + *p++ - '0';
3728 callout_data *= sign;
3729 callout_data_set = 1;
3730 }
3731 continue;
3732
3733 #if !defined NODFA
3734 case 'D':
3735 #if !defined NOPOSIX
3736 if (posix || do_posix)
3737 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3738 else
3739 #endif
3740 use_dfa = 1;
3741 continue;
3742 #endif
3743
3744 #if !defined NODFA
3745 case 'F':
3746 options |= PCRE_DFA_SHORTEST;
3747 continue;
3748 #endif
3749
3750 case 'G':
3751 if (isdigit(*p))
3752 {
3753 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3754 getstrings |= 1 << n;
3755 }
3756 else if (isalnum(*p))
3757 {
3758 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3759 }
3760 continue;
3761
3762 case 'J':
3763 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3764 if (extra != NULL
3765 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3766 && extra->executable_jit != NULL)
3767 {
3768 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3769 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3770 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3771 }
3772 continue;
3773
3774 case 'L':
3775 getlist = 1;
3776 continue;
3777
3778 case 'M':
3779 find_match_limit = 1;
3780 continue;
3781
3782 case 'N':
3783 if ((options & PCRE_NOTEMPTY) != 0)
3784 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3785 else
3786 options |= PCRE_NOTEMPTY;
3787 continue;
3788
3789 case 'O':
3790 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3791 if (n > size_offsets_max)
3792 {
3793 size_offsets_max = n;
3794 free(offsets);
3795 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3796 if (offsets == NULL)
3797 {
3798 printf("** Failed to get %d bytes of memory for offsets vector\n",
3799 (int)(size_offsets_max * sizeof(int)));
3800 yield = 1;
3801 goto EXIT;
3802 }
3803 }
3804 use_size_offsets = n;
3805 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3806 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
3807 continue;
3808
3809 case 'P':
3810 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3811 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3812 continue;
3813
3814 case 'Q':
3815 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3816 if (extra == NULL)
3817 {
3818 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3819 extra->flags = 0;
3820 }
3821 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3822 extra->match_limit_recursion = n;
3823 continue;
3824
3825 case 'q':
3826 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3827 if (extra == NULL)
3828 {
3829 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3830 extra->flags = 0;
3831 }
3832 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3833 extra->match_limit = n;
3834 continue;
3835
3836 #if !defined NODFA
3837 case 'R':
3838 options |= PCRE_DFA_RESTART;
3839 continue;
3840 #endif
3841
3842 case 'S':
3843 show_malloc = 1;
3844 continue;
3845
3846 case 'Y':
3847 options |= PCRE_NO_START_OPTIMIZE;
3848 continue;
3849
3850 case 'Z':
3851 options |= PCRE_NOTEOL;
3852 continue;
3853
3854 case '?':
3855 options |= PCRE_NO_UTF8_CHECK;
3856 continue;
3857
3858 case '<':
3859 {
3860 int x = check_newline(p, outfile);
3861 if (x == 0) goto NEXT_DATA;
3862 options |= x;
3863 while (*p++ != '>');
3864 }
3865 continue;
3866 }
3867
3868 /* We now have a character value in c that may be greater than 255. In
3869 16-bit mode, we always convert characters to UTF-8 so that values greater
3870 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3871 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3872 mode must have come from \x{...} or octal constructs because values from
3873 \x.. get this far only in non-UTF mode. */
3874
3875 #if !defined NOUTF || defined SUPPORT_PCRE16
3876 if (use_pcre16 || use_utf)
3877 {
3878 pcre_uint8 buff8[8];
3879 int ii, utn;
3880 utn = ord2utf8(c, buff8);
3881 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3882 }
3883 else
3884 #endif
3885 {
3886 if (c > 255)
3887 {
3888 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3889 "and UTF-8 mode is not enabled.\n", c);
3890 fprintf(outfile, "** Truncation will probably give the wrong "
3891 "result.\n");
3892 }
3893 *q++ = c;
3894 }
3895 }
3896
3897 /* Reached end of subject string */
3898
3899 *q = 0;
3900 len = (int)(q - dbuffer);
3901
3902 /* Move the data to the end of the buffer so that a read over the end of
3903 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3904 we are using the POSIX interface, we must include the terminating zero. */
3905
3906 #if !defined NOPOSIX
3907 if (posix || do_posix)
3908 {
3909 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3910 bptr += buffer_size - len - 1;
3911 }
3912 else
3913 #endif
3914 {
3915 memmove(bptr + buffer_size - len, bptr, len);
3916 bptr += buffer_size - len;
3917 }
3918
3919 if ((all_use_dfa || use_dfa) && find_match_limit)
3920 {
3921 printf("**Match limit not relevant for DFA matching: ignored\n");
3922 find_match_limit = 0;
3923 }
3924
3925 /* Handle matching via the POSIX interface, which does not
3926 support timing or playing with the match limit or callout data. */
3927
3928 #if !defined NOPOSIX
3929 if (posix || do_posix)
3930 {
3931 int rc;
3932 int eflags = 0;
3933 regmatch_t *pmatch = NULL;
3934 if (use_size_offsets > 0)
3935 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3936 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3937 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3938 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3939
3940 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3941
3942 if (rc != 0)
3943 {
3944 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3945 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3946 }
3947 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3948 != 0)
3949 {
3950 fprintf(outfile, "Matched with REG_NOSUB\n");
3951 }
3952 else
3953 {
3954 size_t i;
3955 for (i = 0; i < (size_t)use_size_offsets; i++)
3956 {
3957 if (pmatch[i].rm_so >= 0)
3958 {
3959 fprintf(outfile, "%2d: ", (int)i);
3960 PCHARSV(dbuffer, pmatch[i].rm_so,
3961 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3962 fprintf(outfile, "\n");
3963 if (do_showcaprest || (i == 0 && do_showrest))
3964 {
3965 fprintf(outfile, "%2d+ ", (int)i);
3966 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3967 outfile);
3968 fprintf(outfile, "\n");
3969 }
3970 }
3971 }
3972 }
3973 free(pmatch);
3974 goto NEXT_DATA;
3975 }
3976
3977 #endif /* !defined NOPOSIX */
3978
3979 /* Handle matching via the native interface - repeats for /g and /G */
3980
3981 #ifdef SUPPORT_PCRE16
3982 if (use_pcre16)
3983 {
3984 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3985 switch(len)
3986 {
3987 case -1:
3988 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3989 "converted to UTF-16\n");
3990 goto NEXT_DATA;
3991
3992 case -2:
3993 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3994 "cannot be converted to UTF-16\n");
3995 goto NEXT_DATA;
3996
3997 case -3:
3998 fprintf(outfile, "**Failed: character value greater than 0xffff "
3999 "cannot be converted to 16-bit in non-UTF mode\n");
4000 goto NEXT_DATA;
4001
4002 default:
4003 break;
4004 }
4005 bptr = (pcre_uint8 *)buffer16;
4006 }
4007 #endif
4008
4009 /* Ensure that there is a JIT callback if we want to verify that JIT was
4010 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4011
4012 if (verify_jit && jit_stack == NULL && extra != NULL)
4013 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4014
4015 for (;; gmatched++) /* Loop for /g or /G */
4016 {
4017 markptr = NULL;
4018 jit_was_used = FALSE;
4019
4020 if (timeitm > 0)
4021 {
4022 register int i;
4023 clock_t time_taken;
4024 clock_t start_time = clock();
4025
4026 #if !defined NODFA
4027 if (all_use_dfa || use_dfa)
4028 {
4029 if ((options & PCRE_DFA_RESTART) != 0)
4030 {
4031 fprintf(outfile, "Timing DFA restarts is not supported\n");
4032 break;
4033 }
4034 if (dfa_workspace == NULL)
4035 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4036 for (i = 0; i < timeitm; i++)
4037 {
4038 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4039 (options | g_notempty), use_offsets, use_size_offsets,
4040 dfa_workspace, DFA_WS_DIMENSION);
4041 }
4042 }
4043 else
4044 #endif
4045
4046 for (i = 0; i < timeitm; i++)
4047 {
4048 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4049 (options | g_notempty), use_offsets, use_size_offsets);
4050 }
4051 time_taken = clock() - start_time;
4052 fprintf(outfile, "Execute time %.4f milliseconds\n",
4053 (((double)time_taken * 1000.0) / (double)timeitm) /
4054 (double)CLOCKS_PER_SEC);
4055 }
4056
4057 /* If find_match_limit is set, we want to do repeated matches with
4058 varying limits in order to find the minimum value for the match limit and
4059 for the recursion limit. The match limits are relevant only to the normal
4060 running of pcre_exec(), so disable the JIT optimization. This makes it
4061 possible to run the same set of tests with and without JIT externally
4062 requested. */
4063
4064 if (find_match_limit)
4065 {
4066 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4067 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4068 extra->flags = 0;
4069
4070 (void)check_match_limit(re, extra, bptr, len, start_offset,
4071 options|g_notempty, use_offsets, use_size_offsets,
4072 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4073 PCRE_ERROR_MATCHLIMIT, "match()");
4074
4075 count = check_match_limit(re, extra, bptr, len, start_offset,
4076 options|g_notempty, use_offsets, use_size_offsets,
4077 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4078 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4079 }
4080
4081 /* If callout_data is set, use the interface with additional data */
4082
4083 else if (callout_data_set)
4084 {
4085 if (extra == NULL)
4086 {
4087 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4088 extra->flags = 0;
4089 }
4090 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4091 extra->callout_data = &callout_data;
4092 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4093 options | g_notempty, use_offsets, use_size_offsets);
4094 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4095 }
4096
4097 /* The normal case is just to do the match once, with the default
4098 value of match_limit. */
4099
4100 #if !defined NODFA
4101 else if (all_use_dfa || use_dfa)
4102 {
4103 if (dfa_workspace == NULL)
4104 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4105 if (dfa_matched++ == 0)
4106 dfa_workspace[0] = -1; /* To catch bad restart */
4107 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4108 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4109 DFA_WS_DIMENSION);
4110 if (count == 0)
4111 {
4112 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4113 count = use_size_offsets/2;
4114 }
4115 }
4116 #endif
4117
4118 else
4119 {
4120 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4121 options | g_notempty, use_offsets, use_size_offsets);
4122 if (count == 0)
4123 {
4124 fprintf(outfile, "Matched, but too many substrings\n");
4125 count = use_size_offsets/3;
4126 }
4127 }
4128
4129 /* Matched */
4130
4131 if (count >= 0)
4132 {
4133 int i, maxcount;
4134 void *cnptr, *gnptr;
4135
4136 #if !defined NODFA
4137 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4138 #endif
4139 maxcount = use_size_offsets/3;
4140
4141 /* This is a check against a lunatic return value. */
4142
4143 if (count > maxcount)
4144 {
4145 fprintf(outfile,
4146 "** PCRE error: returned count %d is too big for offset size %d\n",
4147 count, use_size_offsets);
4148 count = use_size_offsets/3;
4149 if (do_g || do_G)
4150 {
4151 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4152 do_g = do_G = FALSE; /* Break g/G loop */
4153 }
4154 }
4155
4156 /* do_allcaps requests showing of all captures in the pattern, to check
4157 unset ones at the end. */
4158
4159 if (do_allcaps)
4160 {
4161 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4162 goto SKIP_DATA;
4163 count++; /* Allow for full match */
4164 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4165 }
4166
4167 /* Output the captured substrings */
4168
4169 for (i = 0; i < count * 2; i += 2)
4170 {
4171 if (use_offsets[i] < 0)
4172 {
4173 if (use_offsets[i] != -1)
4174 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4175 use_offsets[i], i);
4176 if (use_offsets[i+1] != -1)
4177 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4178 use_offsets[i+1], i+1);
4179 fprintf(outfile, "%2d: <unset>\n", i/2);
4180 }
4181 else
4182 {
4183 fprintf(outfile, "%2d: ", i/2);
4184 PCHARSV(bptr, use_offsets[i],
4185 use_offsets[i+1] - use_offsets[i], outfile);
4186 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4187 fprintf(outfile, "\n");
4188 if (do_showcaprest || (i == 0 && do_showrest))
4189 {
4190 fprintf(outfile, "%2d+ ", i/2);
4191 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4192 outfile);
4193 fprintf(outfile, "\n");
4194 }
4195 }
4196 }
4197
4198 if (markptr != NULL)
4199 {
4200 fprintf(outfile, "MK: ");
4201 PCHARSV(markptr, 0, -1, outfile);
4202 fprintf(outfile, "\n");
4203 }
4204
4205 for (i = 0; i < 32; i++)
4206 {
4207 if ((copystrings & (1 << i)) != 0)
4208 {
4209 int rc;
4210 char copybuffer[256];
4211 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4212 copybuffer, sizeof(copybuffer));
4213 if (rc < 0)
4214 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4215 else
4216 {
4217 fprintf(outfile, "%2dC ", i);
4218 PCHARSV(copybuffer, 0, rc, outfile);
4219 fprintf(outfile, " (%d)\n", rc);
4220 }
4221 }
4222 }
4223
4224 cnptr = copynames;
4225 for (;;)
4226 {
4227 int rc;
4228 char copybuffer[256];
4229
4230 if (use_pcre16)
4231 {
4232 if (*(pcre_uint16 *)cnptr == 0) break;
4233 }
4234 else
4235 {
4236 if (*(pcre_uint8 *)cnptr == 0) break;
4237 }
4238
4239 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4240 cnptr, copybuffer, sizeof(copybuffer));
4241
4242 if (rc < 0)
4243 {
4244 fprintf(outfile, "copy substring ");
4245 PCHARSV(cnptr, 0, -1, outfile);
4246 fprintf(outfile, " failed %d\n", rc);
4247 }
4248 else
4249 {
4250 fprintf(outfile, " C ");
4251 PCHARSV(copybuffer, 0, rc, outfile);
4252 fprintf(outfile, " (%d) ", rc);
4253 PCHARSV(cnptr, 0, -1, outfile);
4254 putc('\n', outfile);
4255 }
4256
4257 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4258 }
4259
4260 for (i = 0; i < 32; i++)
4261 {
4262 if ((getstrings & (1 << i)) != 0)
4263 {
4264 int rc;
4265 const char *substring;
4266 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4267 if (rc < 0)
4268 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4269 else
4270 {
4271 fprintf(outfile, "%2dG ", i);
4272 PCHARSV(substring, 0, rc, outfile);
4273 fprintf(outfile, " (%d)\n", rc);
4274 PCRE_FREE_SUBSTRING(substring);
4275 }
4276 }
4277 }
4278
4279 gnptr = getnames;
4280 for (;;)
4281 {
4282 int rc;
4283 const char *substring;
4284
4285 if (use_pcre16)
4286 {
4287 if (*(pcre_uint16 *)gnptr == 0) break;
4288 }
4289 else
4290 {
4291 if (*(pcre_uint8 *)gnptr == 0) break;
4292 }
4293
4294 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4295 gnptr, &substring);
4296 if (rc < 0)
4297 {
4298 fprintf(outfile, "get substring ");
4299 PCHARSV(gnptr, 0, -1, outfile);
4300 fprintf(outfile, " failed %d\n", rc);
4301 }
4302 else
4303 {
4304 fprintf(outfile, " G ");
4305 PCHARSV(substring, 0, rc, outfile);
4306 fprintf(outfile, " (%d) ", rc);
4307 PCHARSV(gnptr, 0, -1, outfile);
4308 PCRE_FREE_SUBSTRING(substring);
4309 putc('\n', outfile);
4310 }
4311
4312 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4313 }
4314
4315 if (getlist)
4316 {
4317 int rc;
4318 const char **stringlist;
4319 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4320 if (rc < 0)
4321 fprintf(outfile, "get substring list failed %d\n", rc);
4322 else
4323 {
4324 for (i = 0; i < count; i++)
4325 {
4326 fprintf(outfile, "%2dL ", i);
4327 PCHARSV(stringlist[i], 0, -1, outfile);
4328 putc('\n', outfile);
4329 }
4330 if (stringlist[i] != NULL)
4331 fprintf(outfile, "string list not terminated by NULL\n");
4332 PCRE_FREE_SUBSTRING_LIST(stringlist);
4333 }
4334 }
4335 }
4336
4337 /* There was a partial match */
4338
4339 else if (count == PCRE_ERROR_PARTIAL)
4340 {
4341 if (markptr == NULL) fprintf(outfile, "Partial match");
4342 else
4343 {
4344 fprintf(outfile, "Partial match, mark=");
4345 PCHARSV(markptr, 0, -1, outfile);
4346 }
4347 if (use_size_offsets > 1)
4348 {
4349 fprintf(outfile, ": ");
4350 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4351 outfile);
4352 }
4353 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4354 fprintf(outfile, "\n");
4355 break; /* Out of the /g loop */
4356 }
4357
4358 /* Failed to match. If this is a /g or /G loop and we previously set
4359 g_notempty after a null match, this is not necessarily the end. We want
4360 to advance the start offset, and continue. We won't be at the end of the
4361 string - that was checked before setting g_notempty.
4362
4363 Complication arises in the case when the newline convention is "any",
4364 "crlf", or "anycrlf". If the previous match was at the end of a line
4365 terminated by CRLF, an advance of one character just passes the \r,
4366 whereas we should prefer the longer newline sequence, as does the code in
4367 pcre_exec(). Fudge the offset value to achieve this. We check for a
4368 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4369 find the default.
4370
4371 Otherwise, in the case of UTF-8 matching, the advance must be one
4372 character, not one byte. */
4373
4374 else
4375 {
4376 if (g_notempty != 0)
4377 {
4378 int onechar = 1;
4379 unsigned int obits = ((REAL_PCRE *)re)->options;
4380 use_offsets[0] = start_offset;
4381 if ((obits & PCRE_NEWLINE_BITS) == 0)
4382 {
4383 int d;
4384 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4385 /* Note that these values are always the ASCII ones, even in
4386 EBCDIC environments. CR = 13, NL = 10. */
4387 obits = (d == 13)? PCRE_NEWLINE_CR :
4388 (d == 10)? PCRE_NEWLINE_LF :
4389 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4390 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4391 (d == -1)? PCRE_NEWLINE_ANY : 0;
4392 }
4393 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4394 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4395 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4396 &&
4397 start_offset < len - 1 &&
4398 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4399 (use_pcre16?
4400 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4401 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4402 :
4403 bptr[start_offset] == '\r'
4404 && bptr[start_offset + 1] == '\n')
4405 #elif defined SUPPORT_PCRE16
4406 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4407 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4408 #else
4409 bptr[start_offset] == '\r'
4410 && bptr[start_offset + 1] == '\n'
4411 #endif
4412 )
4413 onechar++;
4414 else if (use_utf)
4415 {
4416 while (start_offset + onechar < len)
4417 {
4418 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4419 onechar++;
4420 }
4421 }
4422 use_offsets[1] = start_offset + onechar;
4423 }
4424 else
4425 {
4426 switch(count)
4427 {
4428 case PCRE_ERROR_NOMATCH:
4429 if (gmatched == 0)
4430 {
4431 if (markptr == NULL)
4432 {
4433 fprintf(outfile, "No match");
4434 }
4435 else
4436 {
4437 fprintf(outfile, "No match, mark = ");
4438 PCHARSV(markptr, 0, -1, outfile);
4439 }
4440 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4441 putc('\n', outfile);
4442 }
4443 break;
4444
4445 case PCRE_ERROR_BADUTF8:
4446 case PCRE_ERROR_SHORTUTF8:
4447 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4448 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4449 use_pcre16? "16" : "8");
4450 if (use_size_offsets >= 2)
4451 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4452 use_offsets[1]);
4453 fprintf(outfile, "\n");
4454 break;
4455
4456 case PCRE_ERROR_BADUTF8_OFFSET:
4457 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4458 use_pcre16? "16" : "8");
4459 break;
4460
4461 default:
4462 if (count < 0 &&
4463 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4464 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4465 else
4466 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4467 break;
4468 }
4469
4470 break; /* Out of the /g loop */
4471 }
4472 }
4473
4474 /* If not /g or /G we are done */
4475
4476 if (!do_g && !do_G) break;
4477
4478 /* If we have matched an empty string, first check to see if we are at
4479 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4480 Perl's /g option does. This turns out to be rather cunning. First we set
4481 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4482 same point. If this fails (picked up above) we advance to the next
4483 character. */
4484
4485 g_notempty = 0;
4486
4487 if (use_offsets[0] == use_offsets[1])
4488 {
4489 if (use_offsets[0] == len) break;
4490 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4491 }
4492
4493 /* For /g, update the start offset, leaving the rest alone */
4494
4495 if (do_g) start_offset = use_offsets[1];
4496
4497 /* For /G, update the pointer and length */
4498
4499 else
4500 {
4501 bptr += use_offsets[1] * CHAR_SIZE;
4502 len -= use_offsets[1];
4503 }
4504 } /* End of loop for /g and /G */
4505
4506 NEXT_DATA: continue;
4507 } /* End of loop for data lines */
4508
4509 CONTINUE:
4510
4511 #if !defined NOPOSIX
4512 if (posix || do_posix) regfree(&preg);
4513 #endif
4514
4515 if (re != NULL) new_free(re);
4516 if (extra != NULL)
4517 {
4518 PCRE_FREE_STUDY(extra);
4519 }
4520 if (locale_set)
4521 {
4522 new_free((void *)tables);
4523 setlocale(LC_CTYPE, "C");
4524 locale_set = 0;
4525 }
4526 if (jit_stack != NULL)
4527 {
4528 PCRE_JIT_STACK_FREE(jit_stack);
4529 jit_stack = NULL;
4530 }
4531 }
4532
4533 if (infile == stdin) fprintf(outfile, "\n");
4534
4535 EXIT:
4536
4537 if (infile != NULL && infile != stdin) fclose(infile);
4538 if (outfile != NULL && outfile != stdout) fclose(outfile);
4539
4540 free(buffer);
4541 free(dbuffer);
4542 free(pbuffer);
4543 free(offsets);
4544
4545 #ifdef SUPPORT_PCRE16
4546 if (buffer16 != NULL) free(buffer16);
4547 #endif
4548
4549 #if !defined NODFA
4550 if (dfa_workspace != NULL)
4551 free(dfa_workspace);
4552 #endif
4553
4554 return yield;
4555 }
4556
4557 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5