/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1087 - (show annotations)
Tue Oct 16 15:55:38 2012 UTC (7 years, 1 month ago) by chpe
File MIME type: text/plain
File size: 153891 byte(s)
Error occurred while calculating annotation data.
pcre32: pcretest: Comment fixes
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109
110 /* Not Windows */
111
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123
124 #define PRIV(name) name
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136
137 #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138 /* Configure internal macros to 32 bit mode. */
139 #define COMPILE_PCRE32
140 #endif
141 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142 /* Configure internal macros to 16 bit mode. */
143 #define COMPILE_PCRE16
144 #endif
145 #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146 /* Configure internal macros to 8 bit mode. */
147 #define COMPILE_PCRE8
148 #endif
149
150 #include "pcre_internal.h"
151
152 /* The pcre_printint() function, which prints the internal form of a compiled
153 regex, is held in a separate file so that (a) it can be compiled in either
154 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155 when that is compiled in debug mode. */
156
157 #ifdef SUPPORT_PCRE8
158 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159 #endif
160 #ifdef SUPPORT_PCRE16
161 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162 #endif
163 #ifdef SUPPORT_PCRE32
164 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165 #endif
166
167 /* We need access to some of the data tables that PCRE uses. So as not to have
168 to keep two copies, we include the source files here, changing the names of the
169 external symbols to prevent clashes. */
170
171 #define PCRE_INCLUDED
172
173 #include "pcre_tables.c"
174 #include "pcre_ucd.c"
175
176 /* The definition of the macro PRINTABLE, which determines whether to print an
177 output character as-is or as a hex value when showing compiled patterns, is
178 the same as in the printint.src file. We use it here in cases when the locale
179 has not been explicitly changed, so as to get consistent output from systems
180 that differ in their output from isprint() even in the "C" locale. */
181
/* PRINTABLE(c) is the locale-independent test: non-zero when c lies in the
fixed range that is treated as printable for the build's character set. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) decides whether c may be output as-is. When a locale has been
set, isprint() is consulted, but only for values in the range 0..255: passing
anything else (a 16/32-bit code unit above 255, or a negative value) to a
<ctype.h> function is undefined behaviour, so such values are reported as not
printable. Without a locale the fixed PRINTABLE() range is used. Note that c
is evaluated more than once. */

#define PRINTOK(c) \
  (locale_set ? ((unsigned long)(c) <= 255ul && isprint((int)(c)) != 0) \
              : PRINTABLE(c))
189
190 /* Posix support is disabled in 16 or 32 bit only mode. */
191 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192 #define NOPOSIX
193 #endif
194
195 /* It is possible to compile this test program without including support for
196 testing the POSIX interface, though this is not available via the standard
197 Makefile. */
198
199 #if !defined NOPOSIX
200 #include "pcreposix.h"
201 #endif
202
203 /* It is also possible, originally for the benefit of a version that was
204 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206 automatically cut out the UTF support if PCRE is built without it. */
207
208 #ifndef SUPPORT_UTF
209 #ifndef NOUTF
210 #define NOUTF
211 #endif
212 #endif
213
214 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
215 for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
216 only from one place and is handled differently). I couldn't dream up any way of
217 using a single macro to do this in a generic way, because of the many different
218 argument requirements. We know that at least one of SUPPORT_PCRE8,
219 SUPPORT_PCRE16 and SUPPORT_PCRE32 must be set. First define macros for each
220 individual mode; then use these in the definitions of generic macros.
221
222 **** Special note about the PCHARSxxx macros: the address of the string to be
223 printed is always given as two arguments: a base address followed by an offset.
224 The base address is cast to the correct data size for 8 or 16 bit data; the
225 offset is in units of this size. If the string were given as base+offset in one
226 argument, the casting might be incorrectly applied. */
227
228 #ifdef SUPPORT_PCRE8
229
230 #define PCHARS8(lv, p, offset, len, f) \
231 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
232
233 #define PCHARSV8(p, offset, len, f) \
234 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235
236 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237 p = read_capture_name8(p, cn8, re)
238
239 #define STRLEN8(p) ((int)strlen((char *)p))
240
241 #define SET_PCRE_CALLOUT8(callout) \
242 pcre_callout = callout
243
244 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245 pcre_assign_jit_stack(extra, callback, userdata)
246
247 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248 re = pcre_compile((char *)pat, options, error, erroffset, tables)
249
250 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251 namesptr, cbuffer, size) \
252 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253 (char *)namesptr, cbuffer, size)
254
255 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257
258 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259 offsets, size_offsets, workspace, size_workspace) \
260 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261 offsets, size_offsets, workspace, size_workspace)
262
263 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264 offsets, size_offsets) \
265 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266 offsets, size_offsets)
267
268 #define PCRE_FREE_STUDY8(extra) \
269 pcre_free_study(extra)
270
271 #define PCRE_FREE_SUBSTRING8(substring) \
272 pcre_free_substring(substring)
273
274 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275 pcre_free_substring_list(listptr)
276
277 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278 getnamesptr, subsptr) \
279 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280 (char *)getnamesptr, subsptr)
281
282 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
283 n = pcre_get_stringnumber(re, (char *)ptr)
284
285 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287
288 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290
291 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293
294 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295 pcre_printint(re, outfile, debug_lengths)
296
297 #define PCRE_STUDY8(extra, re, options, error) \
298 extra = pcre_study(re, options, error)
299
300 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301 pcre_jit_stack_alloc(startsize, maxsize)
302
303 #define PCRE_JIT_STACK_FREE8(stack) \
304 pcre_jit_stack_free(stack)
305
306 #endif /* SUPPORT_PCRE8 */
307
308 /* -----------------------------------------------------------*/
309
310 #ifdef SUPPORT_PCRE16
311
312 #define PCHARS16(lv, p, offset, len, f) \
313 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
314
315 #define PCHARSV16(p, offset, len, f) \
316 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
317
318 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319 p = read_capture_name16(p, cn16, re)
320
321 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322
323 #define SET_PCRE_CALLOUT16(callout) \
324 pcre16_callout = (int (*)(pcre16_callout_block *))callout
325
326 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327 pcre16_assign_jit_stack((pcre16_extra *)extra, \
328 (pcre16_jit_callback)callback, userdata)
329
330 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332 tables)
333
/* Copy a named captured substring in 16-bit mode; the result code is stored
in rc. The size argument is halved before being handed to the library, i.e. it
is converted to a count of 16-bit code units (this assumes callers give the
capacity of cbuffer in bytes, as the 8-bit macro forwards it unchanged). The
argument is parenthesised so that an expression such as "a + b" is divided as
a whole. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)
338
/* Copy captured substring number i in 16-bit mode; the result code is stored
in rc. The size argument is halved before being handed to the library, i.e. it
is converted to a count of 16-bit code units (this assumes callers give the
capacity of cbuffer in bytes, as the 8-bit macro forwards it unchanged). The
argument is parenthesised so that an expression such as "a + b" is divided as
a whole. */

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, (size)/2)
342
343 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344 offsets, size_offsets, workspace, size_workspace) \
345 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347 workspace, size_workspace)
348
349 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350 offsets, size_offsets) \
351 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352 len, start_offset, options, offsets, size_offsets)
353
354 #define PCRE_FREE_STUDY16(extra) \
355 pcre16_free_study((pcre16_extra *)extra)
356
357 #define PCRE_FREE_SUBSTRING16(substring) \
358 pcre16_free_substring((PCRE_SPTR16)substring)
359
360 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362
363 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364 getnamesptr, subsptr) \
365 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367
368 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
369 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
370
371 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373 (PCRE_SPTR16 *)(void*)subsptr)
374
375 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377 (PCRE_SPTR16 **)(void*)listptr)
378
379 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381 tables)
382
383 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384 pcre16_printint(re, outfile, debug_lengths)
385
386 #define PCRE_STUDY16(extra, re, options, error) \
387 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388
389 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391
392 #define PCRE_JIT_STACK_FREE16(stack) \
393 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394
395 #endif /* SUPPORT_PCRE16 */
396
397 /* -----------------------------------------------------------*/
398
399 #ifdef SUPPORT_PCRE32
400
401 #define PCHARS32(lv, p, offset, len, f) \
402 lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)
403
404 #define PCHARSV32(p, offset, len, f) \
405 (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)
406
407 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408 p = read_capture_name32(p, cn32, re)
409
410 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411
412 #define SET_PCRE_CALLOUT32(callout) \
413 pcre32_callout = (int (*)(pcre32_callout_block *))callout
414
415 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416 pcre32_assign_jit_stack((pcre32_extra *)extra, \
417 (pcre32_jit_callback)callback, userdata)
418
419 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421 tables)
422
/* Copy a named captured substring in 32-bit mode; the result code is stored
in rc. The size argument is divided by 4 to convert it to a count of 32-bit
code units (this assumes callers give the capacity of cbuffer in bytes, as the
8-bit macro forwards it unchanged and the 16-bit macro halves it). Dividing by
2 here would tell the library that the buffer is twice as large as it really
is. The argument is parenthesised so that an expression is divided as a
whole. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
427
/* Copy captured substring number i in 32-bit mode; the result code is stored
in rc. The size argument is divided by 4 to convert it to a count of 32-bit
code units (this assumes callers give the capacity of cbuffer in bytes, as the
8-bit macro forwards it unchanged and the 16-bit macro halves it). Dividing by
2 here would tell the library that the buffer is twice as large as it really
is. The argument is parenthesised so that an expression is divided as a
whole. */

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
431
432 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433 offsets, size_offsets, workspace, size_workspace) \
434 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436 workspace, size_workspace)
437
438 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439 offsets, size_offsets) \
440 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441 len, start_offset, options, offsets, size_offsets)
442
443 #define PCRE_FREE_STUDY32(extra) \
444 pcre32_free_study((pcre32_extra *)extra)
445
446 #define PCRE_FREE_SUBSTRING32(substring) \
447 pcre32_free_substring((PCRE_SPTR32)substring)
448
449 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451
452 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453 getnamesptr, subsptr) \
454 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456
457 #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
458 n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
459
460 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462 (PCRE_SPTR32 *)(void*)subsptr)
463
464 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466 (PCRE_SPTR32 **)(void*)listptr)
467
468 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470 tables)
471
472 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473 pcre32_printint(re, outfile, debug_lengths)
474
475 #define PCRE_STUDY32(extra, re, options, error) \
476 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477
478 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480
481 #define PCRE_JIT_STACK_FREE32(stack) \
482 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483
484 #endif /* SUPPORT_PCRE32 */
485
486
487 /* ----- At least two modes are supported; a runtime test is needed, except for
488 pcre_config(), and the JIT stack functions, when it doesn't matter which
489 version is called. ----- */
490
/* Identifiers for the library width selected at run time. The numeric values
are significant: when more than one width is compiled in, CHAR_SIZE is
computed as (1 << pcre_mode), so the 8-, 16- and 32-bit modes have to be 0, 1
and 2 respectively. */

enum {
  PCRE8_MODE  = 0,
  PCRE16_MODE = 1,
  PCRE32_MODE = 2
};
496
497 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498
499 #define CHAR_SIZE (1 << pcre_mode)
500
501 #define PCHARS(lv, p, offset, len, f) \
502 if (pcre_mode == PCRE32_MODE) \
503 PCHARS32(lv, p, offset, len, f); \
504 else if (pcre_mode == PCRE16_MODE) \
505 PCHARS16(lv, p, offset, len, f); \
506 else \
507 PCHARS8(lv, p, offset, len, f)
508
509 #define PCHARSV(p, offset, len, f) \
510 if (pcre_mode == PCRE32_MODE) \
511 PCHARSV32(p, offset, len, f); \
512 else if (pcre_mode == PCRE16_MODE) \
513 PCHARSV16(p, offset, len, f); \
514 else \
515 PCHARSV8(p, offset, len, f)
516
517 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518 if (pcre_mode == PCRE32_MODE) \
519 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520 else if (pcre_mode == PCRE16_MODE) \
521 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522 else \
523 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524
525 #define SET_PCRE_CALLOUT(callout) \
526 if (pcre_mode == PCRE32_MODE) \
527 SET_PCRE_CALLOUT32(callout); \
528 else if (pcre_mode == PCRE16_MODE) \
529 SET_PCRE_CALLOUT16(callout); \
530 else \
531 SET_PCRE_CALLOUT8(callout)
532
533 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534
535 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536 if (pcre_mode == PCRE32_MODE) \
537 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538 else if (pcre_mode == PCRE16_MODE) \
539 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540 else \
541 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542
543 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544 if (pcre_mode == PCRE32_MODE) \
545 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546 else if (pcre_mode == PCRE16_MODE) \
547 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548 else \
549 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
550
551 #define PCRE_CONFIG pcre_config
552
553 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554 namesptr, cbuffer, size) \
555 if (pcre_mode == PCRE32_MODE) \
556 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557 namesptr, cbuffer, size); \
558 else if (pcre_mode == PCRE16_MODE) \
559 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560 namesptr, cbuffer, size); \
561 else \
562 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
563 namesptr, cbuffer, size)
564
565 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566 if (pcre_mode == PCRE32_MODE) \
567 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568 else if (pcre_mode == PCRE16_MODE) \
569 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570 else \
571 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572
573 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574 offsets, size_offsets, workspace, size_workspace) \
575 if (pcre_mode == PCRE32_MODE) \
576 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577 offsets, size_offsets, workspace, size_workspace); \
578 else if (pcre_mode == PCRE16_MODE) \
579 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580 offsets, size_offsets, workspace, size_workspace); \
581 else \
582 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets, workspace, size_workspace)
584
585 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets) \
587 if (pcre_mode == PCRE32_MODE) \
588 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets); \
590 else if (pcre_mode == PCRE16_MODE) \
591 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets); \
593 else \
594 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
595 offsets, size_offsets)
596
597 #define PCRE_FREE_STUDY(extra) \
598 if (pcre_mode == PCRE32_MODE) \
599 PCRE_FREE_STUDY32(extra); \
600 else if (pcre_mode == PCRE16_MODE) \
601 PCRE_FREE_STUDY16(extra); \
602 else \
603 PCRE_FREE_STUDY8(extra)
604
605 #define PCRE_FREE_SUBSTRING(substring) \
606 if (pcre_mode == PCRE32_MODE) \
607 PCRE_FREE_SUBSTRING32(substring); \
608 else if (pcre_mode == PCRE16_MODE) \
609 PCRE_FREE_SUBSTRING16(substring); \
610 else \
611 PCRE_FREE_SUBSTRING8(substring)
612
613 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614 if (pcre_mode == PCRE32_MODE) \
615 PCRE_FREE_SUBSTRING_LIST32(listptr); \
616 else if (pcre_mode == PCRE16_MODE) \
617 PCRE_FREE_SUBSTRING_LIST16(listptr); \
618 else \
619 PCRE_FREE_SUBSTRING_LIST8(listptr)
620
621 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622 getnamesptr, subsptr) \
623 if (pcre_mode == PCRE32_MODE) \
624 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625 getnamesptr, subsptr); \
626 else if (pcre_mode == PCRE16_MODE) \
627 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628 getnamesptr, subsptr); \
629 else \
630 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
631 getnamesptr, subsptr)
632
633 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634 if (pcre_mode == PCRE32_MODE) \
635 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636 else if (pcre_mode == PCRE16_MODE) \
637 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638 else \
639 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640
641 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642 if (pcre_mode == PCRE32_MODE) \
643 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644 else if (pcre_mode == PCRE16_MODE) \
645 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646 else \
647 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648
649 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650 if (pcre_mode == PCRE32_MODE) \
651 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652 else if (pcre_mode == PCRE16_MODE) \
653 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654 else \
655 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656
657 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658 (pcre_mode == PCRE32_MODE ? \
659 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660 : pcre_mode == PCRE16_MODE ? \
661 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663
664 #define PCRE_JIT_STACK_FREE(stack) \
665 if (pcre_mode == PCRE32_MODE) \
666 PCRE_JIT_STACK_FREE32(stack); \
667 else if (pcre_mode == PCRE16_MODE) \
668 PCRE_JIT_STACK_FREE16(stack); \
669 else \
670 PCRE_JIT_STACK_FREE8(stack)
671
672 #define PCRE_MAKETABLES \
673 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674
675 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676 if (pcre_mode == PCRE32_MODE) \
677 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678 else if (pcre_mode == PCRE16_MODE) \
679 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680 else \
681 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682
683 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684 if (pcre_mode == PCRE32_MODE) \
685 PCRE_PRINTINT32(re, outfile, debug_lengths); \
686 else if (pcre_mode == PCRE16_MODE) \
687 PCRE_PRINTINT16(re, outfile, debug_lengths); \
688 else \
689 PCRE_PRINTINT8(re, outfile, debug_lengths)
690
691 #define PCRE_STUDY(extra, re, options, error) \
692 if (pcre_mode == PCRE32_MODE) \
693 PCRE_STUDY32(extra, re, options, error); \
694 else if (pcre_mode == PCRE16_MODE) \
695 PCRE_STUDY16(extra, re, options, error); \
696 else \
697 PCRE_STUDY8(extra, re, options, error)
698
699 /* ----- Only 8-bit mode is supported ----- */
700
701 #elif defined SUPPORT_PCRE8
702 #define CHAR_SIZE 1
703 #define PCHARS PCHARS8
704 #define PCHARSV PCHARSV8
705 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
706 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
707 #define STRLEN STRLEN8
708 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
709 #define PCRE_COMPILE PCRE_COMPILE8
710 #define PCRE_CONFIG pcre_config
711 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
712 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
713 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
714 #define PCRE_EXEC PCRE_EXEC8
715 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
716 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
717 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
718 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
719 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
720 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
721 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
722 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
723 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
724 #define PCRE_MAKETABLES pcre_maketables()
725 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
726 #define PCRE_PRINTINT PCRE_PRINTINT8
727 #define PCRE_STUDY PCRE_STUDY8
728
729 /* ----- Only 16-bit mode is supported ----- */
730
731 #elif defined SUPPORT_PCRE16
732 #define CHAR_SIZE 2
733 #define PCHARS PCHARS16
734 #define PCHARSV PCHARSV16
735 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
736 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
737 #define STRLEN STRLEN16
738 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
739 #define PCRE_COMPILE PCRE_COMPILE16
740 #define PCRE_CONFIG pcre16_config
741 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
742 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
743 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
744 #define PCRE_EXEC PCRE_EXEC16
745 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
746 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
747 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
748 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
749 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
750 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
751 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
752 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
753 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
754 #define PCRE_MAKETABLES pcre16_maketables()
755 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756 #define PCRE_PRINTINT PCRE_PRINTINT16
757 #define PCRE_STUDY PCRE_STUDY16
758
759 /* ----- Only 32-bit mode is supported ----- */
760
761 #elif defined SUPPORT_PCRE32
762 #define CHAR_SIZE 4
763 #define PCHARS PCHARS32
764 #define PCHARSV PCHARSV32
765 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
766 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
767 #define STRLEN STRLEN32
768 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
769 #define PCRE_COMPILE PCRE_COMPILE32
770 #define PCRE_CONFIG pcre32_config
771 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
773 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
774 #define PCRE_EXEC PCRE_EXEC32
775 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
776 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
777 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
778 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
779 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
780 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
781 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
782 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
783 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
784 #define PCRE_MAKETABLES pcre32_maketables()
785 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786 #define PCRE_PRINTINT PCRE_PRINTINT32
787 #define PCRE_STUDY PCRE_STUDY32
788
789 #endif
790
791 /* ----- End of mode-specific function call macros ----- */
792
793
794 /* Other parameters */
795
/* Make sure CLOCKS_PER_SEC exists: use CLK_TCK when that is what the system
provides, and fall back to 100 when neither macro is defined. */

#if !defined CLOCKS_PER_SEC
#  if defined CLK_TCK
#    define CLOCKS_PER_SEC CLK_TCK
#  else
#    define CLOCKS_PER_SEC 100
#  endif
#endif

/* DFA_WS_DIMENSION is defined only when NODFA is not set. */

#ifndef NODFA
#define DFA_WS_DIMENSION 1000
#endif

/* This is the default loop count for timing. */

#define LOOPREPEAT 500000
811
812 /* Static variables */
813
814 static FILE *outfile;
815 static int log_store = 0;
816 static int callout_count;
817 static int callout_extra;
818 static int callout_fail_count;
819 static int callout_fail_id;
820 static int debug_lengths;
821 static int first_callout;
822 static int jit_was_used;
823 static int locale_set = 0;
824 static int show_malloc;
825 static int use_utf;
826 static size_t gotten_store;
827 static size_t first_gotten_store = 0;
828 static const unsigned char *last_callout_mark = NULL;
829
830 /* The buffers grow automatically if very long input lines are encountered. */
831
832 static int buffer_size = 50000;
833 static pcre_uint8 *buffer = NULL;
834 static pcre_uint8 *dbuffer = NULL;
835 static pcre_uint8 *pbuffer = NULL;
836
837 /* Another buffer is needed for translation to 16/32-bit character strings. It
838 will be obtained and extended as required. */
839
840 #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
841
842 /* We need the table of operator lengths that is used for 16/32-bit compiling, in
843 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
844 data is defined as a macro (OP_LENGTHS). However, we must ensure that LINK_SIZE
845 and IMM2_SIZE are adjusted appropriately for the 16/32-bit world before that
846 macro is expanded. Just as a safety check, make sure that COMPILE_PCRE[16|32] is
    *not* set. */
847
848 #ifdef COMPILE_PCRE16
849 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
850 #endif
851
852 #ifdef COMPILE_PCRE32
853 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
854 #endif
855
/* Re-map LINK_SIZE: 2 becomes 1, and 3 or 4 becomes 2 (presumably converting a
size in bytes into a size in 16-bit units); any other value is rejected at
compile time. NOTE(review): the same adjusted value is used when OP_LENGTHS is
expanded for both OP_lengths16 and OP_lengths32 - confirm that a LINK_SIZE of 2
is right for the 32-bit table. */
856 #if LINK_SIZE == 2
857 #undef LINK_SIZE
858 #define LINK_SIZE 1
859 #elif LINK_SIZE == 3 || LINK_SIZE == 4
860 #undef LINK_SIZE
861 #define LINK_SIZE 2
862 #else
863 #error LINK_SIZE must be either 2, 3, or 4
864 #endif
865
/* IMM2_SIZE is unconditionally forced to 1 for the wide-character tables. */
866 #undef IMM2_SIZE
867 #define IMM2_SIZE 1
868
869 #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
870
/* Per-width state. For each wide mode that is compiled in there is a
conversion buffer (filled by to16() or to32()), the current size of that buffer
in bytes, and a table built by expanding the OP_LENGTHS macro. */

#if defined(SUPPORT_PCRE16)
static pcre_uint16 *buffer16 = NULL;
static int buffer16_size = 0;
static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE16 */

#if defined(SUPPORT_PCRE32)
static pcre_uint32 *buffer32 = NULL;
static int buffer32_size = 0;
static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE32 */

/* Initial library mode. The 8-bit library is preferred when it is available;
otherwise the 16-bit library is used, and failing that the 32-bit one. (If none
of the three SUPPORT_ macros is defined, pcre_mode is not declared at all.) */

#if defined(SUPPORT_PCRE8)
static int pcre_mode = PCRE8_MODE;
#elif defined(SUPPORT_PCRE16)
static int pcre_mode = PCRE16_MODE;
#elif defined(SUPPORT_PCRE32)
static int pcre_mode = PCRE32_MODE;
#endif
894
895 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
896
897 static int jit_study_bits[] =
898 {
899 PCRE_STUDY_JIT_COMPILE,
900 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
901 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
902 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
903 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
904 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
905 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
906 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
907 };
908
909 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
910 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
911
/* Textual explanations for runtime error codes. A NULL entry means there is no
fixed text in this table; the comment beside it says why. The leading number on
each line is the array index. NOTE(review): the table is presumably indexed by
the negated error code - confirm against the code that reads it. */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8/16 is handled specially */
  /* 11 */ NULL,  /* BADUTF8/16 offset is handled specially */
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8/16 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached",
  /* 28 */ "pattern compiled in wrong mode: 8-bit/16-bit error",
  /* 29 */ "pattern compiled with other endianness",
  /* 30 */ "invalid data in workspace for DFA restart"
};
947
948
949 /*************************************************
950 * Alternate character tables *
951 *************************************************/
952
953 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
954 using the default tables of the library. However, the T option can be used to
955 select alternate sets of tables, for different kinds of testing. Note also that
956 the L (locale) option also adjusts the tables. */
957
958 /* This is the set of tables distributed as default with PCRE. It recognizes
959 only ASCII characters. */
960
961 static const pcre_uint8 tables0[] = {
962
963 /* This table is a lower casing table: entry c holds the lower-case form of
    character c (256 entries; only A-Z are changed). */
964
965 0, 1, 2, 3, 4, 5, 6, 7,
966 8, 9, 10, 11, 12, 13, 14, 15,
967 16, 17, 18, 19, 20, 21, 22, 23,
968 24, 25, 26, 27, 28, 29, 30, 31,
969 32, 33, 34, 35, 36, 37, 38, 39,
970 40, 41, 42, 43, 44, 45, 46, 47,
971 48, 49, 50, 51, 52, 53, 54, 55,
972 56, 57, 58, 59, 60, 61, 62, 63,
973 64, 97, 98, 99,100,101,102,103,
974 104,105,106,107,108,109,110,111,
975 112,113,114,115,116,117,118,119,
976 120,121,122, 91, 92, 93, 94, 95,
977 96, 97, 98, 99,100,101,102,103,
978 104,105,106,107,108,109,110,111,
979 112,113,114,115,116,117,118,119,
980 120,121,122,123,124,125,126,127,
981 128,129,130,131,132,133,134,135,
982 136,137,138,139,140,141,142,143,
983 144,145,146,147,148,149,150,151,
984 152,153,154,155,156,157,158,159,
985 160,161,162,163,164,165,166,167,
986 168,169,170,171,172,173,174,175,
987 176,177,178,179,180,181,182,183,
988 184,185,186,187,188,189,190,191,
989 192,193,194,195,196,197,198,199,
990 200,201,202,203,204,205,206,207,
991 208,209,210,211,212,213,214,215,
992 216,217,218,219,220,221,222,223,
993 224,225,226,227,228,229,230,231,
994 232,233,234,235,236,237,238,239,
995 240,241,242,243,244,245,246,247,
996 248,249,250,251,252,253,254,255,
997
998 /* This table is a case flipping table: entry c holds the other-case form of
    character c (256 entries; A-Z and a-z are swapped, the rest map to
    themselves). */
999
1000 0, 1, 2, 3, 4, 5, 6, 7,
1001 8, 9, 10, 11, 12, 13, 14, 15,
1002 16, 17, 18, 19, 20, 21, 22, 23,
1003 24, 25, 26, 27, 28, 29, 30, 31,
1004 32, 33, 34, 35, 36, 37, 38, 39,
1005 40, 41, 42, 43, 44, 45, 46, 47,
1006 48, 49, 50, 51, 52, 53, 54, 55,
1007 56, 57, 58, 59, 60, 61, 62, 63,
1008 64, 97, 98, 99,100,101,102,103,
1009 104,105,106,107,108,109,110,111,
1010 112,113,114,115,116,117,118,119,
1011 120,121,122, 91, 92, 93, 94, 95,
1012 96, 65, 66, 67, 68, 69, 70, 71,
1013 72, 73, 74, 75, 76, 77, 78, 79,
1014 80, 81, 82, 83, 84, 85, 86, 87,
1015 88, 89, 90,123,124,125,126,127,
1016 128,129,130,131,132,133,134,135,
1017 136,137,138,139,140,141,142,143,
1018 144,145,146,147,148,149,150,151,
1019 152,153,154,155,156,157,158,159,
1020 160,161,162,163,164,165,166,167,
1021 168,169,170,171,172,173,174,175,
1022 176,177,178,179,180,181,182,183,
1023 184,185,186,187,188,189,190,191,
1024 192,193,194,195,196,197,198,199,
1025 200,201,202,203,204,205,206,207,
1026 208,209,210,211,212,213,214,215,
1027 216,217,218,219,220,221,222,223,
1028 224,225,226,227,228,229,230,231,
1029 232,233,234,235,236,237,238,239,
1030 240,241,242,243,244,245,246,247,
1031 248,249,250,251,252,253,254,255,
1032
1033 /* This table contains bit maps for various character classes. Each map is 32
1034 bytes long and the bits run from the least significant end of each byte. The
1035 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1036 graph, print, punct, and cntrl. Other classes are built from combinations. The
     maps appear below in that order, one group of four lines per class. */
1037
1038 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
1039 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1040 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1041 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1042
1043 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
1044 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1045 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1046 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1047
1048 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
1049 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1050 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1051 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1052
1053 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
1054 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1055 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1056 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1057
1058 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
1059 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1060 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1061 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1062
1063 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
1064 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1065 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1066 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1067
1068 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
1069 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1070 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1071 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1072
1073 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
1074 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1075 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1076 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1077
1078 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
1079 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1080 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1081 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1082
1083 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
1084 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1085 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1086 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1087
1088 /* This table identifies various classes of character by individual bits:
1089 0x01 white space character
1090 0x02 letter
1091 0x04 decimal digit
1092 0x08 hexadecimal digit
1093 0x10 alphanumeric or '_'
1094 0x80 regular expression metacharacter or binary zero
     There is one entry per character value (256 entries); the comment at the
     end of each line gives the range of characters that the line covers.
1095 */
1096
1097 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1098 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1099 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1100 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1101 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
1102 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1103 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1104 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1105 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1106 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1107 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1108 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1109 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1110 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1111 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1112 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1113 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1114 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1115 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1116 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1117 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1118 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1119 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1120 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1121 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1122 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1123 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1124 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1125 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1126 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1127 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1128 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1129
1130 /* This is a set of tables that came originally from a Windows user. It seems to
1131 be at least an approximation of ISO 8859. In particular, there are characters
1132 greater than 128 that are marked as spaces, letters, etc. */
1133
1134 static const pcre_uint8 tables1[] = {
1135 0,1,2,3,4,5,6,7, /* Section 1 (256 entries): by analogy with tables0, the lower casing table */
1136 8,9,10,11,12,13,14,15,
1137 16,17,18,19,20,21,22,23,
1138 24,25,26,27,28,29,30,31,
1139 32,33,34,35,36,37,38,39,
1140 40,41,42,43,44,45,46,47,
1141 48,49,50,51,52,53,54,55,
1142 56,57,58,59,60,61,62,63,
1143 64,97,98,99,100,101,102,103,
1144 104,105,106,107,108,109,110,111,
1145 112,113,114,115,116,117,118,119,
1146 120,121,122,91,92,93,94,95,
1147 96,97,98,99,100,101,102,103,
1148 104,105,106,107,108,109,110,111,
1149 112,113,114,115,116,117,118,119,
1150 120,121,122,123,124,125,126,127,
1151 128,129,130,131,132,133,134,135,
1152 136,137,138,139,140,141,142,143,
1153 144,145,146,147,148,149,150,151,
1154 152,153,154,155,156,157,158,159,
1155 160,161,162,163,164,165,166,167,
1156 168,169,170,171,172,173,174,175,
1157 176,177,178,179,180,181,182,183,
1158 184,185,186,187,188,189,190,191,
1159 224,225,226,227,228,229,230,231, /* 192-222 map to 224-254, except 215 which maps to itself */
1160 232,233,234,235,236,237,238,239,
1161 240,241,242,243,244,245,246,215,
1162 248,249,250,251,252,253,254,223,
1163 224,225,226,227,228,229,230,231,
1164 232,233,234,235,236,237,238,239,
1165 240,241,242,243,244,245,246,247,
1166 248,249,250,251,252,253,254,255,
1167 0,1,2,3,4,5,6,7, /* Section 2 (256 entries): by analogy with tables0, the case flipping table */
1168 8,9,10,11,12,13,14,15,
1169 16,17,18,19,20,21,22,23,
1170 24,25,26,27,28,29,30,31,
1171 32,33,34,35,36,37,38,39,
1172 40,41,42,43,44,45,46,47,
1173 48,49,50,51,52,53,54,55,
1174 56,57,58,59,60,61,62,63,
1175 64,97,98,99,100,101,102,103,
1176 104,105,106,107,108,109,110,111,
1177 112,113,114,115,116,117,118,119,
1178 120,121,122,91,92,93,94,95,
1179 96,65,66,67,68,69,70,71,
1180 72,73,74,75,76,77,78,79,
1181 80,81,82,83,84,85,86,87,
1182 88,89,90,123,124,125,126,127,
1183 128,129,130,131,132,133,134,135,
1184 136,137,138,139,140,141,142,143,
1185 144,145,146,147,148,149,150,151,
1186 152,153,154,155,156,157,158,159,
1187 160,161,162,163,164,165,166,167,
1188 168,169,170,171,172,173,174,175,
1189 176,177,178,179,180,181,182,183,
1190 184,185,186,187,188,189,190,191,
1191 224,225,226,227,228,229,230,231,
1192 232,233,234,235,236,237,238,239,
1193 240,241,242,243,244,245,246,215,
1194 248,249,250,251,252,253,254,223,
1195 192,193,194,195,196,197,198,199,
1196 200,201,202,203,204,205,206,207,
1197 208,209,210,211,212,213,214,247,
1198 216,217,218,219,220,221,222,255,
1199 0,62,0,0,1,0,0,0, /* Section 3 (320 bytes): ten 32-byte class bit maps, presumably in the tables0 order; this one would be space */
1200 0,0,0,0,0,0,0,0,
1201 32,0,0,0,1,0,0,0,
1202 0,0,0,0,0,0,0,0,
1203 0,0,0,0,0,0,255,3, /* xdigit (presumed) */
1204 126,0,0,0,126,0,0,0,
1205 0,0,0,0,0,0,0,0,
1206 0,0,0,0,0,0,0,0,
1207 0,0,0,0,0,0,255,3, /* digit (presumed) */
1208 0,0,0,0,0,0,0,0,
1209 0,0,0,0,0,0,12,2,
1210 0,0,0,0,0,0,0,0,
1211 0,0,0,0,0,0,0,0, /* upper (presumed) */
1212 254,255,255,7,0,0,0,0,
1213 0,0,0,0,0,0,0,0,
1214 255,255,127,127,0,0,0,0,
1215 0,0,0,0,0,0,0,0, /* lower (presumed) */
1216 0,0,0,0,254,255,255,7,
1217 0,0,0,0,0,4,32,4,
1218 0,0,0,128,255,255,127,255,
1219 0,0,0,0,0,0,255,3, /* word (presumed) */
1220 254,255,255,135,254,255,255,7,
1221 0,0,0,0,0,4,44,6,
1222 255,255,127,255,255,255,127,255,
1223 0,0,0,0,254,255,255,255, /* graph (presumed) */
1224 255,255,255,255,255,255,255,127,
1225 0,0,0,0,254,255,255,255,
1226 255,255,255,255,255,255,255,255,
1227 0,2,0,0,255,255,255,255, /* print (presumed) */
1228 255,255,255,255,255,255,255,127,
1229 0,0,0,0,255,255,255,255,
1230 255,255,255,255,255,255,255,255,
1231 0,0,0,0,254,255,0,252, /* punct (presumed) */
1232 1,0,0,248,1,0,0,120,
1233 0,0,0,0,254,255,255,255,
1234 0,0,128,0,0,0,128,0,
1235 255,255,255,255,0,0,0,0, /* cntrl (presumed) */
1236 0,0,0,0,0,0,0,128,
1237 255,255,255,255,0,0,0,0,
1238 0,0,0,0,0,0,0,0,
1239 128,0,0,0,0,0,0,0, /* Section 4 (256 entries): by analogy with tables0, the per-character type bits (decimal here, hex in tables0) */
1240 0,1,1,0,1,1,0,0,
1241 0,0,0,0,0,0,0,0,
1242 0,0,0,0,0,0,0,0,
1243 1,0,0,0,128,0,0,0,
1244 128,128,128,128,0,0,128,0,
1245 28,28,28,28,28,28,28,28,
1246 28,28,0,0,0,0,0,128,
1247 0,26,26,26,26,26,26,18,
1248 18,18,18,18,18,18,18,18,
1249 18,18,18,18,18,18,18,18,
1250 18,18,18,128,128,0,128,16,
1251 0,26,26,26,26,26,26,18,
1252 18,18,18,18,18,18,18,18,
1253 18,18,18,18,18,18,18,18,
1254 18,18,18,128,128,0,0,0,
1255 0,0,0,0,0,1,0,0,
1256 0,0,0,0,0,0,0,0,
1257 0,0,0,0,0,0,0,0,
1258 0,0,0,0,0,0,0,0,
1259 1,0,0,0,0,0,0,0,
1260 0,0,18,0,0,0,0,0,
1261 0,0,20,20,0,18,0,0,
1262 0,20,18,0,0,0,0,0,
1263 18,18,18,18,18,18,18,18,
1264 18,18,18,18,18,18,18,18,
1265 18,18,18,18,18,18,18,0,
1266 18,18,18,18,18,18,18,18,
1267 18,18,18,18,18,18,18,18,
1268 18,18,18,18,18,18,18,18,
1269 18,18,18,18,18,18,18,0,
1270 18,18,18,18,18,18,18,18
1271 };
1272
1273
1274
1275
#if !defined HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that lack strerror() (SunOS4 is one example). The
message is looked up in the traditional sys_errlist[] array, whose number of
entries is given by sys_nerr.

Argument:   n    an error number
Returns:    the matching sys_errlist[] entry, or a fixed string when n is
            outside the range 0 to sys_nerr-1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
#endif /* HAVE_STRERROR */
1295
1296
1297
1298 /*************************************************
1299 * Print newline configuration *
1300 *************************************************/
1301
1302 /*
1303 Arguments:
1304 rc the return code from PCRE_CONFIG_NEWLINE
1305 isc TRUE if called from "-C newline"
1306 Returns: nothing
1307 */
1308
1309 static void
1310 print_newline_config(int rc, BOOL isc)
1311 {
1312 const char *s = NULL;
1313 if (!isc) printf(" Newline sequence is ");
1314 switch(rc)
1315 {
1316 case CHAR_CR: s = "CR"; break;
1317 case CHAR_LF: s = "LF"; break;
1318 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1319 case -1: s = "ANY"; break;
1320 case -2: s = "ANYCRLF"; break;
1321
1322 default:
1323 printf("a non-standard value: 0x%04x\n", rc);
1324 return;
1325 }
1326
1327 printf("%s\n", s);
1328 }
1329
1330
1331
1332 /*************************************************
1333 * JIT memory callback *
1334 *************************************************/
1335
1336 static pcre_jit_stack* jit_callback(void *arg)
1337 {
1338 jit_was_used = TRUE;
1339 return (pcre_jit_stack *)arg;
1340 }
1341
1342
1343 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1344 /*************************************************
1345 * Convert UTF-8 string to value *
1346 *************************************************/
1347
1348 /* This function takes one or more bytes that represents a UTF-8 character,
1349 and returns the value of the character.
1350
1351 Argument:
1352 utf8bytes a pointer to the byte vector
1353 vptr a pointer to an int to receive the value
1354
1355 Returns: > 0 => the number of bytes consumed
1356 -6 to 0 => malformed UTF-8 character at offset = (-return)
1357 */
1358
1359 static int
1360 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1361 {
1362 pcre_uint32 c = *utf8bytes++;
1363 pcre_uint32 d = c;
1364 int i, j, s;
1365
1366 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1367 {
1368 if ((d & 0x80) == 0) break;
1369 d <<= 1;
1370 }
1371
1372 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1373 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1374
1375 /* i now has a value in the range 1-5 */
1376
1377 s = 6*i;
1378 d = (c & utf8_table3[i]) << s;
1379
1380 for (j = 0; j < i; j++)
1381 {
1382 c = *utf8bytes++;
1383 if ((c & 0xc0) != 0x80) return -(j+1);
1384 s -= 6;
1385 d |= (c & 0x3f) << s;
1386 }
1387
1388 /* Check that encoding was the correct unique one */
1389
1390 for (j = 0; j < utf8_table1_size; j++)
1391 if (d <= utf8_table1[j]) break;
1392 if (j != i) return -(i+1);
1393
1394 /* Valid value */
1395
1396 *vptr = d;
1397 return i+1;
1398 }
1399 #endif /* NOUTF || SUPPORT_PCRE16 */
1400
1401
1402
1403 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1404 /*************************************************
1405 * Convert character value to UTF-8 *
1406 *************************************************/
1407
1408 /* This function takes an integer value in the range 0 - 0x7fffffff
1409 and encodes it as a UTF-8 character in 0 to 6 bytes.
1410
1411 Arguments:
1412 cvalue the character value
1413 utf8bytes pointer to buffer for result - at least 6 bytes long
1414
1415 Returns: number of characters placed in the buffer
1416 */
1417
1418 static int
1419 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1420 {
1421 register int i, j;
1422 if (cvalue > 0x7fffffffu)
1423 return -1;
1424 for (i = 0; i < utf8_table1_size; i++)
1425 if (cvalue <= utf8_table1[i]) break;
1426 utf8bytes += i;
1427 for (j = i; j > 0; j--)
1428 {
1429 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1430 cvalue >>= 6;
1431 }
1432 *utf8bytes = utf8_table2[i] | cvalue;
1433 return i + 1;
1434 }
1435 #endif
1436
1437
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is replaced by a larger block
when it is too small. Two bytes of output per input byte, plus two for the
terminating zero, are always enough: without decoding, each input byte becomes
one 16-bit item, and with decoding, each UTF-8 character is at least one byte
long and yields one item, or two items (a surrogate pair) when its value is
0x10000 or more.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;        /* in bytes */

if (buffer16_size < needed)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  pcre_uint32 c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;
    if (c >= 0x10000)
      {
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: each byte is widened as it stands. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1516
#ifdef SUPPORT_PCRE32
/*************************************************
*         Convert a string to 32-bit             *
*************************************************/

/* The result is always left in buffer32, which is replaced by a larger block
when it is too small. Four bytes of output per input byte, plus four for the
terminating zero, are always enough, because every input character, whether it
is a single byte or a UTF-8 sequence of several bytes, yields exactly one
32-bit item.

Data lines are accepted even if they contain surrogate values. This is
deliberate; it makes it possible to construct UTF-32 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-32)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 32-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered in UTF mode
             OR -3 if a surrogate value is encountered in a UTF-mode regex
*/

static int
to32(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint32 *out;
int needed = 4*len + 4;        /* in bytes */

if (buffer32_size < needed)
  {
  if (buffer32 != NULL) free(buffer32);
  buffer32_size = needed;
  buffer32 = (pcre_uint32 *)malloc(buffer32_size);
  if (buffer32 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
    exit(1);
    }
  }

out = buffer32;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  pcre_uint32 c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (utf && c > 0x10ffff) return -2;
    if (utf && !data && (c & 0xfffff800u) == 0xd800u) return -3;
    p += used;
    len -= used;
    *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: each byte is widened as it stands. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer32;
}
#endif
1594
1595 /*************************************************
1596 * Read or extend an input line *
1597 *************************************************/
1598
1599 /* Input lines are read into buffer, but both patterns and data lines can be
1600 continued over multiple input lines. In addition, if the buffer fills up, we
1601 want to automatically expand it so as to be able to handle extremely large
1602 lines that are needed for certain stress tests. When the input buffer is
1603 expanded, the other two buffers must also be expanded likewise, and the
1604 contents of pbuffer, which are a copy of the input for callouts, must be
1605 preserved (for when expansion happens for a data line). This is not the most
1606 optimal way of handling this, but hey, this is just a test program!
1607
1608 Arguments:
1609 f the file to read
1610 start where in buffer to start (this *must* be within buffer)
1611 prompt for stdin or readline()
1612
1613 Returns: pointer to the start of new data
1614 could be a copy of start, or could be moved
1615 NULL if no data read and EOF reached
1616 */
1617
1618 static pcre_uint8 *
1619 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1620 {
1621 pcre_uint8 *here = start;
1622
1623 for (;;)
1624 {
1625 size_t rlen = (size_t)(buffer_size - (here - buffer));
1626
1627 if (rlen > 1000)
1628 {
1629 int dlen;
1630
1631 /* If libreadline or libedit support is required, use readline() to read a
1632 line if the input is a terminal. Note that readline() removes the trailing
1633 newline, so we must put it back again, to be compatible with fgets(). */
1634
1635 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1636 if (isatty(fileno(f)))
1637 {
1638 size_t len;
1639 char *s = readline(prompt);
1640 if (s == NULL) return (here == start)? NULL : start;
1641 len = strlen(s);
1642 if (len > 0) add_history(s);
1643 if (len > rlen - 1) len = rlen - 1;
1644 memcpy(here, s, len);
1645 here[len] = '\n';
1646 here[len+1] = 0;
1647 free(s);
1648 }
1649 else
1650 #endif
1651
1652 /* Read the next line by normal means, prompting if the file is stdin. */
1653
1654 {
1655 if (f == stdin) printf("%s", prompt);
1656 if (fgets((char *)here, rlen, f) == NULL)
1657 return (here == start)? NULL : start;
1658 }
1659
1660 dlen = (int)strlen((char *)here);
1661 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1662 here += dlen;
1663 }
1664
1665 else
1666 {
1667 int new_buffer_size = 2*buffer_size;
1668 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1669 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1670 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1671
1672 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1673 {
1674 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1675 exit(1);
1676 }
1677
1678 memcpy(new_buffer, buffer, buffer_size);
1679 memcpy(new_pbuffer, pbuffer, buffer_size);
1680
1681 buffer_size = new_buffer_size;
1682
1683 start = new_buffer + (start - buffer);
1684 here = new_buffer + (here - buffer);
1685
1686 free(buffer);
1687 free(dbuffer);
1688 free(pbuffer);
1689
1690 buffer = new_buffer;
1691 dbuffer = new_dbuffer;
1692 pbuffer = new_pbuffer;
1693 }
1694 }
1695
1696 return NULL; /* Control never gets here */
1697 }
1698
1699
1700
1701 /*************************************************
1702 * Read number from string *
1703 *************************************************/
1704
1705 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1706 around with conditional compilation, just do the job by hand. It is only used
1707 for unpicking arguments, so just keep it simple.
1708
1709 Arguments:
1710 str string to be converted
1711 endptr where to put the end pointer
1712
1713 Returns: the unsigned long
1714 */
1715
1716 static int
1717 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1718 {
1719 int result = 0;
1720 while(*str != 0 && isspace(*str)) str++;
1721 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1722 *endptr = str;
1723 return(result);
1724 }
1725
1726
1727
1728 /*************************************************
1729 * Print one character *
1730 *************************************************/
1731
1732 /* Print a single character either literally, or as a hex escape. */
1733
1734 static int pchar(pcre_uint32 c, FILE *f)
1735 {
1736 int n;
1737 if (PRINTOK(c))
1738 {
1739 if (f != NULL) fprintf(f, "%c", c);
1740 return 1;
1741 }
1742
1743 if (c < 0x100)
1744 {
1745 if (use_utf)
1746 {
1747 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1748 return 6;
1749 }
1750 else
1751 {
1752 if (f != NULL) fprintf(f, "\\x%02x", c);
1753 return 4;
1754 }
1755 }
1756
1757 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1758 return n >= 0 ? n : 0;
1759 }
1760
1761
1762
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing.

Arguments:
  p          points to the string
  length     number of bytes to process; if negative, strlen() is used
  f          the output file, or NULL just to count

Returns:     the sum of the values returned by pchar()
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int total = 0;
int remaining = length;

if (remaining < 0)
  remaining = strlen((char *)p);

while (remaining > 0)
  {
  pcre_uint32 c = *p;          /* used unless a UTF-8 character is decoded */
  int used = 1;

#ifndef NOUTF
  if (use_utf)
    {
    pcre_uint32 decoded = 0;
    int rc = utf82ord(p, &decoded);
    if (rc > 0 && rc <= remaining)    /* Mustn't run over the end */
      {
      c = decoded;
      used = rc;
      }
    }
#endif

  p += used;
  remaining -= used;
  total += pchar(c, f);
  }

return total;
}
#endif
1801
1802
1803
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Returns the number of 16-bit units that precede the terminating zero. */

static int strlen16(PCRE_SPTR16 p)
{
int count;
for (count = 0; p[count] != 0; count++)
  ;
return count;
}
#endif  /* SUPPORT_PCRE16 */
1816
1817
1818
#ifdef SUPPORT_PCRE32
/*************************************************
*    Find length of 0-terminated 32-bit string   *
*************************************************/

/* Returns the number of 32-bit units that precede the terminating zero. */

static int strlen32(PCRE_SPTR32 p)
{
int count;
for (count = 0; p[count] != 0; count++)
  ;
return count;
}
#endif  /* SUPPORT_PCRE32 */
1831
1832
1833
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. A negative length
means that the string is zero-terminated. */

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  pcre_uint32 c = *p++ & 0xffff;
#if !defined NOUTF
  /* A high surrogate followed by a low surrogate is combined into a single
  code point. The low surrogate range is 0xDC00 to 0xDFFF inclusive; the upper
  bound used to be tested with "<", which left out 0xDFFF. */

  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;
    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1870
1871
1872
#ifdef SUPPORT_PCRE32
/*************************************************
*           Print 32-bit character string        *
*************************************************/

/* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. A negative length
means that the string is zero-terminated. */

static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
{
int printed = 0;
int count = (length < 0)? strlen32(p) : length;
int i;

for (i = 0; i < count; i++)
  {
  pcre_uint32 unit = p[i];
  printed += pchar(unit, f);
  }

return printed;
}
#endif  /* SUPPORT_PCRE32 */
1897
1898
1899
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copies the alphanumeric name at p into the name list at *pp, followed by a
zero and then a second zero (an empty name) that ends the list for now. A
message is output if the pattern has no group with that name.

Arguments:
  p           points to the name in the input text
  pp          points to the pointer to the next free slot in the name list;
                updated to point at the second zero
  re          the compiled pattern

Returns:      pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
*out++ = 0;
*out = 0;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1923
1924
1925
#ifdef SUPPORT_PCRE16
/*************************************************
*     Read a capture name (16-bit) and check it  *
*************************************************/

/* Note that the text being read is 8-bit. Each character of the alphanumeric
name at p is widened into the 16-bit name list at *pp, followed by a zero and
then a second zero (an empty name) that ends the list for now. A message is
output if the pattern has no group with that name.

Arguments:
  p           points to the name in the (8-bit) input text
  pp          points to the pointer to the next free slot in the name list;
                updated to point at the second zero
  re          the compiled pattern

Returns:      pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
*out++ = 0;
*out = 0;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1950
1951
1952
#ifdef SUPPORT_PCRE32
/*************************************************
*     Read a capture name (32-bit) and check it  *
*************************************************/

/* Note that the text being read is 8-bit. Each character of the alphanumeric
name at p is widened into the 32-bit name list at *pp, followed by a zero and
then a second zero (an empty name) that ends the list for now. A message is
output if the pattern has no group with that name.

Arguments:
  p           points to the name in the (8-bit) input text
  pp          points to the pointer to the next free slot in the name list;
                updated to point at the second zero
  re          the compiled pattern

Returns:      pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
{
pcre_uint32 *name = *pp;
pcre_uint32 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
*out++ = 0;
*out = 0;

if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE32 */
1977
1978
1979
1980 /*************************************************
1981 * Callout function *
1982 *************************************************/
1983
1984 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1985 the match. Yield zero unless more callouts than the fail count, or the callout
1986 data is not zero. */
1987
1988 static int callout(pcre_callout_block *cb)
1989 {
1990 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1991 int i, pre_start, post_start, subject_length;
1992
1993 if (callout_extra)
1994 {
1995 fprintf(f, "Callout %d: last capture = %d\n",
1996 cb->callout_number, cb->capture_last);
1997
1998 for (i = 0; i < cb->capture_top * 2; i += 2)
1999 {
2000 if (cb->offset_vector[i] < 0)
2001 fprintf(f, "%2d: <unset>\n", i/2);
2002 else
2003 {
2004 fprintf(f, "%2d: ", i/2);
2005 PCHARSV(cb->subject, cb->offset_vector[i],
2006 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2007 fprintf(f, "\n");
2008 }
2009 }
2010 }
2011
2012 /* Re-print the subject in canonical form, the first time or if giving full
2013 datails. On subsequent calls in the same match, we use pchars just to find the
2014 printed lengths of the substrings. */
2015
2016 if (f != NULL) fprintf(f, "--->");
2017
2018 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2019 PCHARS(post_start, cb->subject, cb->start_match,
2020 cb->current_position - cb->start_match, f);
2021
2022 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2023
2024 PCHARSV(cb->subject, cb->current_position,
2025 cb->subject_length - cb->current_position, f);
2026
2027 if (f != NULL) fprintf(f, "\n");
2028
2029 /* Always print appropriate indicators, with callout number if not already
2030 shown. For automatic callouts, show the pattern offset. */
2031
2032 if (cb->callout_number == 255)
2033 {
2034 fprintf(outfile, "%+3d ", cb->pattern_position);
2035 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2036 }
2037 else
2038 {
2039 if (callout_extra) fprintf(outfile, " ");
2040 else fprintf(outfile, "%3d ", cb->callout_number);
2041 }
2042
2043 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2044 fprintf(outfile, "^");
2045
2046 if (post_start > 0)
2047 {
2048 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2049 fprintf(outfile, "^");
2050 }
2051
2052 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2053 fprintf(outfile, " ");
2054
2055 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2056 pbuffer + cb->pattern_position);
2057
2058 fprintf(outfile, "\n");
2059 first_callout = 0;
2060
2061 if (cb->mark != last_callout_mark)
2062 {
2063 if (cb->mark == NULL)
2064 fprintf(outfile, "Latest Mark: <unset>\n");
2065 else
2066 {
2067 fprintf(outfile, "Latest Mark: ");
2068 PCHARSV(cb->mark, 0, -1, outfile);
2069 putc('\n', outfile);
2070 }
2071 last_callout_mark = cb->mark;
2072 }
2073
2074 if (cb->callout_data != NULL)
2075 {
2076 int callout_data = *((int *)(cb->callout_data));
2077 if (callout_data != 0)
2078 {
2079 fprintf(outfile, "Callout data = %d\n", callout_data);
2080 return callout_data;
2081 }
2082 }
2083
2084 return (cb->callout_number != callout_fail_id)? 0 :
2085 (++callout_count >= callout_fail_count)? 1 : 0;
2086 }
2087
2088
2089 /*************************************************
2090 * Local malloc functions *
2091 *************************************************/
2092
2093 /* Alternative malloc function, to test functionality and save the size of a
2094 compiled re, which is the first store request that pcre_compile() makes. The
2095 show_malloc variable is set only during matching. */
2096
2097 static void *new_malloc(size_t size)
2098 {
2099 void *block = malloc(size);
2100 gotten_store = size;
2101 if (first_gotten_store == 0) first_gotten_store = size;
2102 if (show_malloc)
2103 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2104 return block;
2105 }
2106
2107 static void new_free(void *block)
2108 {
2109 if (show_malloc)
2110 fprintf(outfile, "free %p\n", block);
2111 free(block);
2112 }
2113
2114 /* For recursion malloc/free, to test stacking calls */
2115
2116 static void *stack_malloc(size_t size)
2117 {
2118 void *block = malloc(size);
2119 if (show_malloc)
2120 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2121 return block;
2122 }
2123
2124 static void stack_free(void *block)
2125 {
2126 if (show_malloc)
2127 fprintf(outfile, "stack_free %p\n", block);
2128 free(block);
2129 }
2130
2131
2132 /*************************************************
2133 * Call pcre_fullinfo() *
2134 *************************************************/
2135
2136 /* Get one piece of information from the pcre_fullinfo() function. When only
2137 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2138 value, but the code is defensive.
2139
2140 Arguments:
2141 re compiled regex
2142 study study data
2143 option PCRE_INFO_xxx option
2144 ptr where to put the data
2145
2146 Returns: 0 when OK, < 0 on error
2147 */
2148
2149 static int
2150 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2151 {
2152 int rc;
2153
2154 if (pcre_mode == PCRE32_MODE)
2155 #ifdef SUPPORT_PCRE32
2156 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2157 #else
2158 rc = PCRE_ERROR_BADMODE;
2159 #endif
2160 else if (pcre_mode == PCRE16_MODE)
2161 #ifdef SUPPORT_PCRE16
2162 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2163 #else
2164 rc = PCRE_ERROR_BADMODE;
2165 #endif
2166 else
2167 #ifdef SUPPORT_PCRE8
2168 rc = pcre_fullinfo(re, study, option, ptr);
2169 #else
2170 rc = PCRE_ERROR_BADMODE;
2171 #endif
2172
2173 if (rc < 0)
2174 {
2175 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2176 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2177 if (rc == PCRE_ERROR_BADMODE)
2178 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2179 "%d-bit mode\n", 8 * CHAR_SIZE,
2180 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2181 }
2182
2183 return rc;
2184 }
2185
2186
2187
2188 /*************************************************
2189 * Swap byte functions *
2190 *************************************************/
2191
2192 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2193 value, respectively.
2194
2195 Arguments:
2196 value any number
2197
2198 Returns: the byte swapped value
2199 */
2200
2201 static pcre_uint32
2202 swap_uint32(pcre_uint32 value)
2203 {
2204 return ((value & 0x000000ff) << 24) |
2205 ((value & 0x0000ff00) << 8) |
2206 ((value & 0x00ff0000) >> 8) |
2207 (value >> 24);
2208 }
2209
2210 static pcre_uint16
2211 swap_uint16(pcre_uint16 value)
2212 {
2213 return (value >> 8) | (value << 8);
2214 }
2215
2216
2217
/*************************************************
*        Flip bytes in a compiled pattern        *
*************************************************/

/* This function is called if the 'F' option was present on a pattern that is
to be written to a file. We flip the bytes of all the integer fields in the
regex data block and the study block. In 16-bit mode this also flips relevant
bytes in the pattern itself. This is to make it possible to test PCRE's
ability to reload byte-flipped patterns, e.g. those compiled on a different
architecture.

Arguments:
  ere         the compiled pattern, flipped in place
  extra       the extra block whose study data is flipped, or NULL
*/

#if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
static void
regexflip8_or_16(pcre *ere, pcre_extra *extra)
{
real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
#ifdef SUPPORT_PCRE16
int op;

/* These are read before the header fields they depend on are flipped below. */

pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;
#ifdef SUPPORT_UTF
BOOL utf = (re->options & PCRE_UTF16) != 0;
BOOL utf16_char = FALSE;
#endif /* SUPPORT_UTF */
#endif /* SUPPORT_PCRE16 */

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);

if (extra != NULL)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
in the name table, if present, and then in the pattern itself. */

#ifdef SUPPORT_PCRE16
if (pcre_mode != PCRE16_MODE) return;

while(TRUE)
  {
  /* Swap previous characters. On the first pass this is the name table;
  afterwards it is the data that follows the opcode just processed. */
  while (length-- > 0)
    {
    *ptr = swap_uint16(*ptr);
    ptr++;
    }
#ifdef SUPPORT_UTF
  if (utf16_char)
    {
    /* The unit at ptr[-1] has just been flipped in place by the loop above,
    so it must be flipped back before testing whether it is a high surrogate.
    Testing the already-flipped value would miss real surrogate pairs and
    wrongly match characters whose low byte is in the range 0xd8-0xdb. */

    if ((swap_uint16(ptr[-1]) & 0xfc00) == 0xd800)
      {
      /* We know that there is only one extra character in UTF-16. */
      *ptr = swap_uint16(*ptr);
      ptr++;
      }
    }
  utf16_char = FALSE;
#endif /* SUPPORT_UTF */

  /* Get next opcode. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint16(op);

  switch (op)
    {
    case OP_END:
    return;

#ifdef SUPPORT_UTF
    /* Opcodes whose data ends with a character, which in UTF-16 mode may be
    followed by a second (low surrogate) unit. */

    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    if (utf) utf16_char = TRUE;
#endif
    /* Fall through. */

    default:
    length = OP_lengths16[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint16);
    length = 0;
    break;

    case OP_XCLASS:
    /* LINK_SIZE can be 1 or 2 in 16 bit mode. The size is read before it is
    flipped. */
    if (LINK_SIZE > 1)
      length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
        - (1 + LINK_SIZE + 1));
    else
      length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint16(*ptr);
    ptr++;
    if (LINK_SIZE > 1)
      {
      *ptr = swap_uint16(*ptr);
      ptr++;
      }

    /* The next unit holds the XCLASS flags; op is reused to hold it. */
    op = *ptr;
    *ptr = swap_uint16(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint16);
      length -= 32/sizeof(pcre_uint16);
      }
    break;
    }
  }
/* Control should never reach here in 16 bit mode. */
#endif /* SUPPORT_PCRE16 */
}
#endif /* SUPPORT_PCRE[8|16] */
2409
2410
2411
#if defined SUPPORT_PCRE32
/* Flip the bytes of a compiled 32-bit pattern in place: the integer fields of
the regex data block, the study block if there is one, the name table, and then
every unit of the compiled code except the character bit maps.

Arguments:
  ere         the compiled pattern, flipped in place
  extra       the extra block whose study data is flipped, or NULL
*/

static void
regexflip_32(pcre *ere, pcre_extra *extra)
{
real_pcre32 *re = (real_pcre32 *)ere;
int op;

/* These are read before the header fields they depend on are flipped below. */

pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);

if (extra != NULL)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 32-bit mode we must swap bytes
in the name table, if present, and then in the pattern itself. */

while(TRUE)
  {
  /* Swap previous characters. On the first pass this is the name table;
  afterwards it is the data that follows the opcode just processed. */
  while (length-- > 0)
    {
    *ptr = swap_uint32(*ptr);
    ptr++;
    }

  /* Get next opcode. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint32(op);

  switch (op)
    {
    case OP_END:
    return;

    default:
    length = OP_lengths32[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint32);
    length = 0;
    break;

    case OP_XCLASS:
    /* LINK_SIZE can only be 1 in 32-bit mode. The size is read before it is
    flipped. */
    length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint32(*ptr);
    ptr++;

    /* The next unit holds the XCLASS flags; op is reused to hold it. */
    op = *ptr;
    *ptr = swap_uint32(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint32);
      length -= 32/sizeof(pcre_uint32);
      }
    break;
    }
  }
/* Control should never reach here in 32 bit mode. */
}

#endif /* SUPPORT_PCRE32 */
2504
2505
2506
2507 static void
2508 regexflip(pcre *ere, pcre_extra *extra)
2509 {
2510 #if defined SUPPORT_PCRE32
2511 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2512 regexflip_32(ere, extra);
2513 #endif
2514 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2515 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2516 regexflip8_or_16(ere, extra);
2517 #endif
2518 }
2519
2520
2521
2522 /*************************************************
2523 * Check match or recursion limit *
2524 *************************************************/
2525
2526 static int
2527 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2528 int start_offset, int options, int *use_offsets, int use_size_offsets,
2529 int flag, unsigned long int *limit, int errnumber, const char *msg)
2530 {
2531 int count;
2532 int min = 0;
2533 int mid = 64;
2534 int max = -1;
2535
2536 extra->flags |= flag;
2537
2538 for (;;)
2539 {
2540 *limit = mid;
2541
2542 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2543 use_offsets, use_size_offsets);
2544
2545 if (count == errnumber)
2546 {
2547 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2548 min = mid;
2549 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2550 }
2551
2552 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2553 count == PCRE_ERROR_PARTIAL)
2554 {
2555 if (mid == min + 1)
2556 {
2557 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2558 break;
2559 }
2560 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2561 max = mid;
2562 mid = (min + mid)/2;
2563 }
2564 else break; /* Some other error */
2565 }
2566
2567 extra->flags &= ~flag;
2568 return count;
2569 }
2570
2571
2572
2573 /*************************************************
2574 * Case-independent strncmp() function *
2575 *************************************************/
2576
2577 /*
2578 Arguments:
2579 s first string
2580 t second string
2581 n number of characters to compare
2582
2583 Returns: < 0, = 0, or > 0, according to the comparison
2584 */
2585
2586 static int
2587 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2588 {
2589 while (n--)
2590 {
2591 int c = tolower(*s++) - tolower(*t++);
2592 if (c) return c;
2593 }
2594 return 0;
2595 }
2596
2597
2598
2599 /*************************************************
2600 * Check newline indicator *
2601 *************************************************/
2602
2603 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2604 a message and return 0 if there is no match.
2605
2606 Arguments:
2607 p points after the leading '<'
2608 f file for error message
2609
2610 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2611 */
2612
2613 static int
2614 check_newline(pcre_uint8 *p, FILE *f)
2615 {
2616 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2617 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2618 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2619 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2620 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2621 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2622 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2623 fprintf(f, "Unknown newline type at: <%s\n", p);
2624 return 0;
2625 }
2626
2627
2628
/*************************************************
*                Usage function                  *
*************************************************/

/* Writes the command line summary to stdout. Options that depend on how
pcretest was built are listed only when they are available. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE32
fputs(" -32 use the 32-bit library\n", stdout);
#endif
fputs(" -b show compiled code\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n", stdout);
fputs(" linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " pcre32 32 bit library support enabled [0, 1]\n", stdout);
fputs(" utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -s++ ditto, verifying when JIT was actually used\n"
      " -s+n force each pattern to be studied, using JIT if available,\n"
      " where 1 <= n <= 7 selects JIT options\n"
      " -s++n ditto, verifying when JIT was actually used\n", stdout);
fputs(" -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2687
2688
2689
2690 /*************************************************
2691 * Main Program *
2692 *************************************************/
2693
2694 /* Read lines from named file or stdin and write to named file or stdout; lines
2695 consist of a regular expression, in delimiters and optionally followed by
2696 options, followed by a set of test data, terminated by an empty line. */
2697
2698 int main(int argc, char **argv)
2699 {
2700 FILE *infile = stdin;
2701 const char *version;
2702 int options = 0;
2703 int study_options = 0;
2704 int default_find_match_limit = FALSE;
2705 int op = 1;
2706 int timeit = 0;
2707 int timeitm = 0;
2708 int showinfo = 0;
2709 int showstore = 0;
2710 int force_study = -1;
2711 int force_study_options = 0;
2712 int quiet = 0;
2713 int size_offsets = 45;
2714 int size_offsets_max;
2715 int *offsets = NULL;
2716 int debug = 0;
2717 int done = 0;
2718 int all_use_dfa = 0;
2719 int verify_jit = 0;
2720 int yield = 0;
2721 int stack_size;
2722
2723 #if !defined NOPOSIX
2724 int posix = 0;
2725 #endif
2726 #if !defined NODFA
2727 int *dfa_workspace = NULL;
2728 #endif
2729
2730 pcre_jit_stack *jit_stack = NULL;
2731
2732 /* These vectors store, end-to-end, a list of zero-terminated captured
2733 substring names, each list itself being terminated by an empty name. Assume
2734 that 1024 is plenty long enough for the few names we'll be testing. It is
2735 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2736 for the actual memory, to ensure alignment. */
2737
2738 pcre_uint32 copynames[1024];
2739 pcre_uint32 getnames[1024];
2740
2741 #ifdef SUPPORT_PCRE32
2742 pcre_uint32 *cn32ptr;
2743 pcre_uint32 *gn32ptr;
2744 #endif
2745
2746 #ifdef SUPPORT_PCRE16
2747 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2748 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2749 pcre_uint16 *cn16ptr;
2750 pcre_uint16 *gn16ptr;
2751 #endif
2752
2753 #ifdef SUPPORT_PCRE8
2754 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2755 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2756 pcre_uint8 *cn8ptr;
2757 pcre_uint8 *gn8ptr;
2758 #endif
2759
2760 /* Get buffers from malloc() so that valgrind will check their misuse when
2761 debugging. They grow automatically when very long lines are read. The 16-
2762 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2763
2764 buffer = (pcre_uint8 *)malloc(buffer_size);
2765 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2766 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2767
2768 /* The outfile variable is static so that new_malloc can use it. */
2769
2770 outfile = stdout;
2771
2772 /* The following _setmode() stuff is some Windows magic that tells its runtime
2773 library to translate CRLF into a single LF character. At least, that's what
2774 I've been told: never having used Windows I take this all on trust. Originally
2775 it set 0x8000, but then I was advised that _O_BINARY was better. */
2776
2777 #if defined(_WIN32) || defined(WIN32)
2778 _setmode( _fileno( stdout ), _O_BINARY );
2779 #endif
2780
2781 /* Get the version number: both pcre_version() and pcre16_version() give the
2782 same answer. We just need to ensure that we call one that is available. */
2783
2784 #if defined SUPPORT_PCRE8
2785 version = pcre_version();
2786 #elif defined SUPPORT_PCRE16
2787 version = pcre16_version();
2788 #elif defined SUPPORT_PCRE32
2789 version = pcre32_version();
2790 #endif
2791
2792 /* Scan options */
2793
2794 while (argc > 1 && argv[op][0] == '-')
2795 {
2796 pcre_uint8 *endptr;
2797 char *arg = argv[op];
2798
2799 if (strcmp(arg, "-m") == 0) showstore = 1;
2800 else if (strcmp(arg, "-s") == 0) force_study = 0;
2801
2802 else if (strncmp(arg, "-s+", 3) == 0)
2803 {
2804 arg += 3;
2805 if (*arg == '+') { arg++; verify_jit = TRUE; }
2806 force_study = 1;
2807 if (*arg == 0)
2808 force_study_options = jit_study_bits[6];
2809 else if (*arg >= '1' && *arg <= '7')
2810 force_study_options = jit_study_bits[*arg - '1'];
2811 else goto BAD_ARG;
2812 }
2813 else if (strcmp(arg, "-16") == 0)
2814 {
2815 #ifdef SUPPORT_PCRE16
2816 pcre_mode = PCRE16_MODE;
2817 #else
2818 printf("** This version of PCRE was built without 16-bit support\n");
2819 exit(1);
2820 #endif
2821 }
2822 else if (strcmp(arg, "-32") == 0)
2823 {
2824 #ifdef SUPPORT_PCRE32
2825 pcre_mode = PCRE32_MODE;
2826 #else
2827 printf("** This version of PCRE was built without 32-bit support\n");
2828 exit(1);
2829 #endif
2830 }
2831 else if (strcmp(arg, "-q") == 0) quiet = 1;
2832 else if (strcmp(arg, "-b") == 0) debug = 1;
2833 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2834 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2835 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2836 #if !defined NODFA
2837 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2838 #endif
2839 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2840 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2841 *endptr == 0))
2842 {
2843 op++;
2844 argc--;
2845 }
2846 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2847 {
2848 int both = arg[2] == 0;
2849 int temp;
2850 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2851 *endptr == 0))
2852 {
2853 timeitm = temp;
2854 op++;
2855 argc--;
2856 }
2857 else timeitm = LOOPREPEAT;
2858 if (both) timeit = timeitm;
2859 }
2860 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2861 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2862 *endptr == 0))
2863 {
2864 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2865 printf("PCRE: -S not supported on this OS\n");
2866 exit(1);
2867 #else
2868 int rc;
2869 struct rlimit rlim;
2870 getrlimit(RLIMIT_STACK, &rlim);
2871 rlim.rlim_cur = stack_size * 1024 * 1024;
2872 rc = setrlimit(RLIMIT_STACK, &rlim);
2873 if (rc != 0)
2874 {
2875 printf("PCRE: setrlimit() failed with error %d\n", rc);
2876 exit(1);
2877 }
2878 op++;
2879 argc--;
2880 #endif
2881 }
2882 #if !defined NOPOSIX
2883 else if (strcmp(arg, "-p") == 0) posix = 1;
2884 #endif
2885 else if (strcmp(arg, "-C") == 0)
2886 {
2887 int rc;
2888 unsigned long int lrc;
2889
2890 if (argc > 2)
2891 {
2892 if (strcmp(argv[op + 1], "linksize") == 0)
2893 {
2894 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2895 printf("%d\n", rc);
2896 yield = rc;
2897 }
2898 else if (strcmp(argv[op + 1], "pcre8") == 0)
2899 {
2900 #ifdef SUPPORT_PCRE8
2901 printf("1\n");
2902 yield = 1;
2903 #else
2904 printf("0\n");
2905 yield = 0;
2906 #endif
2907 }
2908 else if (strcmp(argv[op + 1], "pcre16") == 0)
2909 {
2910 #ifdef SUPPORT_PCRE16
2911 printf("1\n");
2912 yield = 1;
2913 #else
2914 printf("0\n");
2915 yield = 0;
2916 #endif
2917 }
2918 else if (strcmp(argv[op + 1], "pcre32") == 0)
2919 {
2920 #ifdef SUPPORT_PCRE32
2921 printf("1\n");
2922 yield = 1;
2923 #else
2924 printf("0\n");
2925 yield = 0;
2926 #endif
2927 goto EXIT;
2928 }
2929 if (strcmp(argv[op + 1], "utf") == 0)
2930 {
2931 #ifdef SUPPORT_PCRE8
2932 if (pcre_mode == PCRE8_MODE)
2933 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2934 #endif
2935 #ifdef SUPPORT_PCRE16
2936 if (pcre_mode == PCRE16_MODE)
2937 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2938 #endif
2939 #ifdef SUPPORT_PCRE32
2940 if (pcre_mode == PCRE32_MODE)
2941 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2942 #endif
2943 printf("%d\n", rc);
2944 yield = rc;
2945 goto EXIT;
2946 }
2947 else if (strcmp(argv[op + 1], "ucp") == 0)
2948 {
2949 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2950 printf("%d\n", rc);
2951 yield = rc;
2952 }
2953 else if (strcmp(argv[op + 1], "jit") == 0)
2954 {
2955 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2956 printf("%d\n", rc);
2957 yield = rc;
2958 }
2959 else if (strcmp(argv[op + 1], "newline") == 0)
2960 {
2961 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2962 print_newline_config(rc, TRUE);
2963 }
2964 else if (strcmp(argv[op + 1], "ebcdic") == 0)
2965 {
2966 #ifdef EBCDIC
2967 printf("1\n");
2968 yield = 1;
2969 #else
2970 printf("0\n");
2971 #endif
2972 }
2973 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2974 {
2975 #ifdef EBCDIC
2976 printf("0x%02x\n", CHAR_LF);
2977 #else
2978 printf("0\n");
2979 #endif
2980 }
2981 else
2982 {
2983 printf("Unknown -C option: %s\n", argv[op + 1]);
2984 }
2985 goto EXIT;
2986 }
2987
2988 /* No argument for -C: output all configuration information. */
2989
2990 printf("PCRE version %s\n", version);
2991 printf("Compiled with\n");
2992
2993 #ifdef EBCDIC
2994 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2995 #endif
2996
2997 /* At least one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 will be
2998 set. If more than one is set, either all UTFs are supported or none are. */
2999
3000 #ifdef SUPPORT_PCRE8
3001 printf(" 8-bit support\n");
3002 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3003 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3004 #endif
3005 #ifdef SUPPORT_PCRE16
3006 printf(" 16-bit support\n");
3007 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3008 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3009 #endif
3010 #ifdef SUPPORT_PCRE32
3011 printf(" 32-bit support\n");
3012 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3013 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3014 #endif
3015
3016 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3017 printf(" %sUnicode properties support\n", rc? "" : "No ");
3018 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3019 if (rc)
3020 {
3021 const char *arch;
3022 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3023 printf(" Just-in-time compiler support: %s\n", arch);
3024 }
3025 else
3026 printf(" No just-in-time compiler support\n");
3027 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3028 print_newline_config(rc, FALSE);
3029 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3030 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3031 "all Unicode newlines");
3032 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3033 printf(" Internal link size = %d\n", rc);
3034 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3035 printf(" POSIX malloc threshold = %d\n", rc);
3036 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3037 printf(" Default match limit = %ld\n", lrc);
3038 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3039 printf(" Default recursion depth limit = %ld\n", lrc);
3040 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3041 printf(" Match recursion uses %s", rc? "stack" : "heap");
3042 if (showstore)
3043 {
3044 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3045 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3046 }
3047 printf("\n");
3048 goto EXIT;
3049 }
3050 else if (strcmp(arg, "-help") == 0 ||
3051 strcmp(arg, "--help") == 0)
3052 {
3053 usage();
3054 goto EXIT;
3055 }
3056 else
3057 {
3058 BAD_ARG:
3059 printf("** Unknown or malformed option %s\n", arg);
3060 usage();
3061 yield = 1;
3062 goto EXIT;
3063 }
3064 op++;
3065 argc--;
3066 }
3067
3068 /* Get the store for the offsets vector, and remember what it was */
3069
3070 size_offsets_max = size_offsets;
3071 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3072 if (offsets == NULL)
3073 {
3074 printf("** Failed to get %d bytes of memory for offsets vector\n",
3075 (int)(size_offsets_max * sizeof(int)));
3076 yield = 1;
3077 goto EXIT;
3078 }
3079
3080 /* Sort out the input and output files */
3081
3082 if (argc > 1)
3083 {
3084 infile = fopen(argv[op], INPUT_MODE);
3085 if (infile == NULL)
3086 {
3087 printf("** Failed to open %s\n", argv[op]);
3088 yield = 1;
3089 goto EXIT;
3090 }
3091 }
3092
3093 if (argc > 2)
3094 {
3095 outfile = fopen(argv[op+1], OUTPUT_MODE);
3096 if (outfile == NULL)
3097 {
3098 printf("** Failed to open %s\n", argv[op+1]);
3099 yield = 1;
3100 goto EXIT;
3101 }
3102 }
3103
3104 /* Set alternative malloc function */
3105
3106 #ifdef SUPPORT_PCRE8
3107 pcre_malloc = new_malloc;
3108 pcre_free = new_free;
3109 pcre_stack_malloc = stack_malloc;
3110 pcre_stack_free = stack_free;
3111 #endif
3112
3113 #ifdef SUPPORT_PCRE16
3114 pcre16_malloc = new_malloc;
3115 pcre16_free = new_free;
3116 pcre16_stack_malloc = stack_malloc;
3117 pcre16_stack_free = stack_free;
3118 #endif
3119
3120 #ifdef SUPPORT_PCRE32
3121 pcre32_malloc = new_malloc;
3122 pcre32_free = new_free;
3123 pcre32_stack_malloc = stack_malloc;
3124 pcre32_stack_free = stack_free;
3125 #endif
3126
3127 /* Heading line unless quiet, then prompt for first regex if stdin */
3128
3129 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3130
3131 /* Main loop */
3132
3133 while (!done)
3134 {
3135 pcre *re = NULL;
3136 pcre_extra *extra = NULL;
3137
3138 #if !defined NOPOSIX /* There are still compilers that require no indent */
3139 regex_t preg;
3140 int do_posix = 0;
3141 #endif
3142
3143 const char *error;
3144 pcre_uint8 *markptr;
3145 pcre_uint8 *p, *pp, *ppp;
3146 pcre_uint8 *to_file = NULL;
3147 const pcre_uint8 *tables = NULL;
3148 unsigned long int get_options;
3149 unsigned long int true_size, true_study_size = 0;
3150 size_t size, regex_gotten_store;
3151 int do_allcaps = 0;
3152 int do_mark = 0;
3153 int do_study = 0;
3154 int no_force_study = 0;
3155 int do_debug = debug;
3156 int do_G = 0;
3157 int do_g = 0;
3158 int do_showinfo = showinfo;
3159 int do_showrest = 0;
3160 int do_showcaprest = 0;
3161 int do_flip = 0;
3162 int erroroffset, len, delimiter, poffset;
3163
3164 #if !defined NODFA
3165 int dfa_matched = 0;
3166 #endif
3167
3168 use_utf = 0;
3169 debug_lengths = 1;
3170
3171 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3172 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3173 fflush(outfile);
3174
3175 p = buffer;
3176 while (isspace(*p)) p++;
3177 if (*p == 0) continue;
3178
3179 /* See if the pattern is to be loaded pre-compiled from a file. */
3180
3181 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3182 {
3183 pcre_uint32 magic;
3184 pcre_uint8 sbuf[8];
3185 FILE *f;
3186
3187 p++;
3188 if (*p == '!')
3189 {
3190 do_debug = TRUE;
3191 do_showinfo = TRUE;
3192 p++;
3193 }
3194
3195 pp = p + (int)strlen((char *)p);
3196 while (isspace(pp[-1])) pp--;
3197 *pp = 0;
3198
3199 f = fopen((char *)p, "rb");
3200 if (f == NULL)
3201 {
3202 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3203 continue;
3204 }
3205
3206 first_gotten_store = 0;
3207 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3208
3209 true_size =
3210 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3211 true_study_size =
3212 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3213
3214 re = (pcre *)new_malloc(true_size);
3215 if (re == NULL)
3216 {
3217 printf("** Failed to get %d bytes of memory for pcre object\n",
3218 (int)true_size);
3219 yield = 1;
3220 goto EXIT;
3221 }
3222 regex_gotten_store = first_gotten_store;
3223
3224 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3225
3226 magic = REAL_PCRE_MAGIC(re);
3227 if (magic != MAGIC_NUMBER)
3228 {
3229 if (swap_uint32(magic) == MAGIC_NUMBER)
3230 {
3231 do_flip = 1;
3232 }
3233 else
3234 {
3235 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3236 new_free(re);
3237 fclose(f);
3238 continue;
3239 }
3240 }
3241
3242 /* We hide the byte-invert info for little and big endian tests. */
3243 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3244 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3245
3246 /* Now see if there is any following study data. */
3247
3248 if (true_study_size != 0)
3249 {
3250 pcre_study_data *psd;
3251
3252 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3253 extra->flags = PCRE_EXTRA_STUDY_DATA;
3254
3255 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3256 extra->study_data = psd;
3257
3258 if (fread(psd, 1, true_study_size, f) != true_study_size)
3259 {
3260 FAIL_READ:
3261 fprintf(outfile, "Failed to read data from %s\n", p);
3262 if (extra != NULL)
3263 {
3264 PCRE_FREE_STUDY(extra);
3265 }
3266 new_free(re);
3267 fclose(f);
3268 continue;
3269 }
3270 fprintf(outfile, "Study data loaded from %s\n", p);
3271 do_study = 1; /* To get the data output if requested */
3272 }
3273 else fprintf(outfile, "No study data\n");
3274
3275 /* Flip the necessary bytes. */
3276 if (do_flip)
3277 {
3278 int rc;
3279 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3280 if (rc == PCRE_ERROR_BADMODE)
3281 {
3282 /* Simulate the result of the function call below. */
3283 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3284 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3285 PCRE_INFO_OPTIONS);
3286 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3287 "%d-bit mode\n", 8 * CHAR_SIZE,
3288 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3289 new_free(re);
3290 fclose(f);
3291 continue;
3292 }
3293 }
3294
3295 /* Need to know if UTF-8 for printing data strings. */
3296
3297 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3298 {
3299 new_free(re);
3300 fclose(f);
3301 continue;
3302 }
3303 use_utf = (get_options & PCRE_UTF8) != 0;
3304
3305 fclose(f);
3306 goto SHOW_INFO;
3307 }
3308
3309 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3310 the pattern; if it isn't complete, read more. */
3311
3312 delimiter = *p++;
3313
3314 if (isalnum(delimiter) || delimiter == '\\')
3315 {
3316 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3317 goto SKIP_DATA;
3318 }
3319
3320 pp = p;
3321 poffset = (int)(p - buffer);
3322
3323 for(;;)
3324 {
3325 while (*pp != 0)
3326 {
3327 if (*pp == '\\' && pp[1] != 0) pp++;
3328 else if (*pp == delimiter) break;
3329 pp++;
3330 }
3331 if (*pp != 0) break;
3332 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3333 {
3334 fprintf(outfile, "** Unexpected EOF\n");
3335 done = 1;
3336 goto CONTINUE;
3337 }
3338 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3339 }
3340
3341 /* The buffer may have moved while being extended; reset the start of data
3342 pointer to the correct relative point in the buffer. */
3343
3344 p = buffer + poffset;
3345
3346 /* If the first character after the delimiter is backslash, make
3347 the pattern end with backslash. This is purely to provide a way
3348 of testing for the error message when a pattern ends with backslash. */
3349
3350 if (pp[1] == '\\') *pp++ = '\\';
3351
3352 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3353 for callouts. */
3354
3355 *pp++ = 0;
3356 strcpy((char *)pbuffer, (char *)p);
3357
3358 /* Look for options after final delimiter */
3359
3360 options = 0;
3361 study_options = force_study_options;
3362 log_store = showstore; /* default from command line */
3363
3364 while (*pp != 0)
3365 {
3366 switch (*pp++)
3367 {
3368 case 'f': options |= PCRE_FIRSTLINE; break;
3369 case 'g': do_g = 1; break;
3370 case 'i': options |= PCRE_CASELESS; break;
3371 case 'm': options |= PCRE_MULTILINE; break;
3372 case 's': options |= PCRE_DOTALL; break;
3373 case 'x': options |= PCRE_EXTENDED; break;
3374
3375 case '+':
3376 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3377 break;
3378
3379 case '=': do_allcaps = 1; break;
3380 case 'A': options |= PCRE_ANCHORED; break;
3381 case 'B': do_debug = 1; break;
3382 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3383 case 'D': do_debug = do_showinfo = 1; break;
3384 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3385 case 'F': do_flip = 1; break;
3386 case 'G': do_G = 1; break;
3387 case 'I': do_showinfo = 1; break;
3388 case 'J': options |= PCRE_DUPNAMES; break;
3389 case 'K': do_mark = 1; break;
3390 case 'M': log_store = 1; break;
3391 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3392
3393 #if !defined NOPOSIX
3394 case 'P': do_posix = 1; break;
3395 #endif
3396
3397 case 'S':
3398 do_study = 1;
3399 for (;;)
3400 {
3401 switch (*pp++)
3402 {
3403 case 'S':
3404 do_study = 0;
3405 no_force_study = 1;
3406 break;
3407
3408 case '!':
3409 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3410 break;
3411
3412 case '+':
3413 if (*pp == '+')
3414 {
3415 verify_jit = TRUE;
3416 pp++;
3417 }
3418 if (*pp >= '1' && *pp <= '7')
3419 study_options |= jit_study_bits[*pp++ - '1'];
3420 else
3421 study_options |= jit_study_bits[6];
3422 break;
3423
3424 case '-':
3425 study_options &= ~PCRE_STUDY_ALLJIT;
3426 break;
3427
3428 default:
3429 pp--;
3430 goto ENDLOOP;
3431 }
3432 }
3433 ENDLOOP:
3434 break;
3435
3436 case 'U': options |= PCRE_UNGREEDY; break;
3437 case 'W': options |= PCRE_UCP; break;
3438 case 'X': options |= PCRE_EXTRA; break;
3439 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3440 case 'Z': debug_lengths = 0; break;
3441 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3442 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3443
3444 case 'T':
3445 switch (*pp++)
3446 {
3447 case '0': tables = tables0; break;
3448 case '1': tables = tables1; break;
3449
3450 case '\r':
3451 case '\n':
3452 case ' ':
3453 case 0:
3454 fprintf(outfile, "** Missing table number after /T\n");
3455 goto SKIP_DATA;
3456
3457 default:
3458 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3459 goto SKIP_DATA;
3460 }
3461 break;
3462
3463 case 'L':
3464 ppp = pp;
3465 /* The '\r' test here is so that it works on Windows. */
3466 /* The 0 (NUL) test is just in case this is an unterminated line. */
3467 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3468 *ppp = 0;
3469 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3470 {
3471 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3472 goto SKIP_DATA;
3473 }
3474 locale_set = 1;
3475 tables = PCRE_MAKETABLES;
3476 pp = ppp;
3477 break;
3478
3479 case '>':
3480 to_file = pp;
3481 while (*pp != 0) pp++;
3482 while (isspace(pp[-1])) pp--;
3483 *pp = 0;
3484 break;
3485
3486 case '<':
3487 {
3488 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3489 {
3490 options |= PCRE_JAVASCRIPT_COMPAT;
3491 pp += 3;
3492 }
3493 else
3494 {
3495 int x = check_newline(pp, outfile);
3496 if (x == 0) goto SKIP_DATA;
3497 options |= x;
3498 while (*pp++ != '>');
3499 }
3500 }
3501 break;
3502
3503 case '\r': /* So that it works in Windows */
3504 case '\n':
3505 case ' ':
3506 break;
3507
3508 default:
3509 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3510 goto SKIP_DATA;
3511 }
3512 }
3513
3514 /* Handle compiling via the POSIX interface, which doesn't support the
3515 timing, showing, or debugging options, nor the ability to pass over
3516 local character tables. Neither does it have 16-bit support. */
3517
3518 #if !defined NOPOSIX
3519 if (posix || do_posix)
3520 {
3521 int rc;
3522 int cflags = 0;
3523
3524 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3525 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3526 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3527 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3528 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3529 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3530 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3531
3532 first_gotten_store = 0;
3533 rc = regcomp(&preg, (char *)p, cflags);
3534
3535 /* Compilation failed; go back for another re, skipping to blank line
3536 if non-interactive. */
3537
3538 if (rc != 0)
3539 {
3540 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3541 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3542 goto SKIP_DATA;
3543 }
3544 }
3545
3546 /* Handle compiling via the native interface */
3547
3548 else
3549 #endif /* !defined NOPOSIX */
3550
3551 {
3552 /* In 16- or 32-bit mode, convert the input. */
3553
3554 #ifdef SUPPORT_PCRE16
3555 if (pcre_mode == PCRE16_MODE)
3556 {
3557 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3558 {
3559 case -1:
3560 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3561 "converted to UTF-16\n");
3562 goto SKIP_DATA;
3563
3564 case -2:
3565 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3566 "cannot be converted to UTF-16\n");
3567 goto SKIP_DATA;
3568
3569 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3570 fprintf(outfile, "**Failed: character value greater than 0xffff "
3571 "cannot be converted to 16-bit in non-UTF mode\n");
3572 goto SKIP_DATA;
3573
3574 default:
3575 break;
3576 }
3577 p = (pcre_uint8 *)buffer16;
3578 }
3579 #endif
3580
3581 #ifdef SUPPORT_PCRE32
3582 if (pcre_mode == PCRE32_MODE)
3583 {
3584 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3585 {
3586 case -1:
3587 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3588 "converted to UTF-32\n");
3589 goto SKIP_DATA;
3590
3591 case -2:
3592 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3593 "cannot be converted to UTF-32\n");
3594 goto SKIP_DATA;
3595
3596 case -3:
3597 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3598 goto SKIP_DATA;
3599
3600 default:
3601 break;
3602 }
3603 p = (pcre_uint8 *)buffer32;
3604 }
3605 #endif
3606
3607 /* Compile many times when timing */
3608
3609 if (timeit > 0)
3610 {
3611 register int i;
3612 clock_t time_taken;
3613 clock_t start_time = clock();
3614 for (i = 0; i < timeit; i++)
3615 {
3616 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3617 if (re != NULL) free(re);
3618 }
3619 time_taken = clock() - start_time;
3620 fprintf(outfile, "Compile time %.4f milliseconds\n",
3621 (((double)time_taken * 1000.0) / (double)timeit) /
3622 (double)CLOCKS_PER_SEC);
3623 }
3624
3625 first_gotten_store = 0;
3626 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3627
3628 /* Compilation failed; go back for another re, skipping to blank line
3629 if non-interactive. */
3630
3631 if (re == NULL)
3632 {
3633 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3634 SKIP_DATA:
3635 if (infile != stdin)
3636 {
3637 for (;;)
3638 {
3639 if (extend_inputline(infile, buffer, NULL) == NULL)
3640 {
3641 done = 1;
3642 goto CONTINUE;
3643 }
3644 len = (int)strlen((char *)buffer);
3645 while (len > 0 && isspace(buffer[len-1])) len--;
3646 if (len == 0) break;
3647 }
3648 fprintf(outfile, "\n");
3649 }
3650 goto CONTINUE;
3651 }
3652
3653 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3654 within the regex; check for this so that we know how to process the data
3655 lines. */
3656
3657 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3658 goto SKIP_DATA;
3659 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3660
3661 /* Extract the size for possible writing before possibly flipping it,
3662 and remember the store that was got. */
3663
3664 true_size = REAL_PCRE_SIZE(re);
3665 regex_gotten_store = first_gotten_store;
3666
3667 /* Output code size information if requested */
3668
3669 if (log_store)
3670 {
3671 int name_count, name_entry_size, real_pcre_size;
3672
3673 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3674 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3675 #ifdef SUPPORT_PCRE8
3676 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3677 real_pcre_size = sizeof(real_pcre);
3678 #endif
3679 #ifdef SUPPORT_PCRE16
3680 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3681 real_pcre_size = sizeof(real_pcre16);
3682 #endif
3683 #ifdef SUPPORT_PCRE32
3684 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3685 real_pcre_size = sizeof(real_pcre32);
3686 #endif
3687 fprintf(outfile, "Memory allocation (code space): %d\n",
3688 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3689 }
3690
3691 /* If -s or /S was present, study the regex to generate additional info to
3692 help with the matching, unless the pattern has the SS option, which
3693 suppresses the effect of /S (used for a few test patterns where studying is
3694 never sensible). */
3695
3696 if (do_study || (force_study >= 0 && !no_force_study))
3697 {
3698 if (timeit > 0)
3699 {
3700 register int i;
3701 clock_t time_taken;
3702 clock_t start_time = clock();
3703 for (i = 0; i < timeit; i++)
3704 {
3705 PCRE_STUDY(extra, re, study_options, &error);
3706 }
3707 time_taken = clock() - start_time;
3708 if (extra != NULL)
3709 {
3710 PCRE_FREE_STUDY(extra);
3711 }
3712 fprintf(outfile, " Study time %.4f milliseconds\n",
3713 (((double)time_taken * 1000.0) / (double)timeit) /
3714 (double)CLOCKS_PER_SEC);
3715 }
3716 PCRE_STUDY(extra, re, study_options, &error);
3717 if (error != NULL)
3718 fprintf(outfile, "Failed to study: %s\n", error);
3719 else if (extra != NULL)
3720 {
3721 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3722 if (log_store)
3723 {
3724 size_t jitsize;
3725 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3726 jitsize != 0)
3727 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3728 }
3729 }
3730 }
3731
3732 /* If /K was present, we set up for handling MARK data. */
3733
3734 if (do_mark)
3735 {
3736 if (extra == NULL)
3737 {
3738 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3739 extra->flags = 0;
3740 }
3741 extra->mark = &markptr;
3742 extra->flags |= PCRE_EXTRA_MARK;
3743 }
3744
3745 /* Extract and display information from the compiled data if required. */
3746
3747 SHOW_INFO:
3748
3749 if (do_debug)
3750 {
3751 fprintf(outfile, "------------------------------------------------------------------\n");
3752 PCRE_PRINTINT(re, outfile, debug_lengths);
3753 }
3754
3755 /* We already have the options in get_options (see above) */
3756
3757 if (do_showinfo)
3758 {
3759 unsigned long int all_options;
3760 pcre_uint32 first_char, need_char;
3761 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
3762 hascrorlf, maxlookbehind;
3763 int nameentrysize, namecount;
3764 const pcre_uint8 *nametable;
3765
3766 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3767 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3768 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3769 new_info(re, NULL, PCRE_INFO_FIRSTLITERAL, &first_char) +
3770 new_info(re, NULL, PCRE_INFO_FIRSTLITERALSET, &first_char_set) +
3771 new_info(re, NULL, PCRE_INFO_LASTLITERAL2, &need_char) +
3772 new_info(re, NULL, PCRE_INFO_LASTLITERAL2SET, &need_char_set) +
3773 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3774 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3775 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3776 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3777 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3778 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3779 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3780 != 0)
3781 goto SKIP_DATA;
3782
3783 if (size != regex_gotten_store) fprintf(outfile,
3784 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3785 (int)size, (int)regex_gotten_store);
3786
3787 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3788 if (backrefmax > 0)
3789 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3790
3791 if (namecount > 0)
3792 {
3793 fprintf(outfile, "Named capturing subpatterns:\n");
3794 while (namecount-- > 0)
3795 {
3796 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
3797 int length = (int)STRLEN(nametable + imm2_size);
3798 fprintf(outfile, " ");
3799 PCHARSV(nametable, imm2_size, length, outfile);
3800 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3801 #ifdef SUPPORT_PCRE32
3802 if (pcre_mode == PCRE32_MODE)
3803 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
3804 #endif
3805 #ifdef SUPPORT_PCRE16
3806 if (pcre_mode == PCRE16_MODE)
3807 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
3808 #endif
3809 #ifdef SUPPORT_PCRE8
3810 if (pcre_mode == PCRE8_MODE)
3811 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
3812 #endif
3813 nametable += nameentrysize * CHAR_SIZE;
3814 }
3815 }
3816
3817 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3818 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3819
3820 all_options = REAL_PCRE_OPTIONS(re);
3821 if (do_flip) all_options = swap_uint32(all_options);
3822
3823 if (get_options == 0) fprintf(outfile, "No options\n");
3824 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3825 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3826 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3827 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3828 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3829 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3830 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3831 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3832 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3833 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3834 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3835 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3836 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3837 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3838 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3839 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3840 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3841 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3842
3843 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3844
3845 switch (get_options & PCRE_NEWLINE_BITS)
3846 {
3847 case PCRE_NEWLINE_CR:
3848 fprintf(outfile, "Forced newline sequence: CR\n");
3849 break;
3850
3851 case PCRE_NEWLINE_LF:
3852 fprintf(outfile, "Forced newline sequence: LF\n");
3853 break;
3854
3855 case PCRE_NEWLINE_CRLF:
3856 fprintf(outfile, "Forced newline sequence: CRLF\n");
3857 break;
3858
3859 case PCRE_NEWLINE_ANYCRLF:
3860 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3861 break;
3862
3863 case PCRE_NEWLINE_ANY:
3864 fprintf(outfile, "Forced newline sequence: ANY\n");
3865 break;
3866
3867 default:
3868 break;
3869 }
3870
3871 if (first_char_set == 2)
3872 {
3873 fprintf(outfile, "First char at start or follows newline\n");
3874 }
3875 else if (first_char_set == 1)
3876 {
3877 const char *caseless =
3878 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
3879 "" : " (caseless)";
3880
3881 if (PRINTOK(first_char))
3882 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3883 else
3884 {
3885 fprintf(outfile, "First char = ");
3886 pchar(first_char, outfile);
3887 fprintf(outfile, "%s\n", caseless);
3888 }
3889 }
3890 else
3891 {
3892 fprintf(outfile, "No first char\n");
3893 }
3894
3895 if (need_char_set == 0)
3896 {
3897 fprintf(outfile, "No need char\n");
3898 }
3899 else
3900 {
3901 const char *caseless =
3902 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
3903 "" : " (caseless)";
3904
3905 if (PRINTOK(need_char))
3906 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3907 else
3908 {
3909 fprintf(outfile, "Need char = ");
3910 pchar(need_char, outfile);
3911 fprintf(outfile, "%s\n", caseless);
3912 }
3913 }
3914
3915 if (maxlookbehind > 0)
3916 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3917
3918 /* Don't output study size; at present it is in any case a fixed
3919 value, but it varies, depending on the computer architecture, and
3920 so messes up the test suite. (And with the /F option, it might be
3921 flipped.) If study was forced by an external -s, don't show this
3922 information unless -i or -d was also present. This means that, except
3923 when auto-callouts are involved, the output from runs with and without
3924 -s should be identical. */
3925
3926 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3927 {
3928 if (extra == NULL)
3929 fprintf(outfile, "Study returned NULL\n");
3930 else
3931 {
3932 pcre_uint8 *start_bits = NULL;
3933 int minlength;
3934
3935 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3936 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3937
3938 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3939 {
3940 if (start_bits == NULL)
3941 fprintf(outfile, "No set of starting bytes\n");
3942 else
3943 {
3944 int i;
3945 int c = 24;
3946 fprintf(outfile, "Starting byte set: ");
3947 for (i = 0; i < 256; i++)
3948 {
3949 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3950 {
3951 if (c > 75)
3952 {
3953 fprintf(outfile, "\n ");
3954 c = 2;
3955 }
3956 if (PRINTOK(i) && i != ' ')
3957 {
3958 fprintf(outfile, "%c ", i);
3959 c += 2;
3960 }
3961 else
3962 {
3963 fprintf(outfile, "\\x%02x ", i);
3964 c += 5;
3965 }
3966 }
3967 }
3968 fprintf(outfile, "\n");
3969 }
3970 }
3971 }
3972
3973 /* Show this only if the JIT was set by /S, not by -s. */
3974
3975 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3976 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3977 {
3978 int jit;
3979 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3980 {
3981 if (jit)
3982 fprintf(outfile, "JIT study was successful\n");
3983 else
3984 #ifdef SUPPORT_JIT
3985 fprintf(outfile, "JIT study was not successful\n");
3986 #else
3987 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3988 #endif
3989 }
3990 }
3991 }
3992 }
3993
3994 /* If the '>' option was present, we write out the regex to a file, and
3995 that is all. The first 8 bytes of the file are the regex length and then
3996 the study length, in big-endian order. */
3997
3998 if (to_file != NULL)
3999 {
4000 FILE *f = fopen((char *)to_file, "wb");
4001 if (f == NULL)
4002 {
4003 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4004 }
4005 else
4006 {
4007 pcre_uint8 sbuf[8];
4008
4009 if (do_flip) regexflip(re, extra);
4010 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4011 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4012 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4013 sbuf[3] = (pcre_uint8)((true_size) & 255);
4014 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4015 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4016 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4017 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4018
4019 if (fwrite(sbuf, 1, 8, f) < 8 ||
4020 fwrite(re, 1, true_size, f) < true_size)
4021 {
4022 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4023 }
4024 else
4025 {
4026 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4027
4028 /* If there is study data, write it. */
4029
4030 if (extra != NULL)
4031 {
4032 if (fwrite(extra->study_data, 1, true_study_size, f) <
4033 true_study_size)
4034 {
4035 fprintf(outfile, "Write error on %s: %s\n", to_file,
4036 strerror(errno));
4037 }
4038 else fprintf(outfile, "Study data written to %s\n", to_file);
4039 }
4040 }
4041 fclose(f);
4042 }
4043
4044 new_free(re);
4045 if (extra != NULL)
4046 {
4047 PCRE_FREE_STUDY(extra);
4048 }
4049 if (locale_set)
4050 {
4051 new_free((void *)tables);
4052 setlocale(LC_CTYPE, "C");
4053 locale_set = 0;
4054 }
4055 continue; /* With next regex */
4056 }
4057 } /* End of non-POSIX compile */
4058
4059 /* Read data lines and test them */
4060
4061 for (;;)
4062 {
4063 pcre_uint8 *q;
4064 pcre_uint8 *bptr;
4065 int *use_offsets = offsets;
4066 int use_size_offsets = size_offsets;
4067 int callout_data = 0;
4068 int callout_data_set = 0;
4069 int count;
4070 pcre_uint32 c;
4071 int copystrings = 0;
4072 int find_match_limit = default_find_match_limit;
4073 int getstrings = 0;
4074 int getlist = 0;
4075 int gmatched = 0;
4076 int start_offset = 0;
4077 int start_offset_sign = 1;
4078 int g_notempty = 0;
4079 int use_dfa = 0;
4080
4081 *copynames = 0;
4082 *getnames = 0;
4083
4084 #ifdef SUPPORT_PCRE32
4085 cn32ptr = copynames;
4086 gn32ptr = getnames;
4087 #endif
4088 #ifdef SUPPORT_PCRE16
4089 cn16ptr = copynames16;
4090 gn16ptr = getnames16;
4091 #endif
4092 #ifdef SUPPORT_PCRE8
4093 cn8ptr = copynames8;
4094 gn8ptr = getnames8;
4095 #endif
4096
4097 SET_PCRE_CALLOUT(callout);
4098 first_callout = 1;
4099 last_callout_mark = NULL;
4100 callout_extra = 0;
4101 callout_count = 0;
4102 callout_fail_count = 999999;
4103 callout_fail_id = -1;
4104 show_malloc = 0;
4105 options = 0;
4106
4107 if (extra != NULL) extra->flags &=
4108 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4109
4110 len = 0;
4111 for (;;)
4112 {
4113 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4114 {
4115 if (len > 0) /* Reached EOF without hitting a newline */
4116 {
4117 fprintf(outfile, "\n");
4118 break;
4119 }
4120 done = 1;
4121 goto CONTINUE;
4122 }
4123 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4124 len = (int)strlen((char *)buffer);
4125 if (buffer[len-1] == '\n') break;
4126 }
4127
4128 while (len > 0 && isspace(buffer[len-1])) len--;
4129 buffer[len] = 0;
4130 if (len == 0) break;
4131
4132 p = buffer;
4133 while (isspace(*p)) p++;
4134
4135 bptr = q = dbuffer;
4136 while ((c = *p++) != 0)
4137 {
4138 int i = 0;
4139 int n = 0;
4140
4141 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4142 In non-UTF mode, allow the value of the byte to fall through to later,
4143 where values greater than 127 are turned into UTF-8 when running in
4144 16-bit or 32-bit mode. */
4145
4146 if (c != '\\')
4147 {
4148 if (use_utf)
4149 {
4150 *q++ = c;
4151 continue;
4152 }
4153 }
4154
4155 /* Handle backslash escapes */
4156
4157 else switch ((c = *p++))
4158 {
4159 case 'a': c = 7; break;
4160 case 'b': c = '\b'; break;
4161 case 'e': c = 27; break;
4162 case 'f': c = '\f'; break;
4163 case 'n': c = '\n'; break;
4164 case 'r': c = '\r'; break;
4165 case 't': c = '\t'; break;
4166 case 'v': c = '\v'; break;
4167
4168 case '0': case '1': case '2': case '3':
4169 case '4': case '5': case '6': case '7':
4170 c -= '0';
4171 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4172 c = c * 8 + *p++ - '0';
4173 break;
4174
4175 case 'x':
4176 if (*p == '{')
4177 {
4178 pcre_uint8 *pt = p;
4179 c = 0;
4180
4181 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4182 when isxdigit() is a macro that refers to its argument more than
4183 once. This is banned by the C Standard, but apparently happens in at
4184 least one MacOS environment. */
4185
4186 for (pt++; isxdigit(*pt); pt++)
4187 {
4188 if (++i == 9)
4189 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4190 "using only the first eight.\n");
4191 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4192 }
4193 if (*pt == '}')
4194 {
4195 p = pt + 1;
4196 break;
4197 }
4198 /* Not correct form for \x{...}; fall through */
4199 }
4200
4201 /* \x without {} always defines just one byte in 8-bit mode. This
4202 allows UTF-8 characters to be constructed byte by byte, and also allows
4203 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4204 Otherwise, pass it down to later code so that it can be turned into
4205 UTF-8 when running in 16/32-bit mode. */
4206
4207 c = 0;
4208 while (i++ < 2 && isxdigit(*p))
4209 {
4210 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4211 p++;
4212 }
4213 if (use_utf)
4214 {
4215 *q++ = c;
4216 continue;
4217 }
4218 break;
4219
4220 case 0: /* \ followed by EOF allows for an empty line */
4221 p--;
4222 continue;
4223
4224 case '>':
4225 if (*p == '-')
4226 {
4227 start_offset_sign = -1;
4228 p++;
4229 }
4230 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4231 start_offset *= start_offset_sign;
4232 continue;
4233
4234 case 'A': /* Option setting */
4235 options |= PCRE_ANCHORED;
4236 continue;
4237
4238 case 'B':
4239 options |= PCRE_NOTBOL;
4240 continue;
4241
4242 case 'C':
4243 if (isdigit(*p)) /* Set copy string */
4244 {
4245 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4246 copystrings |= 1 << n;
4247 }
4248 else if (isalnum(*p))
4249 {
4250 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4251 }
4252 else if (*p == '+')
4253 {
4254 callout_extra = 1;
4255 p++;
4256 }
4257 else if (*p == '-')
4258 {
4259 SET_PCRE_CALLOUT(NULL);
4260 p++;
4261 }
4262 else if (*p == '!')
4263 {
4264 callout_fail_id = 0;
4265 p++;
4266 while(isdigit(*p))
4267 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4268 callout_fail_count = 0;
4269 if (*p == '!')
4270 {
4271 p++;
4272 while(isdigit(*p))
4273 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4274 }
4275 }
4276 else if (*p == '*')
4277 {
4278 int sign = 1;
4279 callout_data = 0;
4280 if (*(++p) == '-') { sign = -1; p++; }
4281 while(isdigit(*p))
4282 callout_data = callout_data * 10 + *p++ - '0';
4283 callout_data *= sign;
4284 callout_data_set = 1;
4285 }
4286 continue;
4287
4288 #if !defined NODFA
4289 case 'D':
4290 #if !defined NOPOSIX
4291 if (posix || do_posix)
4292 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4293 else
4294 #endif
4295 use_dfa = 1;
4296 continue;
4297 #endif
4298
4299 #if !defined NODFA
4300 case 'F':
4301 options |= PCRE_DFA_SHORTEST;
4302 continue;
4303 #endif
4304
4305 case 'G':
4306 if (isdigit(*p))
4307 {
4308 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4309 getstrings |= 1 << n;
4310 }
4311 else if (isalnum(*p))
4312 {
4313 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4314 }
4315 continue;
4316
4317 case 'J':
4318 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4319 if (extra != NULL
4320 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4321 && extra->executable_jit != NULL)
4322 {
4323 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4324 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4325 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4326 }
4327 continue;
4328
4329 case 'L':
4330 getlist = 1;
4331 continue;
4332
4333 case 'M':
4334 find_match_limit = 1;
4335 continue;
4336
4337 case 'N':
4338 if ((options & PCRE_NOTEMPTY) != 0)
4339 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4340 else
4341 options |= PCRE_NOTEMPTY;
4342 continue;
4343
4344 case 'O':
4345 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4346 if (n > size_offsets_max)
4347 {
4348 size_offsets_max = n;
4349 free(offsets);
4350 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4351 if (offsets == NULL)
4352 {
4353 printf("** Failed to get %d bytes of memory for offsets vector\n",
4354 (int)(size_offsets_max * sizeof(int)));
4355 yield = 1;
4356 goto EXIT;
4357 }
4358 }
4359 use_size_offsets = n;
4360 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4361 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4362 continue;
4363
4364 case 'P':
4365 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4366 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4367 continue;
4368
4369 case 'Q':
4370 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4371 if (extra == NULL)
4372 {
4373 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4374 extra->flags = 0;
4375 }
4376 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4377 extra->match_limit_recursion = n;
4378 continue;
4379
4380 case 'q':
4381 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4382 if (extra == NULL)
4383 {
4384 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4385 extra->flags = 0;
4386 }
4387 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4388 extra->match_limit = n;
4389 continue;
4390
4391 #if !defined NODFA
4392 case 'R':
4393 options |= PCRE_DFA_RESTART;
4394 continue;
4395 #endif
4396
4397 case 'S':
4398 show_malloc = 1;
4399 continue;
4400
4401 case 'Y':
4402 options |= PCRE_NO_START_OPTIMIZE;
4403 continue;
4404
4405 case 'Z':
4406 options |= PCRE_NOTEOL;
4407 continue;
4408
4409 case '?':
4410 options |= PCRE_NO_UTF8_CHECK;
4411 continue;
4412
4413 case '<':
4414 {
4415 int x = check_newline(p, outfile);
4416 if (x == 0) goto NEXT_DATA;
4417 options |= x;
4418 while (*p++ != '>');
4419 }
4420 continue;
4421 }
4422
4423 /* We now have a character value in c that may be greater than 255. In
4424 16-bit or 32-bit mode, we always convert characters to UTF-8 so that
4425 values greater than 255 can be passed to non-UTF 16- or 32-bit strings.
4426 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4427 than 127 in UTF mode must have come from \x{...} or octal constructs
4428 because values from \x.. get this far only in non-UTF mode. */
4429
4430 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
4431 if (pcre_mode != PCRE8_MODE || use_utf)
4432 {
4433 pcre_uint8 buff8[8];
4434 int ii, utn;
4435 utn = ord2utf8(c, buff8);
4436 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
4437 }
4438 else
4439 #endif
4440 {
4441 if (c > 255)
4442 {
4443 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4444 "and UTF-8 mode is not enabled.\n", c);
4445 fprintf(outfile, "** Truncation will probably give the wrong "
4446 "result.\n");
4447 }
4448 *q++ = c;
4449 }
4450 }
4451
4452 /* Reached end of subject string */
4453
4454 *q = 0;
4455 len = (int)(q - dbuffer);
4456
4457 /* Move the data to the end of the buffer so that a read over the end of
4458 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
4459 we are using the POSIX interface, we must include the terminating zero. */
4460
4461 #if !defined NOPOSIX
4462 if (posix || do_posix)
4463 {
4464 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
4465 bptr += buffer_size - len - 1;
4466 }
4467 else
4468 #endif
4469 {
4470 memmove(bptr + buffer_size - len, bptr, len);
4471 bptr += buffer_size - len;
4472 }
4473
4474 if ((all_use_dfa || use_dfa) && find_match_limit)
4475 {
4476 printf("**Match limit not relevant for DFA matching: ignored\n");
4477 find_match_limit = 0;
4478 }
4479
4480 /* Handle matching via the POSIX interface, which does not
4481 support timing or playing with the match limit or callout data. */
4482
4483 #if !defined NOPOSIX
4484 if (posix || do_posix)
4485 {
4486 int rc;
4487 int eflags = 0;
4488 regmatch_t *pmatch = NULL;
4489 if (use_size_offsets > 0)
4490 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4491 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4492 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4493 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4494
4495 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4496
4497 if (rc != 0)
4498 {
4499 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4500 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4501 }
4502 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4503 {
4504 fprintf(outfile, "Matched with REG_NOSUB\n");
4505 }
4506 else
4507 {
4508 size_t i;
4509 for (i = 0; i < (size_t)use_size_offsets; i++)
4510 {
4511 if (pmatch[i].rm_so >= 0)
4512 {
4513 fprintf(outfile, "%2d: ", (int)i);
4514 PCHARSV(dbuffer, pmatch[i].rm_so,
4515 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4516 fprintf(outfile, "\n");
4517 if (do_showcaprest || (i == 0 && do_showrest))
4518 {
4519 fprintf(outfile, "%2d+ ", (int)i);
4520 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4521 outfile);
4522 fprintf(outfile, "\n");
4523 }
4524 }
4525 }
4526 }
4527 free(pmatch);
4528 goto NEXT_DATA;
4529 }
4530
4531 #endif /* !defined NOPOSIX */
4532
4533 /* Handle matching via the native interface - repeats for /g and /G */
4534
4535 #ifdef SUPPORT_PCRE16
4536 if (pcre_mode == PCRE16_MODE)
4537 {
4538 len = to16(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF8, len);
4539 switch(len)
4540 {
4541 case -1:
4542 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4543 "converted to UTF-16\n");
4544 goto NEXT_DATA;
4545
4546 case -2:
4547 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4548 "cannot be converted to UTF-16\n");
4549 goto NEXT_DATA;
4550
4551 case -3:
4552 fprintf(outfile, "**Failed: character value greater than 0xffff "
4553 "cannot be converted to 16-bit in non-UTF mode\n");
4554 goto NEXT_DATA;
4555
4556 default:
4557 break;
4558 }
4559 bptr = (pcre_uint8 *)buffer16;
4560 }
4561 #endif
4562
4563 #ifdef SUPPORT_PCRE32
4564 if (pcre_mode == PCRE32_MODE)
4565 {
4566 len = to32(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF32, len);
4567 switch(len)
4568 {
4569 case -1:
4570 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4571 "converted to UTF-32\n");
4572 goto NEXT_DATA;
4573
4574 case -2:
4575 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4576 "cannot be converted to UTF-32\n");
4577 goto NEXT_DATA;
4578
4579 case -3:
4580 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4581 goto NEXT_DATA;
4582
4583 default:
4584 break;
4585 }
4586 bptr = (pcre_uint8 *)buffer32;
4587 }
4588 #endif
4589
4590 /* Ensure that there is a JIT callback if we want to verify that JIT was
4591 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4592
4593 if (verify_jit && jit_stack == NULL && extra != NULL)
4594 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4595
4596 for (;; gmatched++) /* Loop for /g or /G */
4597 {
4598 markptr = NULL;
4599 jit_was_used = FALSE;
4600
4601 if (timeitm > 0)
4602 {
4603 register int i;
4604 clock_t time_taken;
4605 clock_t start_time = clock();
4606
4607 #if !defined NODFA
4608 if (all_use_dfa || use_dfa)
4609 {
4610 if ((options & PCRE_DFA_RESTART) != 0)
4611 {
4612 fprintf(outfile, "Timing DFA restarts is not supported\n");
4613 break;
4614 }
4615 if (dfa_workspace == NULL)
4616 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4617 for (i = 0; i < timeitm; i++)
4618 {
4619 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4620 (options | g_notempty), use_offsets, use_size_offsets,
4621 dfa_workspace, DFA_WS_DIMENSION);
4622 }
4623 }
4624 else
4625 #endif
4626
4627 for (i = 0; i < timeitm; i++)
4628 {
4629 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4630 (options | g_notempty), use_offsets, use_size_offsets);
4631 }
4632 time_taken = clock() - start_time;
4633 fprintf(outfile, "Execute time %.4f milliseconds\n",
4634 (((double)time_taken * 1000.0) / (double)timeitm) /
4635 (double)CLOCKS_PER_SEC);
4636 }
4637
4638 /* If find_match_limit is set, we want to do repeated matches with
4639 varying limits in order to find the minimum value for the match limit and
4640 for the recursion limit. The match limits are relevant only to the normal
4641 running of pcre_exec(), so disable the JIT optimization. This makes it
4642 possible to run the same set of tests with and without JIT externally
4643 requested. */
4644
4645 if (find_match_limit)
4646 {
4647 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4648 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4649 extra->flags = 0;
4650
4651 (void)check_match_limit(re, extra, bptr, len, start_offset,
4652 options|g_notempty, use_offsets, use_size_offsets,
4653 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4654 PCRE_ERROR_MATCHLIMIT, "match()");
4655
4656 count = check_match_limit(re, extra, bptr, len, start_offset,
4657 options|g_notempty, use_offsets, use_size_offsets,
4658 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4659 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4660 }
4661
4662 /* If callout_data is set, use the interface with additional data */
4663
4664 else if (callout_data_set)
4665 {
4666 if (extra == NULL)
4667 {
4668 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4669 extra->flags = 0;
4670 }
4671 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4672 extra->callout_data = &callout_data;
4673 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4674 options | g_notempty, use_offsets, use_size_offsets);
4675 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4676 }
4677
4678 /* The normal case is just to do the match once, with the default
4679 value of match_limit. */
4680
4681 #if !defined NODFA
4682 else if (all_use_dfa || use_dfa)
4683 {
4684 if (dfa_workspace == NULL)
4685 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4686 if (dfa_matched++ == 0)
4687 dfa_workspace[0] = -1; /* To catch bad restart */
4688 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4689 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4690 DFA_WS_DIMENSION);
4691 if (count == 0)
4692 {
4693 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4694 count = use_size_offsets/2;
4695 }
4696 }
4697 #endif
4698
4699 else
4700 {
4701 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4702 options | g_notempty, use_offsets, use_size_offsets);
4703 if (count == 0)
4704 {
4705 fprintf(outfile, "Matched, but too many substrings\n");
4706 count = use_size_offsets/3;
4707 }
4708 }
4709
4710 /* Matched */
4711
4712 if (count >= 0)
4713 {
4714 int i, maxcount;
4715 void *cnptr, *gnptr;
4716
4717 #if !defined NODFA
4718 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4719 #endif
4720 maxcount = use_size_offsets/3;
4721
4722 /* This is a check against a lunatic return value. */
4723
4724 if (count > maxcount)
4725 {
4726 fprintf(outfile,
4727 "** PCRE error: returned count %d is too big for offset size %d\n",
4728 count, use_size_offsets);
4729 count = use_size_offsets/3;
4730 if (do_g || do_G)
4731 {
4732 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4733 do_g = do_G = FALSE; /* Break g/G loop */
4734 }
4735 }
4736
4737 /* do_allcaps requests showing of all captures in the pattern, to check
4738 unset ones at the end. */
4739
4740 if (do_allcaps)
4741 {
4742 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4743 goto SKIP_DATA;
4744 count++; /* Allow for full match */
4745 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4746 }
4747
4748 /* Output the captured substrings */
4749
4750 for (i = 0; i < count * 2; i += 2)
4751 {
4752 if (use_offsets[i] < 0)
4753 {
4754 if (use_offsets[i] != -1)
4755 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4756 use_offsets[i], i);
4757 if (use_offsets[i+1] != -1)
4758 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4759 use_offsets[i+1], i+1);
4760 fprintf(outfile, "%2d: <unset>\n", i/2);
4761 }
4762 else
4763 {
4764 fprintf(outfile, "%2d: ", i/2);
4765 PCHARSV(bptr, use_offsets[i],
4766 use_offsets[i+1] - use_offsets[i], outfile);
4767 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4768 fprintf(outfile, "\n");
4769 if (do_showcaprest || (i == 0 && do_showrest))
4770 {
4771 fprintf(outfile, "%2d+ ", i/2);
4772 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4773 outfile);
4774 fprintf(outfile, "\n");
4775 }
4776 }
4777 }
4778
4779 if (markptr != NULL)
4780 {
4781 fprintf(outfile, "MK: ");
4782 PCHARSV(markptr, 0, -1, outfile);
4783 fprintf(outfile, "\n");
4784 }
4785
4786 for (i = 0; i < 32; i++)
4787 {
4788 if ((copystrings & (1 << i)) != 0)
4789 {
4790 int rc;
4791 char copybuffer[256];
4792 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4793 copybuffer, sizeof(copybuffer));
4794 if (rc < 0)
4795 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4796 else
4797 {
4798 fprintf(outfile, "%2dC ", i);
4799 PCHARSV(copybuffer, 0, rc, outfile);
4800 fprintf(outfile, " (%d)\n", rc);
4801 }
4802 }
4803 }
4804
4805 cnptr = copynames;
4806 for (;;)
4807 {
4808 int rc;
4809 char copybuffer[256];
4810
4811 if (pcre_mode == PCRE16_MODE)
4812 {
4813 if (*(pcre_uint16 *)cnptr == 0) break;
4814 }
4815 else
4816 {
4817 if (*(pcre_uint8 *)cnptr == 0) break;
4818 }
4819
4820 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4821 cnptr, copybuffer, sizeof(copybuffer));
4822
4823 if (rc < 0)
4824 {
4825 fprintf(outfile, "copy substring ");
4826 PCHARSV(cnptr, 0, -1, outfile);
4827 fprintf(outfile, " failed %d\n", rc);
4828 }
4829 else
4830 {
4831 fprintf(outfile, " C ");
4832 PCHARSV(copybuffer, 0, rc, outfile);
4833 fprintf(outfile, " (%d) ", rc);
4834 PCHARSV(cnptr, 0, -1, outfile);
4835 putc('\n', outfile);
4836 }
4837
4838 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4839 }
4840
4841 for (i = 0; i < 32; i++)
4842 {
4843 if ((getstrings & (1 << i)) != 0)
4844 {
4845 int rc;
4846 const char *substring;
4847 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4848 if (rc < 0)
4849 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4850 else
4851 {
4852 fprintf(outfile, "%2dG ", i);
4853 PCHARSV(substring, 0, rc, outfile);
4854 fprintf(outfile, " (%d)\n", rc);
4855 PCRE_FREE_SUBSTRING(substring);
4856 }
4857 }
4858 }
4859
4860 gnptr = getnames;
4861 for (;;)
4862 {
4863 int rc;
4864 const char *substring;
4865
4866 if (pcre_mode == PCRE16_MODE)
4867 {
4868 if (*(pcre_uint16 *)gnptr == 0) break;
4869 }
4870 else
4871 {
4872 if (*(pcre_uint8 *)gnptr == 0) break;
4873 }
4874
4875 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4876 gnptr, &substring);
4877 if (rc < 0)
4878 {
4879 fprintf(outfile, "get substring ");
4880 PCHARSV(gnptr, 0, -1, outfile);
4881 fprintf(outfile, " failed %d\n", rc);
4882 }
4883 else
4884 {
4885 fprintf(outfile, " G ");
4886 PCHARSV(substring, 0, rc, outfile);
4887 fprintf(outfile, " (%d) ", rc);
4888 PCHARSV(gnptr, 0, -1, outfile);
4889 PCRE_FREE_SUBSTRING(substring);
4890 putc('\n', outfile);
4891 }
4892
4893 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4894 }
4895
4896 if (getlist)
4897 {
4898 int rc;
4899 const char **stringlist;
4900 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4901 if (rc < 0)
4902 fprintf(outfile, "get substring list failed %d\n", rc);
4903 else
4904 {
4905 for (i = 0; i < count; i++)
4906 {
4907 fprintf(outfile, "%2dL ", i);
4908 PCHARSV(stringlist[i], 0, -1, outfile);
4909 putc('\n', outfile);
4910 }
4911 if (stringlist[i] != NULL)
4912 fprintf(outfile, "string list not terminated by NULL\n");
4913 PCRE_FREE_SUBSTRING_LIST(stringlist);
4914 }
4915 }
4916 }
4917
4918 /* There was a partial match */
4919
4920 else if (count == PCRE_ERROR_PARTIAL)
4921 {
4922 if (markptr == NULL) fprintf(outfile, "Partial match");
4923 else
4924 {
4925 fprintf(outfile, "Partial match, mark=");
4926 PCHARSV(markptr, 0, -1, outfile);
4927 }
4928 if (use_size_offsets > 1)
4929 {
4930 fprintf(outfile, ": ");
4931 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4932 outfile);
4933 }
4934 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4935 fprintf(outfile, "\n");
4936 break; /* Out of the /g loop */
4937 }
4938
4939 /* Failed to match. If this is a /g or /G loop and we previously set
4940 g_notempty after a null match, this is not necessarily the end. We want
4941 to advance the start offset, and continue. We won't be at the end of the
4942 string - that was checked before setting g_notempty.
4943
4944 Complication arises in the case when the newline convention is "any",
4945 "crlf", or "anycrlf". If the previous match was at the end of a line
4946 terminated by CRLF, an advance of one character just passes the \r,
4947 whereas we should prefer the longer newline sequence, as does the code in
4948 pcre_exec(). Fudge the offset value to achieve this. We check for a
4949 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4950 find the default.
4951
4952 Otherwise, in the case of UTF-8 matching, the advance must be one
4953 character, not one byte. */
4954
4955 else
4956 {
4957 if (g_notempty != 0)
4958 {
4959 int onechar = 1;
4960 unsigned int obits = REAL_PCRE_OPTIONS(re);
4961 use_offsets[0] = start_offset;
4962 if ((obits & PCRE_NEWLINE_BITS) == 0)
4963 {
4964 int d;
4965 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4966 /* Note that these values are always the ASCII ones, even in
4967 EBCDIC environments. CR = 13, NL = 10. */
4968 obits = (d == 13)? PCRE_NEWLINE_CR :
4969 (d == 10)? PCRE_NEWLINE_LF :
4970 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4971 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4972 (d == -1)? PCRE_NEWLINE_ANY : 0;
4973 }
4974 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4975 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4976 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4977 &&
4978 start_offset < len - 1 && (
4979 #ifdef SUPPORT_PCRE8
4980 (pcre_mode == PCRE8_MODE &&
4981 bptr[start_offset] == '\r' &&
4982 bptr[start_offset + 1] == '\n') ||
4983 #endif
4984 #ifdef SUPPORT_PCRE16
4985 (pcre_mode == PCRE16_MODE &&
4986 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
4987 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
4988 #endif
4989 #ifdef SUPPORT_PCRE32
4990 (pcre_mode == PCRE32_MODE &&
4991 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
4992 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
4993 #endif
4994 0))
4995 onechar++;
4996 else if (use_utf)
4997 {
4998 while (start_offset + onechar < len)
4999 {
5000 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5001 onechar++;
5002 }
5003 }
5004 use_offsets[1] = start_offset + onechar;
5005 }
5006 else
5007 {
5008 switch(count)
5009 {
5010 case PCRE_ERROR_NOMATCH:
5011 if (gmatched == 0)
5012 {
5013 if (markptr == NULL)
5014 {
5015 fprintf(outfile, "No match");
5016 }
5017 else
5018 {
5019 fprintf(outfile, "No match, mark = ");
5020 PCHARSV(markptr, 0, -1, outfile);
5021 }
5022 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5023 putc('\n', outfile);
5024 }
5025 break;
5026
5027 case PCRE_ERROR_BADUTF8:
5028 case PCRE_ERROR_SHORTUTF8:
5029 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5030 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5031 8 * CHAR_SIZE);
5032 if (use_size_offsets >= 2)
5033 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5034 use_offsets[1]);
5035 fprintf(outfile, "\n");
5036 break;
5037
5038 case PCRE_ERROR_BADUTF8_OFFSET:
5039 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5040 8 * CHAR_SIZE);
5041 break;
5042
5043 default:
5044 if (count < 0 &&
5045 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5046 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5047 else
5048 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5049 break;
5050 }
5051
5052 break; /* Out of the /g loop */
5053 }
5054 }
5055
5056 /* If not /g or /G we are done */
5057
5058 if (!do_g && !do_G) break;
5059
5060 /* If we have matched an empty string, first check to see if we are at
5061 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5062 Perl's /g option does. This turns out to be rather cunning. First we set
5063 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5064 same point. If this fails (picked up above) we advance to the next
5065 character. */
5066
5067 g_notempty = 0;
5068
5069 if (use_offsets[0] == use_offsets[1])
5070 {
5071 if (use_offsets[0] == len) break;
5072 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5073 }
5074
5075 /* For /g, update the start offset, leaving the rest alone */
5076
5077 if (do_g) start_offset = use_offsets[1];
5078
5079 /* For /G, update the pointer and length */
5080
5081 else
5082 {
5083 bptr += use_offsets[1] * CHAR_SIZE;
5084 len -= use_offsets[1];
5085 }
5086 } /* End of loop for /g and /G */
5087
5088 NEXT_DATA: continue;
5089 } /* End of loop for data lines */
5090
5091 CONTINUE:
5092
5093 #if !defined NOPOSIX
5094 if (posix || do_posix) regfree(&preg);
5095 #endif
5096
5097 if (re != NULL) new_free(re);
5098 if (extra != NULL)
5099 {
5100 PCRE_FREE_STUDY(extra);
5101 }
5102 if (locale_set)
5103 {
5104 new_free((void *)tables);
5105 setlocale(LC_CTYPE, "C");
5106 locale_set = 0;
5107 }
5108 if (jit_stack != NULL)
5109 {
5110 PCRE_JIT_STACK_FREE(jit_stack);
5111 jit_stack = NULL;
5112 }
5113 }
5114
5115 if (infile == stdin) fprintf(outfile, "\n");
5116
5117 EXIT:
5118
5119 if (infile != NULL && infile != stdin) fclose(infile);
5120 if (outfile != NULL && outfile != stdout) fclose(outfile);
5121
5122 free(buffer);
5123 free(dbuffer);
5124 free(pbuffer);
5125 free(offsets);
5126
5127 #ifdef SUPPORT_PCRE16
5128 if (buffer16 != NULL) free(buffer16);
5129 #endif
5130 #ifdef SUPPORT_PCRE32
5131 if (buffer32 != NULL) free(buffer32);
5132 #endif
5133
5134 #if !defined NODFA
5135 if (dfa_workspace != NULL)
5136 free(dfa_workspace);
5137 #endif
5138
5139 return yield;
5140 }
5141
5142 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5