/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 818 - (show annotations)
Thu Dec 22 20:11:31 2011 UTC (8 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 120175 byte(s)
More 16-bit fixes, and removal of pcre_info().
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <time.h>
49 #include <locale.h>
50 #include <errno.h>
51
52 #ifdef SUPPORT_LIBREADLINE
53 #ifdef HAVE_UNISTD_H
54 #include <unistd.h>
55 #endif
56 #include <readline/readline.h>
57 #include <readline/history.h>
58 #endif
59
60
61 /* A number of things vary for Windows builds. Originally, pcretest opened its
62 input and output without "b"; then I was told that "b" was needed in some
63 environments, so it was added for release 5.0 to both the input and output. (It
64 makes no difference on Unix-like systems.) Later I was told that it is wrong
65 for the input on Windows. I've now abstracted the modes into two macros that
66 are set here, to make it easier to fiddle with them, and removed "b" from the
67 input mode under Windows. */
68
/* Platform-dependent settings: the fopen() mode strings used for input and
output, plus a few name shims that only Windows builds need. */

#if !defined(_WIN32) && !defined(WIN32)

/* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"

#else

/* Windows: input is opened without "b", output with it. */

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows spells these with a leading underscore; some environments already
provide the plain names, hence the guards. */

#ifndef isatty
#define isatty _isatty
#endif

#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#endif
97
98
99 /* We have to include pcre_internal.h because we need the internal info for
100 displaying the results of pcre_study() and we also need to know about the
101 internal macros, structures, and other internal data values; pcretest has
102 "inside information" compared to a program that strictly follows the PCRE API.
103
104 Although pcre_internal.h does itself include pcre.h, we explicitly include it
105 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106 appropriately for an application, not for building PCRE. */
107
108 #include "pcre.h"
109 #include "pcre_internal.h"
110
111 /* The pcre_printint() function, which prints the internal form of a compiled
112 regex, is held in a separate file so that (a) it can be compiled in either
113 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114 when that is compiled in debug mode. */
115
/* Prototypes for the pattern-printing routines, one per supported library
width. Both take the compiled pattern, the output stream, and a flag that
selects whether lengths are printed. */

#if defined SUPPORT_PCRE8
extern void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#if defined SUPPORT_PCRE16
extern void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
122
123 /* We need access to some of the data tables that PCRE uses. So as not to have
124 to keep two copies, we include the source file here, changing the names of the
125 external symbols to prevent clashes. */
126
/* Map the library's external table names onto local names. */

/* Unicode property tables */
#define _pcre_ucp_gentype       ucp_gentype
#define _pcre_ucp_typerange     ucp_typerange
#define _pcre_utt               utt
#define _pcre_utt_size          utt_size
#define _pcre_utt_names         utt_names

/* UTF-8 tables */
#define _pcre_utf8_table1       utf8_table1
#define _pcre_utf8_table1_size  utf8_table1_size
#define _pcre_utf8_table2       utf8_table2
#define _pcre_utf8_table3       utf8_table3
#define _pcre_utf8_table4       utf8_table4

/* Opcode length table */
#define _pcre_OP_lengths        OP_lengths
138
139 #include "pcre_tables.c"
140
141 /* The definition of the macro PRINTABLE, which determines whether to print an
142 output character as-is or as a hex value when showing compiled patterns, is
143 the same as in the printint.src file. We use it here in cases when the locale
144 has not been explicitly changed, so as to get consistent output from systems
145 that differ in their output from isprint() even in the "C" locale. */
146
/* PRINTABLE(c) is a fixed, locale-independent range test; the range depends
on whether the build is for EBCDIC. Note that c is evaluated twice. */

#ifndef EBCDIC
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#else
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#endif

/* PRINTOK(c) defers to isprint() once locale_set is non-zero, and otherwise
uses the fixed PRINTABLE() ranges. */

#define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154
155 /* It is possible to compile this test program without including support for
156 testing the POSIX interface, though this is not available via the standard
157 Makefile. */
158
159 #if !defined NOPOSIX
160 #include "pcreposix.h"
161 #endif
162
163 /* It is also possible, originally for the benefit of a version that was
164 imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165 without the interface to the DFA matcher (NODFA). In fact, we automatically cut
166 out the UTF8 support if PCRE is built without it. */
167
/* Without SUPPORT_UTF8 there is nothing to test, so force NOUTF8 on unless it
has already been defined. */

#if !defined SUPPORT_UTF8 && !defined NOUTF8
#define NOUTF8
#endif
173
174 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
175 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
176 only from one place and is handled differently). I couldn't dream up any way of
177 using a single macro to do this in a generic way, because of the many different
178 argument requirements. We know that at least one of SUPPORT_PCRE8 and
179 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
180 use these in the definitions of generic macros. */
181
182 #ifdef SUPPORT_PCRE8
183
/* ----- Wrappers for the 8-bit library -----

Each macro forwards to the corresponding pcre_xxx() function (or local helper),
casting pcretest's byte pointers to the pointer types used in the call. Macros
that produce a result assign it to their first argument. Every macro expands to
a single expression statement without a trailing semicolon.

Macro arguments are substituted without added parentheses, exactly as before.
NOTE(review): callers may pass pointer expressions whose meaning depends on the
cast binding first - confirm before parenthesizing. */

/* Print len characters at p to f via pchars(); lv receives its result. */

#define PCHARS8(lv, p, len, f) \
  lv = pchars((pcre_uint8 *)p, len, f)

/* As PCHARS8, but the result of pchars() is discarded. */

#define PCHARSV8(p, len, f) \
  (void)pchars((pcre_uint8 *)p, len, f)

/* Install callout as the 8-bit library's callout function. */

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

/* Length of the zero-terminated string at p, as an int. */

#define STRLEN8(p) ((int)strlen((char *)p))


/* Compile pat; re receives the compiled pattern. */

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

/* Copy the substring named by namesptr into cbuffer. This used to reference a
variable called copynamesptr from the caller's scope instead of its own
namesptr parameter, so the name actually passed in was ignored; it now uses the
parameter, matching the 16-bit version. */

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

/* Copy captured substring number i into cbuffer. */

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

/* Run the DFA matcher; count receives its return value. */

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

/* Run the normal matcher; count receives its return value. */

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

/* Release study data, a single substring, or a substring list. */

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

/* Fetch the substring named by getnamesptr; subsptr receives the pointer. */

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* Look up the group number for the name at ptr; n receives the result.
NOTE(review): the second parameter is called rc but the body refers to an
identifier named re in the caller's scope - presumably callers pass re here;
verify before changing. */

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

/* Fetch captured substring number i; subsptr receives the pointer. */

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

/* Fetch all captured substrings as a list. */

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

/* Byte-order conversion, internal-form printing, and studying. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
  pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)
248
249 #endif /* SUPPORT_PCRE8 */
250
251 /* -----------------------------------------------------------*/
252
253 #ifdef SUPPORT_PCRE16
254
/* ----- Wrappers for the 16-bit library -----

Each macro forwards to the corresponding pcre16_xxx() function (or local
helper), casting its pointer arguments to the 16-bit pointer types. Macros that
produce a result assign it to their first argument. Arguments are substituted
without added parentheses. */

/* Character output and string length helpers */

#define PCHARS16(lv, p, len, f) \
  lv = pchars16((PCRE_SPTR16)p, len, f)

#define PCHARSV16(p, len, f) \
  (void)pchars16((PCRE_SPTR16)p, len, f)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

/* Install callout as the 16-bit library's callout function. */

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = callout


/* Compiling */

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)

/* Copying substrings. The size argument is halved before being passed on.
NOTE(review): presumably callers give the buffer size in bytes while the
16-bit functions count 16-bit units - confirm against the callers. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, size/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SCHAR16 *)cbuffer, size/2)

/* Matching; count receives the matcher's return value. */

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
    options, offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
    options, offsets, size_offsets)

/* Releasing study data, a single substring, or a substring list. */

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study(extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

/* Fetching substrings by name, by number, or all at once. */

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)subsptr)

/* NOTE(review): the second parameter is called rc but the body refers to an
identifier named re in the caller's scope - presumably callers pass re here;
verify before changing. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)listptr)

/* Byte-order conversion, internal-form printing, and studying. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
  pcre16_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = pcre16_study(re, options, error)
322
323 #endif /* SUPPORT_PCRE16 */
324
325
326 /* ----- Both modes are supported; a runtime test is needed, except for
327 pcre_config(), and the JIT stack functions, when it doesn't matter which
328 version is called. ----- */
329
330 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
331
/* Generic macros for a build that supports both widths: each one tests
use_pcre16 at run time and dispatches to the 16-bit or the 8-bit wrapper. The
statement-like macros are wrapped in do { } while (0) and must be followed by a
semicolon at the point of use. pcre_config() and the JIT stack functions are
always taken from the 8-bit library. */

/* Size in bytes of one character in the selected mode. */

#define CHAR_SIZE ((use_pcre16)? 2 : 1)

#define PCHARS(lv, p, len, f) \
  do { \
    if (use_pcre16) { PCHARS16(lv, p, len, f); } \
    else { PCHARS8(lv, p, len, f); } \
  } while (0)

#define PCHARSV(p, len, f) \
  do { \
    if (use_pcre16) { PCHARSV16(p, len, f); } \
    else { PCHARSV8(p, len, f); } \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (use_pcre16) { SET_PCRE_CALLOUT16(callout); } \
    else { SET_PCRE_CALLOUT8(callout); } \
  } while (0)

#define STRLEN(p) ((use_pcre16)? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) \
      { PCRE_COMPILE16(re, pat, options, error, erroffset, tables); } \
    else \
      { PCRE_COMPILE8(re, pat, options, error, erroffset, tables); } \
  } while (0)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (use_pcre16) \
      { PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
          namesptr, cbuffer, size); } \
    else \
      { PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
          namesptr, cbuffer, size); } \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (use_pcre16) \
      { PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); } \
    else \
      { PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); } \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (use_pcre16) \
      { PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
          offsets, size_offsets, workspace, size_workspace); } \
    else \
      { PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
          offsets, size_offsets, workspace, size_workspace); } \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) \
      { PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
          offsets, size_offsets); } \
    else \
      { PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
          offsets, size_offsets); } \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) { PCRE_FREE_STUDY16(extra); } \
    else { PCRE_FREE_STUDY8(extra); } \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (use_pcre16) { PCRE_FREE_SUBSTRING16(substring); } \
    else { PCRE_FREE_SUBSTRING8(substring); } \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (use_pcre16) { PCRE_FREE_SUBSTRING_LIST16(listptr); } \
    else { PCRE_FREE_SUBSTRING_LIST8(listptr); } \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (use_pcre16) \
      { PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
          getnamesptr, subsptr); } \
    else \
      { PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
          getnamesptr, subsptr); } \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (use_pcre16) { PCRE_GET_STRINGNUMBER16(n, rc, ptr); } \
    else { PCRE_GET_STRINGNUMBER8(n, rc, ptr); } \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (use_pcre16) \
      { PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); } \
    else \
      { PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); } \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (use_pcre16) \
      { PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); } \
    else \
      { PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); } \
  } while (0)

#define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
#define PCRE_JIT_STACK_FREE pcre_jit_stack_free

#define PCRE_MAKETABLES \
  ((use_pcre16)? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
  do { \
    if (use_pcre16) \
      { PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); } \
    else \
      { PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables); } \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (use_pcre16) { PCRE_PRINTINT16(re, outfile, debug_lengths); } \
    else { PCRE_PRINTINT8(re, outfile, debug_lengths); } \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) { PCRE_STUDY16(extra, re, options, error); } \
    else { PCRE_STUDY8(extra, re, options, error); } \
  } while (0)
465
466 /* ----- Only 8-bit mode is supported ----- */
467
468 #elif defined SUPPORT_PCRE8
/* With only the 8-bit library available, every generic name is a plain alias
for its 8-bit wrapper or library function; no run-time test is needed. */

#define CHAR_SIZE                         1

#define PCHARS                            PCHARS8
#define PCHARSV                           PCHARSV8
#define SET_PCRE_CALLOUT                  SET_PCRE_CALLOUT8
#define STRLEN                            STRLEN8

#define PCRE_ASSIGN_JIT_STACK             pcre_assign_jit_stack
#define PCRE_COMPILE                      PCRE_COMPILE8
#define PCRE_CONFIG                       pcre_config
#define PCRE_COPY_NAMED_SUBSTRING         PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING               PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC                     PCRE_DFA_EXEC8
#define PCRE_EXEC                         PCRE_EXEC8
#define PCRE_FREE_STUDY                   PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING               PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST          PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING          PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER             PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING                PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST           PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC              pcre_jit_stack_alloc
#define PCRE_JIT_STACK_FREE               pcre_jit_stack_free
#define PCRE_MAKETABLES                   pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER   PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT                     PCRE_PRINTINT8
#define PCRE_STUDY                        PCRE_STUDY8
494
495 /* ----- Only 16-bit mode is supported ----- */
496
497 #else
/* With only the 16-bit library available, every generic name is a plain alias
for its 16-bit wrapper or library function; no run-time test is needed.

CHAR_SIZE is the size in bytes of one character. It was wrongly set to 1 here;
a 16-bit character occupies 2 bytes, which is also the value the dual-mode
definition yields when use_pcre16 is set. */

#define CHAR_SIZE                         2

#define PCHARS                            PCHARS16
#define PCHARSV                           PCHARSV16
#define SET_PCRE_CALLOUT                  SET_PCRE_CALLOUT16
#define STRLEN                            STRLEN16

#define PCRE_ASSIGN_JIT_STACK             pcre16_assign_jit_stack
#define PCRE_COMPILE                      PCRE_COMPILE16
#define PCRE_CONFIG                       pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING         PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING               PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC                     PCRE_DFA_EXEC16
#define PCRE_EXEC                         PCRE_EXEC16
#define PCRE_FREE_STUDY                   PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING               PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST          PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING          PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER             PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING                PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST           PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC              pcre16_jit_stack_alloc
#define PCRE_JIT_STACK_FREE               pcre16_jit_stack_free
#define PCRE_MAKETABLES                   pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER   PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT                     PCRE_PRINTINT16
#define PCRE_STUDY                        PCRE_STUDY16
523 #endif
524
525 /* ----- End of mode-specific function call macros ----- */
526
527
528 /* Other parameters */
529
/* Make sure CLOCKS_PER_SEC exists: prefer CLK_TCK when the system supplies
that instead, and fall back to 100 otherwise. */

#if !defined CLOCKS_PER_SEC
#if defined CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* Default number of repetitions used when timing. */

#define LOOPREPEAT 500000
541
542 /* Static variables */
543
/* File-scope state. Variables written without an initializer start out as
zero (or NULL), because they have static storage duration. */

static FILE *outfile;

static size_t gotten_store;
static size_t first_gotten_store = 0;

static const unsigned char *last_callout_mark = NULL;

static int log_store = 0;
static int locale_set = 0;

static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int first_callout;

static int debug_lengths;
static int show_malloc;
static int use_utf;
558
559 /* The buffers grow automatically if very long input lines are encountered. */
560
561 static int buffer_size = 50000;
562 static pcre_uint8 *buffer = NULL;
563 static pcre_uint8 *dbuffer = NULL;
564 static pcre_uint8 *pbuffer = NULL;
565
566 /* Another buffer is needed for translation to 16-bit character strings. It will
567 be obtained and extended as required. */
568
#if defined SUPPORT_PCRE16

/* The 16-bit translation buffer: initially unallocated, with a size of zero. */

static pcre_uint16 *buffer16 = NULL;
static int buffer16_size = 0;

/* A table of operator lengths as used for 16-bit compiling is built here from
the OP_LENGTHS macro; it is needed in order to swap bytes in a pattern for
saving/reloading testing. OP_LENGTHS is expanded after LINK_SIZE has been
re-expressed for the 16-bit world. COMPILE_PCRE16 must *not* be set while
compiling this file; the check below enforces that. */

#if defined COMPILE_PCRE16
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

/* Remap LINK_SIZE: 2 becomes 1, and 3 or 4 become 2. Anything else is
rejected at compile time. */

#if LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#elif LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };

#endif  /* SUPPORT_PCRE16 */
596
597 /* If we have 8-bit support, default use_pcre16 to false; if there is also
598 16-bit support, it can be changed by an option. If there is no 8-bit support,
599 there must be 16-bit support, so default it to 1. */
600
/* Run-time mode selector: starts as 0 when the 8-bit library is available,
and as 1 when it is not. */

static int use_pcre16 =
#if defined SUPPORT_PCRE8
  0;
#else
  1;
#endif
606
607 /* Textual explanations for runtime error codes */
608
/* Messages for runtime error codes. The number in each trailing comment is the
entry's index in the array; a NULL entry has no text here, for the reason given
alongside it. */

static const char *errtexts[] = {
  NULL,                                       /*  0: no error */
  NULL,                                       /*  1: NOMATCH is handled specially */
  "NULL argument passed",                     /*  2 */
  "bad option value",                         /*  3 */
  "magic number missing",                     /*  4 */
  "unknown opcode - pattern overwritten?",    /*  5 */
  "no more memory",                           /*  6 */
  NULL,                                       /*  7: never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",                     /*  8 */
  "callout error code",                       /*  9 */
  NULL,                                       /* 10: BADUTF8 is handled specially */
  "bad UTF-8 offset",                         /* 11 */
  NULL,                                       /* 12: PARTIAL is handled specially */
  "not used - internal error",                /* 13 */
  "internal error - pattern overwritten?",    /* 14 */
  "bad count value",                          /* 15 */
  "item unsupported for DFA matching",        /* 16 */
  "backreference condition or recursion test not supported for DFA matching",  /* 17 */
  "match limit not supported for DFA matching",   /* 18 */
  "workspace size exceeded in DFA matching",  /* 19 */
  "too much recursion for DFA matching",      /* 20 */
  "recursion limit exceeded",                 /* 21 */
  "not used - internal error",                /* 22 */
  "invalid combination of newline options",   /* 23 */
  "bad offset value",                         /* 24 */
  NULL,                                       /* 25: SHORTUTF8 is handled specially */
  "nested recursion at the same subject position",  /* 26 */
  "JIT stack limit reached",                  /* 27 */
  "pattern compiled in wrong mode (8-bit/16-bit error)"  /* 28 */
};
640
641
642 /*************************************************
643 * Alternate character tables *
644 *************************************************/
645
646 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
647 using the default tables of the library. However, the T option can be used to
648 select alternate sets of tables, for different kinds of testing. Note also that
649 the L (locale) option also adjusts the tables. */
650
651 /* This is the set of tables distributed as default with PCRE. It recognizes
652 only ASCII characters. */
653
654 static const pcre_uint8 tables0[] = {
655
656 /* This table is a lower casing table. */
657
658 0, 1, 2, 3, 4, 5, 6, 7,
659 8, 9, 10, 11, 12, 13, 14, 15,
660 16, 17, 18, 19, 20, 21, 22, 23,
661 24, 25, 26, 27, 28, 29, 30, 31,
662 32, 33, 34, 35, 36, 37, 38, 39,
663 40, 41, 42, 43, 44, 45, 46, 47,
664 48, 49, 50, 51, 52, 53, 54, 55,
665 56, 57, 58, 59, 60, 61, 62, 63,
666 64, 97, 98, 99,100,101,102,103,
667 104,105,106,107,108,109,110,111,
668 112,113,114,115,116,117,118,119,
669 120,121,122, 91, 92, 93, 94, 95,
670 96, 97, 98, 99,100,101,102,103,
671 104,105,106,107,108,109,110,111,
672 112,113,114,115,116,117,118,119,
673 120,121,122,123,124,125,126,127,
674 128,129,130,131,132,133,134,135,
675 136,137,138,139,140,141,142,143,
676 144,145,146,147,148,149,150,151,
677 152,153,154,155,156,157,158,159,
678 160,161,162,163,164,165,166,167,
679 168,169,170,171,172,173,174,175,
680 176,177,178,179,180,181,182,183,
681 184,185,186,187,188,189,190,191,
682 192,193,194,195,196,197,198,199,
683 200,201,202,203,204,205,206,207,
684 208,209,210,211,212,213,214,215,
685 216,217,218,219,220,221,222,223,
686 224,225,226,227,228,229,230,231,
687 232,233,234,235,236,237,238,239,
688 240,241,242,243,244,245,246,247,
689 248,249,250,251,252,253,254,255,
690
691 /* This table is a case flipping table. */
692
693 0, 1, 2, 3, 4, 5, 6, 7,
694 8, 9, 10, 11, 12, 13, 14, 15,
695 16, 17, 18, 19, 20, 21, 22, 23,
696 24, 25, 26, 27, 28, 29, 30, 31,
697 32, 33, 34, 35, 36, 37, 38, 39,
698 40, 41, 42, 43, 44, 45, 46, 47,
699 48, 49, 50, 51, 52, 53, 54, 55,
700 56, 57, 58, 59, 60, 61, 62, 63,
701 64, 97, 98, 99,100,101,102,103,
702 104,105,106,107,108,109,110,111,
703 112,113,114,115,116,117,118,119,
704 120,121,122, 91, 92, 93, 94, 95,
705 96, 65, 66, 67, 68, 69, 70, 71,
706 72, 73, 74, 75, 76, 77, 78, 79,
707 80, 81, 82, 83, 84, 85, 86, 87,
708 88, 89, 90,123,124,125,126,127,
709 128,129,130,131,132,133,134,135,
710 136,137,138,139,140,141,142,143,
711 144,145,146,147,148,149,150,151,
712 152,153,154,155,156,157,158,159,
713 160,161,162,163,164,165,166,167,
714 168,169,170,171,172,173,174,175,
715 176,177,178,179,180,181,182,183,
716 184,185,186,187,188,189,190,191,
717 192,193,194,195,196,197,198,199,
718 200,201,202,203,204,205,206,207,
719 208,209,210,211,212,213,214,215,
720 216,217,218,219,220,221,222,223,
721 224,225,226,227,228,229,230,231,
722 232,233,234,235,236,237,238,239,
723 240,241,242,243,244,245,246,247,
724 248,249,250,251,252,253,254,255,
725
726 /* This table contains bit maps for various character classes. Each map is 32
727 bytes long and the bits run from the least significant end of each byte. The
728 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
729 graph, print, punct, and cntrl. Other classes are built from combinations. */
730
731 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
732 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
733 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
734 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
735
736 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
737 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
738 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
739 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
740
741 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
742 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
743 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
744 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
745
746 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
747 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
748 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
749 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
750
751 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
752 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
753 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
754 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
755
756 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
757 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
758 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
759 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
760
761 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
762 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
763 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
764 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
765
766 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
767 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
768 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
769 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
770
771 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
772 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
773 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
774 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
775
776 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
777 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
778 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
779 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
780
781 /* This table identifies various classes of character by individual bits:
782 0x01 white space character
783 0x02 letter
784 0x04 decimal digit
785 0x08 hexadecimal digit
786 0x10 alphanumeric or '_'
787 0x80 regular expression metacharacter or binary zero
788 */
789
790 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
791 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
792 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
793 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
794 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
795 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
796 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
797 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
798 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
799 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
800 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
801 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
802 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
803 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
804 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
805 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
806 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
807 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
808 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
809 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
810 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
811 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
812 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
813 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
814 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
815 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
816 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
817 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
818 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
819 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
820 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
821 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
822
823 /* This is a set of tables that came originally from a Windows user. It seems to
824 be at least an approximation of ISO 8859. In particular, there are characters
825 greater than 128 that are marked as spaces, letters, etc. */
826
827 static const pcre_uint8 tables1[] = {
828 0,1,2,3,4,5,6,7,
829 8,9,10,11,12,13,14,15,
830 16,17,18,19,20,21,22,23,
831 24,25,26,27,28,29,30,31,
832 32,33,34,35,36,37,38,39,
833 40,41,42,43,44,45,46,47,
834 48,49,50,51,52,53,54,55,
835 56,57,58,59,60,61,62,63,
836 64,97,98,99,100,101,102,103,
837 104,105,106,107,108,109,110,111,
838 112,113,114,115,116,117,118,119,
839 120,121,122,91,92,93,94,95,
840 96,97,98,99,100,101,102,103,
841 104,105,106,107,108,109,110,111,
842 112,113,114,115,116,117,118,119,
843 120,121,122,123,124,125,126,127,
844 128,129,130,131,132,133,134,135,
845 136,137,138,139,140,141,142,143,
846 144,145,146,147,148,149,150,151,
847 152,153,154,155,156,157,158,159,
848 160,161,162,163,164,165,166,167,
849 168,169,170,171,172,173,174,175,
850 176,177,178,179,180,181,182,183,
851 184,185,186,187,188,189,190,191,
852 224,225,226,227,228,229,230,231,
853 232,233,234,235,236,237,238,239,
854 240,241,242,243,244,245,246,215,
855 248,249,250,251,252,253,254,223,
856 224,225,226,227,228,229,230,231,
857 232,233,234,235,236,237,238,239,
858 240,241,242,243,244,245,246,247,
859 248,249,250,251,252,253,254,255,
860 0,1,2,3,4,5,6,7,
861 8,9,10,11,12,13,14,15,
862 16,17,18,19,20,21,22,23,
863 24,25,26,27,28,29,30,31,
864 32,33,34,35,36,37,38,39,
865 40,41,42,43,44,45,46,47,
866 48,49,50,51,52,53,54,55,
867 56,57,58,59,60,61,62,63,
868 64,97,98,99,100,101,102,103,
869 104,105,106,107,108,109,110,111,
870 112,113,114,115,116,117,118,119,
871 120,121,122,91,92,93,94,95,
872 96,65,66,67,68,69,70,71,
873 72,73,74,75,76,77,78,79,
874 80,81,82,83,84,85,86,87,
875 88,89,90,123,124,125,126,127,
876 128,129,130,131,132,133,134,135,
877 136,137,138,139,140,141,142,143,
878 144,145,146,147,148,149,150,151,
879 152,153,154,155,156,157,158,159,
880 160,161,162,163,164,165,166,167,
881 168,169,170,171,172,173,174,175,
882 176,177,178,179,180,181,182,183,
883 184,185,186,187,188,189,190,191,
884 224,225,226,227,228,229,230,231,
885 232,233,234,235,236,237,238,239,
886 240,241,242,243,244,245,246,215,
887 248,249,250,251,252,253,254,223,
888 192,193,194,195,196,197,198,199,
889 200,201,202,203,204,205,206,207,
890 208,209,210,211,212,213,214,247,
891 216,217,218,219,220,221,222,255,
892 0,62,0,0,1,0,0,0,
893 0,0,0,0,0,0,0,0,
894 32,0,0,0,1,0,0,0,
895 0,0,0,0,0,0,0,0,
896 0,0,0,0,0,0,255,3,
897 126,0,0,0,126,0,0,0,
898 0,0,0,0,0,0,0,0,
899 0,0,0,0,0,0,0,0,
900 0,0,0,0,0,0,255,3,
901 0,0,0,0,0,0,0,0,
902 0,0,0,0,0,0,12,2,
903 0,0,0,0,0,0,0,0,
904 0,0,0,0,0,0,0,0,
905 254,255,255,7,0,0,0,0,
906 0,0,0,0,0,0,0,0,
907 255,255,127,127,0,0,0,0,
908 0,0,0,0,0,0,0,0,
909 0,0,0,0,254,255,255,7,
910 0,0,0,0,0,4,32,4,
911 0,0,0,128,255,255,127,255,
912 0,0,0,0,0,0,255,3,
913 254,255,255,135,254,255,255,7,
914 0,0,0,0,0,4,44,6,
915 255,255,127,255,255,255,127,255,
916 0,0,0,0,254,255,255,255,
917 255,255,255,255,255,255,255,127,
918 0,0,0,0,254,255,255,255,
919 255,255,255,255,255,255,255,255,
920 0,2,0,0,255,255,255,255,
921 255,255,255,255,255,255,255,127,
922 0,0,0,0,255,255,255,255,
923 255,255,255,255,255,255,255,255,
924 0,0,0,0,254,255,0,252,
925 1,0,0,248,1,0,0,120,
926 0,0,0,0,254,255,255,255,
927 0,0,128,0,0,0,128,0,
928 255,255,255,255,0,0,0,0,
929 0,0,0,0,0,0,0,128,
930 255,255,255,255,0,0,0,0,
931 0,0,0,0,0,0,0,0,
932 128,0,0,0,0,0,0,0,
933 0,1,1,0,1,1,0,0,
934 0,0,0,0,0,0,0,0,
935 0,0,0,0,0,0,0,0,
936 1,0,0,0,128,0,0,0,
937 128,128,128,128,0,0,128,0,
938 28,28,28,28,28,28,28,28,
939 28,28,0,0,0,0,0,128,
940 0,26,26,26,26,26,26,18,
941 18,18,18,18,18,18,18,18,
942 18,18,18,18,18,18,18,18,
943 18,18,18,128,128,0,128,16,
944 0,26,26,26,26,26,26,18,
945 18,18,18,18,18,18,18,18,
946 18,18,18,18,18,18,18,18,
947 18,18,18,128,128,0,0,0,
948 0,0,0,0,0,1,0,0,
949 0,0,0,0,0,0,0,0,
950 0,0,0,0,0,0,0,0,
951 0,0,0,0,0,0,0,0,
952 1,0,0,0,0,0,0,0,
953 0,0,18,0,0,0,0,0,
954 0,0,20,20,0,18,0,0,
955 0,20,18,0,0,0,0,0,
956 18,18,18,18,18,18,18,18,
957 18,18,18,18,18,18,18,18,
958 18,18,18,18,18,18,18,0,
959 18,18,18,18,18,18,18,18,
960 18,18,18,18,18,18,18,18,
961 18,18,18,18,18,18,18,18,
962 18,18,18,18,18,18,18,0,
963 18,18,18,18,18,18,18,18
964 };
965
966
967
968
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that lack strerror() (SunOS4 is the example the
authors give). The message is looked up in the library's sys_errlist[] vector,
whose entry count is sys_nerr.

Argument:
  n         an error number

Returns:    sys_errlist[n] when 0 <= n < sys_nerr,
            otherwise the fixed text "unknown error number"
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif  /* HAVE_STRERROR */
988
989
990 /*************************************************
991 * JIT memory callback *
992 *************************************************/
993
994 static pcre_jit_stack* jit_callback(void *arg)
995 {
996 return (pcre_jit_stack *)arg;
997 }
998
999
1000 /*************************************************
1001 * Convert UTF-8 string to value *
1002 *************************************************/
1003
1004 /* This function takes one or more bytes that represents a UTF-8 character,
1005 and returns the value of the character.
1006
1007 Argument:
1008 utf8bytes a pointer to the byte vector
1009 vptr a pointer to an int to receive the value
1010
1011 Returns: > 0 => the number of bytes consumed
1012 -6 to 0 => malformed UTF-8 character at offset = (-return)
1013 */
1014
1015 #if !defined NOUTF8
1016
1017 static int
1018 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1019 {
1020 int c = *utf8bytes++;
1021 int d = c;
1022 int i, j, s;
1023
1024 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1025 {
1026 if ((d & 0x80) == 0) break;
1027 d <<= 1;
1028 }
1029
1030 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1031 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1032
1033 /* i now has a value in the range 1-5 */
1034
1035 s = 6*i;
1036 d = (c & utf8_table3[i]) << s;
1037
1038 for (j = 0; j < i; j++)
1039 {
1040 c = *utf8bytes++;
1041 if ((c & 0xc0) != 0x80) return -(j+1);
1042 s -= 6;
1043 d |= (c & 0x3f) << s;
1044 }
1045
1046 /* Check that encoding was the correct unique one */
1047
1048 for (j = 0; j < utf8_table1_size; j++)
1049 if (d <= utf8_table1[j]) break;
1050 if (j != i) return -(i+1);
1051
1052 /* Valid value */
1053
1054 *vptr = d;
1055 return i+1;
1056 }
1057
1058 #endif
1059
1060
1061
1062 /*************************************************
1063 * Convert character value to UTF-8 *
1064 *************************************************/
1065
1066 /* This function takes an integer value in the range 0 - 0x7fffffff
1067 and encodes it as a UTF-8 character in 0 to 6 bytes.
1068
1069 Arguments:
1070 cvalue the character value
1071 utf8bytes pointer to buffer for result - at least 6 bytes long
1072
1073 Returns: number of characters placed in the buffer
1074 */
1075
1076 #if !defined NOUTF8
1077
1078 static int
1079 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1080 {
1081 register int i, j;
1082 for (i = 0; i < utf8_table1_size; i++)
1083 if (cvalue <= utf8_table1[i]) break;
1084 utf8bytes += i;
1085 for (j = i; j > 0; j--)
1086 {
1087 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1088 cvalue >>= 6;
1089 }
1090 *utf8bytes = utf8_table2[i] | cvalue;
1091 return i + 1;
1092 }
1093
1094 #endif
1095
1096
1097
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* Copy a byte string into buffer16 as 16-bit data items, growing buffer16
first if it is too small. The result is always left in buffer16, followed by a
zero item.

In non-UTF mode each byte becomes one 16-bit item, so the space needed is
exactly double the 8-bit size. In UTF mode the space needed is never more than
double: a UTF-8 character of 1 to 3 bytes (values up to 0xffff) becomes one
16-bit item, and a 4-byte character (values up to 0x10ffff) becomes a surrogate
pair of two items. In both modes two further bytes hold the terminating zero.

UTF-16 cannot represent values above 0x10ffff. The UTF-8 decoder used here
accepts the obsolete 5-byte and 6-byte forms, so such values can turn up; they
are rejected, because splitting them into a "surrogate pair" would overflow the
10 bits available in the high surrogate and produce garbage.

Arguments:
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed or contains a value that
                cannot be represented in UTF-16
*/

static int
to16(pcre_uint8 *p, int utf, int len)
{
  pcre_uint16 *pp;

  if (buffer16_size < 2*len + 2)
  {
    /* The NULL test is kept on purpose: this file still caters for pre-ANSI
    libraries, where free(NULL) is not guaranteed to be harmless. */

    if (buffer16 != NULL) free(buffer16);
    buffer16_size = 2*len + 2;
    buffer16 = (pcre_uint16 *)malloc(buffer16_size);
    if (buffer16 == NULL)
    {
      fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
      exit(1);
    }
  }

  pp = buffer16;

  if (!utf)
  {
    while (len-- > 0) *pp++ = *p++;
  }

  else
  {
    int c;
    while (len > 0)
    {
      int chlen = utf82ord(p, &c);
      if (chlen <= 0) return -1;
      if (c > 0x10ffff) return -1;     /* Not representable in UTF-16 */
      p += chlen;
      len -= chlen;
      if (c < 0x10000) *pp++ = c; else
      {
        c -= 0x10000;
        *pp++ = 0xD800 | (c >> 10);
        *pp++ = 0xDC00 | (c & 0x3ff);
      }
    }
  }

  *pp = 0;
  return (int)(pp - buffer16);
}
#endif
1164
1165
1166 /*************************************************
1167 * Read or extend an input line *
1168 *************************************************/
1169
1170 /* Input lines are read into buffer, but both patterns and data lines can be
1171 continued over multiple input lines. In addition, if the buffer fills up, we
1172 want to automatically expand it so as to be able to handle extremely large
1173 lines that are needed for certain stress tests. When the input buffer is
1174 expanded, the other two buffers must also be expanded likewise, and the
1175 contents of pbuffer, which are a copy of the input for callouts, must be
1176 preserved (for when expansion happens for a data line). This is not the most
1177 optimal way of handling this, but hey, this is just a test program!
1178
1179 Arguments:
1180 f the file to read
1181 start where in buffer to start (this *must* be within buffer)
1182 prompt for stdin or readline()
1183
1184 Returns: pointer to the start of new data
1185 could be a copy of start, or could be moved
1186 NULL if no data read and EOF reached
1187 */
1188
1189 static pcre_uint8 *
1190 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1191 {
1192 pcre_uint8 *here = start;
1193
1194 for (;;)
1195 {
1196 int rlen = (int)(buffer_size - (here - buffer));
1197
1198 if (rlen > 1000)
1199 {
1200 int dlen;
1201
1202 /* If libreadline support is required, use readline() to read a line if the
1203 input is a terminal. Note that readline() removes the trailing newline, so
1204 we must put it back again, to be compatible with fgets(). */
1205
1206 #ifdef SUPPORT_LIBREADLINE
1207 if (isatty(fileno(f)))
1208 {
1209 size_t len;
1210 char *s = readline(prompt);
1211 if (s == NULL) return (here == start)? NULL : start;
1212 len = strlen(s);
1213 if (len > 0) add_history(s);
1214 if (len > rlen - 1) len = rlen - 1;
1215 memcpy(here, s, len);
1216 here[len] = '\n';
1217 here[len+1] = 0;
1218 free(s);
1219 }
1220 else
1221 #endif
1222
1223 /* Read the next line by normal means, prompting if the file is stdin. */
1224
1225 {
1226 if (f == stdin) printf("%s", prompt);
1227 if (fgets((char *)here, rlen, f) == NULL)
1228 return (here == start)? NULL : start;
1229 }
1230
1231 dlen = (int)strlen((char *)here);
1232 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1233 here += dlen;
1234 }
1235
1236 else
1237 {
1238 int new_buffer_size = 2*buffer_size;
1239 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1240 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1241 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1242
1243 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1244 {
1245 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1246 exit(1);
1247 }
1248
1249 memcpy(new_buffer, buffer, buffer_size);
1250 memcpy(new_pbuffer, pbuffer, buffer_size);
1251
1252 buffer_size = new_buffer_size;
1253
1254 start = new_buffer + (start - buffer);
1255 here = new_buffer + (here - buffer);
1256
1257 free(buffer);
1258 free(dbuffer);
1259 free(pbuffer);
1260
1261 buffer = new_buffer;
1262 dbuffer = new_dbuffer;
1263 pbuffer = new_pbuffer;
1264 }
1265 }
1266
1267 return NULL; /* Control never gets here */
1268 }
1269
1270
1271
1272 /*************************************************
1273 * Read number from string *
1274 *************************************************/
1275
1276 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1277 around with conditional compilation, just do the job by hand. It is only used
1278 for unpicking arguments, so just keep it simple.
1279
1280 Arguments:
1281 str string to be converted
1282 endptr where to put the end pointer
1283
1284 Returns: the unsigned long
1285 */
1286
1287 static int
1288 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1289 {
1290 int result = 0;
1291 while(*str != 0 && isspace(*str)) str++;
1292 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1293 *endptr = str;
1294 return(result);
1295 }
1296
1297
1298
1299 /*************************************************
1300 * Print one character *
1301 *************************************************/
1302
1303 /* Print a single character either literally, or as a hex escape. */
1304
1305 static int pchar(int c, FILE *f)
1306 {
1307 if (PRINTOK(c))
1308 {
1309 if (f != NULL) fprintf(f, "%c", c);
1310 return 1;
1311 }
1312
1313 if (c < 0x100)
1314 {
1315 if (use_utf)
1316 {
1317 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1318 return 6;
1319 }
1320 else
1321 {
1322 if (f != NULL) fprintf(f, "\\x%02x", c);
1323 return 4;
1324 }
1325 }
1326
1327 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1328 return (c <= 0x000000ff)? 6 :
1329 (c <= 0x00000fff)? 7 :
1330 (c <= 0x0000ffff)? 8 :
1331 (c <= 0x000fffff)? 9 : 10;
1332 }
1333
1334
1335
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Output a string of bytes by way of pchar(). When use_utf is set, each
position is first offered to the UTF-8 decoder; a decoded character is used
only if it is valid and lies wholly within the remaining length, and otherwise
the single byte at that position is output on its own. If handed a NULL file,
just counts chars without printing.

Arguments:
  p         the string
  length    number of bytes, or negative if the string is zero-terminated
  f         the file to write to, or NULL

Returns:    number of characters printed (the sum of the pchar() results)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
  int remaining = (length < 0)? (int)strlen((char *)p) : length;
  int total = 0;

  while (remaining > 0)
  {
    int ch = *p;               /* Default: take one byte as it stands */
    int used = 1;

#if !defined NOUTF8
    if (use_utf)
    {
      int decoded;
      int rc = utf82ord(p, &decoded);
      if (rc > 0 && rc <= remaining)   /* Mustn't run over the end */
      {
        ch = decoded;
        used = rc;
      }
    }
#endif

    p += used;
    remaining -= used;
    total += pchar(ch, f);
  }

  return total;
}
#endif
1374
1375
1376
1377 #ifdef SUPPORT_PCRE16
1378 /*************************************************
1379 * Find length of 0-terminated 16-bit string *
1380 *************************************************/
1381
1382 static int strlen16(PCRE_SPTR16 p)
1383 {
1384 int len = 0;
1385 while (*p++ != 0) len++;
1386 return len;
1387 }
1388
1389
1390
1391 /*************************************************
1392 * Print 16-bit character string *
1393 *************************************************/
1394
1395 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1396 If handed a NULL file, just counts chars without printing. */
1397
1398 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1399 {
1400 int yield = 0;
1401
1402 if (length < 0)
1403 length = strlen16(p);
1404
1405 while (length-- > 0)
1406 {
1407 int c = *p++ & 0xffff;
1408 #if !defined NOUTF8
1409 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1410 {
1411 int d = *p & 0xffff;
1412 if (d >= 0xDC00 && d < 0xDFFF)
1413 {
1414 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1415 length--;
1416 p++;
1417 }
1418 }
1419 #endif
1420 yield += pchar(c, f);
1421 }
1422
1423 return yield;
1424 }
1425 #endif
1426
1427
1428
1429 /*************************************************
1430 * Callout function *
1431 *************************************************/
1432
1433 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1434 the match. Yield zero unless more callouts than the fail count, or the callout
1435 data is not zero. */
1436
1437 static int callout(pcre_callout_block *cb)
1438 {
1439 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1440 int i, pre_start, post_start, subject_length;
1441
1442 if (callout_extra)
1443 {
1444 fprintf(f, "Callout %d: last capture = %d\n",
1445 cb->callout_number, cb->capture_last);
1446
1447 for (i = 0; i < cb->capture_top * 2; i += 2)
1448 {
1449 if (cb->offset_vector[i] < 0)
1450 fprintf(f, "%2d: <unset>\n", i/2);
1451 else
1452 {
1453 fprintf(f, "%2d: ", i/2);
1454 PCHARSV(cb->subject + cb->offset_vector[i],
1455 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1456 fprintf(f, "\n");
1457 }
1458 }
1459 }
1460
1461 /* Re-print the subject in canonical form, the first time or if giving full
1462 datails. On subsequent calls in the same match, we use pchars just to find the
1463 printed lengths of the substrings. */
1464
1465 if (f != NULL) fprintf(f, "--->");
1466
1467 PCHARS(pre_start, cb->subject, cb->start_match, f);
1468 PCHARS(post_start, cb->subject + cb->start_match,
1469 cb->current_position - cb->start_match, f);
1470
1471 PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1472
1473 PCHARSV(cb->subject + cb->current_position,
1474 cb->subject_length - cb->current_position, f);
1475
1476 if (f != NULL) fprintf(f, "\n");
1477
1478 /* Always print appropriate indicators, with callout number if not already
1479 shown. For automatic callouts, show the pattern offset. */
1480
1481 if (cb->callout_number == 255)
1482 {
1483 fprintf(outfile, "%+3d ", cb->pattern_position);
1484 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1485 }
1486 else
1487 {
1488 if (callout_extra) fprintf(outfile, " ");
1489 else fprintf(outfile, "%3d ", cb->callout_number);
1490 }
1491
1492 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1493 fprintf(outfile, "^");
1494
1495 if (post_start > 0)
1496 {
1497 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1498 fprintf(outfile, "^");
1499 }
1500
1501 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1502 fprintf(outfile, " ");
1503
1504 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1505 pbuffer + cb->pattern_position);
1506
1507 fprintf(outfile, "\n");
1508 first_callout = 0;
1509
1510 if (cb->mark != last_callout_mark)
1511 {
1512 fprintf(outfile, "Latest Mark: %s\n",
1513 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1514 last_callout_mark = cb->mark;
1515 }
1516
1517 if (cb->callout_data != NULL)
1518 {
1519 int callout_data = *((int *)(cb->callout_data));
1520 if (callout_data != 0)
1521 {
1522 fprintf(outfile, "Callout data = %d\n", callout_data);
1523 return callout_data;
1524 }
1525 }
1526
1527 return (cb->callout_number != callout_fail_id)? 0 :
1528 (++callout_count >= callout_fail_count)? 1 : 0;
1529 }
1530
1531
1532 /*************************************************
1533 * Local malloc functions *
1534 *************************************************/
1535
1536 /* Alternative malloc function, to test functionality and save the size of a
1537 compiled re, which is the first store request that pcre_compile() makes. The
1538 show_malloc variable is set only during matching. */
1539
1540 static void *new_malloc(size_t size)
1541 {
1542 void *block = malloc(size);
1543 gotten_store = size;
1544 if (first_gotten_store == 0) first_gotten_store = size;
1545 if (show_malloc)
1546 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1547 return block;
1548 }
1549
1550 static void new_free(void *block)
1551 {
1552 if (show_malloc)
1553 fprintf(outfile, "free %p\n", block);
1554 free(block);
1555 }
1556
1557 /* For recursion malloc/free, to test stacking calls */
1558
1559 static void *stack_malloc(size_t size)
1560 {
1561 void *block = malloc(size);
1562 if (show_malloc)
1563 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1564 return block;
1565 }
1566
1567 static void stack_free(void *block)
1568 {
1569 if (show_malloc)
1570 fprintf(outfile, "stack_free %p\n", block);
1571 free(block);
1572 }
1573
1574
1575 /*************************************************
1576 * Call pcre_fullinfo() *
1577 *************************************************/
1578
1579 /* Get one piece of information from the pcre_fullinfo() function. When only
1580 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1581 value, but the code is defensive. */
1582
1583 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1584 {
1585 int rc;
1586
1587 if (use_pcre16)
1588 #ifdef SUPPORT_PCRE16
1589 rc = pcre16_fullinfo(re, study, option, ptr);
1590 #else
1591 rc = PCRE_ERROR_BADMODE;
1592 #endif
1593 else
1594 #ifdef SUPPORT_PCRE8
1595 rc = pcre_fullinfo(re, study, option, ptr);
1596 #else
1597 rc = PCRE_ERROR_BADMODE;
1598 #endif
1599
1600 if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1601 use_pcre16? "16" : "", option);
1602 }
1603
1604
1605
1606 /*************************************************
1607 * Swap byte functions *
1608 *************************************************/
1609
1610 /* The following functions swap the bytes of a pcre_uint16
1611 and pcre_uint32 value.
1612
1613 Arguments:
1614 value any number
1615
1616 Returns: the byte swapped value
1617 */
1618
1619 static pcre_uint32
1620 swap_uint32(pcre_uint32 value)
1621 {
1622 return ((value & 0x000000ff) << 24) |
1623 ((value & 0x0000ff00) << 8) |
1624 ((value & 0x00ff0000) >> 8) |
1625 (value >> 24);
1626 }
1627
1628 static pcre_uint16
1629 swap_uint16(pcre_uint16 value)
1630 {
1631 return (value >> 8) | (value << 8);
1632 }
1633
1634
1635
1636 /*************************************************
1637 * Flip bytes in a compiled pattern *
1638 *************************************************/
1639
1640 /* This function is called if the 'F' option was present on a pattern that is
1641 to be written to a file. We flip the bytes of all the integer fields in the
1642 regex data block and the study block. In 16-bit mode this also flips relevant
1643 bytes in the pattern itself. This is to make it possible to test PCRE's
1644 ability to reload byte-flipped patterns, e.g. those compiled on a different
1645 architecture. */
1646
1647 static void
1648 regexflip(pcre *ere, pcre_extra *extra)
1649 {
1650 real_pcre *re = (real_pcre *)ere;
1651 int op;
1652
1653 #ifdef SUPPORT_PCRE16
1654 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1655 int length = re->name_count * re->name_entry_size;
1656 #ifdef SUPPORT_UTF
1657 BOOL utf = (re->options & PCRE_UTF16) != 0;
1658 BOOL utf16_char = FALSE;
1659 #endif /* SUPPORT_UTF */
1660 #endif /* SUPPORT_PCRE16 */
1661
1662 /* Always flip the bytes in the main data block and study blocks. */
1663
1664 re->magic_number = REVERSED_MAGIC_NUMBER;
1665 re->size = swap_uint32(re->size);
1666 re->options = swap_uint32(re->options);
1667 re->flags = swap_uint16(re->flags);
1668 re->top_bracket = swap_uint16(re->top_bracket);
1669 re->top_backref = swap_uint16(re->top_backref);
1670 re->first_char = swap_uint16(re->first_char);
1671 re->req_char = swap_uint16(re->req_char);
1672 re->name_table_offset = swap_uint16(re->name_table_offset);
1673 re->name_entry_size = swap_uint16(re->name_entry_size);
1674 re->name_count = swap_uint16(re->name_count);
1675
1676 if (extra != NULL)
1677 {
1678 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1679 rsd->size = swap_uint32(rsd->size);
1680 rsd->flags = swap_uint32(rsd->flags);
1681 rsd->minlength = swap_uint32(rsd->minlength);
1682 }
1683
1684 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1685 in the name table, if present, and then in the pattern itself. */
1686
1687 #ifdef SUPPORT_PCRE16
1688 if (!use_pcre16) return;
1689
1690 while(TRUE)
1691 {
1692 /* Swap previous characters. */
1693 while (length-- > 0)
1694 {
1695 *ptr = swap_uint16(*ptr);
1696 ptr++;
1697 }
1698 #ifdef SUPPORT_UTF
1699 if (utf16_char)
1700 {
1701 if ((ptr[-1] & 0xfc00) == 0xd800)
1702 {
1703 /* We know that there is only one extra character in UTF-16. */
1704 *ptr = swap_uint16(*ptr);
1705 ptr++;
1706 }
1707 }
1708 utf16_char = FALSE;
1709 #endif /* SUPPORT_UTF */
1710
1711 /* Get next opcode. */
1712
1713 length = 0;
1714 op = *ptr;
1715 *ptr++ = swap_uint16(op);
1716
1717 switch (op)
1718 {
1719 case OP_END:
1720 return;
1721
1722 #ifdef SUPPORT_UTF
1723 case OP_CHAR:
1724 case OP_CHARI:
1725 case OP_NOT:
1726 case OP_NOTI:
1727 case OP_STAR:
1728 case OP_MINSTAR:
1729 case OP_PLUS:
1730 case OP_MINPLUS:
1731 case OP_QUERY:
1732 case OP_MINQUERY:
1733 case OP_UPTO:
1734 case OP_MINUPTO:
1735 case OP_EXACT:
1736 case OP_POSSTAR:
1737 case OP_POSPLUS:
1738 case OP_POSQUERY:
1739 case OP_POSUPTO:
1740 case OP_STARI:
1741 case OP_MINSTARI:
1742 case OP_PLUSI:
1743 case OP_MINPLUSI:
1744 case OP_QUERYI:
1745 case OP_MINQUERYI:
1746 case OP_UPTOI:
1747 case OP_MINUPTOI:
1748 case OP_EXACTI:
1749 case OP_POSSTARI:
1750 case OP_POSPLUSI:
1751 case OP_POSQUERYI:
1752 case OP_POSUPTOI:
1753 case OP_NOTSTAR:
1754 case OP_NOTMINSTAR:
1755 case OP_NOTPLUS:
1756 case OP_NOTMINPLUS:
1757 case OP_NOTQUERY:
1758 case OP_NOTMINQUERY:
1759 case OP_NOTUPTO:
1760 case OP_NOTMINUPTO:
1761 case OP_NOTEXACT:
1762 case OP_NOTPOSSTAR:
1763 case OP_NOTPOSPLUS:
1764 case OP_NOTPOSQUERY:
1765 case OP_NOTPOSUPTO:
1766 case OP_NOTSTARI:
1767 case OP_NOTMINSTARI:
1768 case OP_NOTPLUSI:
1769 case OP_NOTMINPLUSI:
1770 case OP_NOTQUERYI:
1771 case OP_NOTMINQUERYI:
1772 case OP_NOTUPTOI:
1773 case OP_NOTMINUPTOI:
1774 case OP_NOTEXACTI:
1775 case OP_NOTPOSSTARI:
1776 case OP_NOTPOSPLUSI:
1777 case OP_NOTPOSQUERYI:
1778 case OP_NOTPOSUPTOI:
1779 if (utf) utf16_char = TRUE;
1780 #endif
1781 /* Fall through. */
1782
1783 default:
1784 length = OP_lengths16[op] - 1;
1785 break;
1786
1787 case OP_CLASS:
1788 case OP_NCLASS:
1789 /* Skip the character bit map. */
1790 ptr += 32/sizeof(pcre_uint16);
1791 length = 0;
1792 break;
1793
1794 case OP_XCLASS:
1795 /* Reverse the size of the XCLASS instance. */
1796 ptr++;
1797 *ptr = swap_uint16(*ptr);
1798 if (LINK_SIZE > 1)
1799 {
1800 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1801 ptr++;
1802 *ptr = swap_uint16(*ptr);
1803 }
1804 ptr++;
1805
1806 if (LINK_SIZE > 1)
1807 length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1808 (1 + LINK_SIZE + 1);
1809 else
1810 length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1811
1812 op = *ptr;
1813 *ptr = swap_uint16(op);
1814 if ((op & XCL_MAP) != 0)
1815 {
1816 /* Skip the character bit map. */
1817 ptr += 32/sizeof(pcre_uint16);
1818 length -= 32/sizeof(pcre_uint16);
1819 }
1820 break;
1821 }
1822 }
1823 /* Control should never reach here in 16 bit mode. */
1824 #endif /* SUPPORT_PCRE16 */
1825 }
1826
1827
1828
1829 /*************************************************
1830 * Check match or recursion limit *
1831 *************************************************/
1832
1833 static int
1834 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1835 int start_offset, int options, int *use_offsets, int use_size_offsets,
1836 int flag, unsigned long int *limit, int errnumber, const char *msg)
1837 {
1838 int count;
1839 int min = 0;
1840 int mid = 64;
1841 int max = -1;
1842
1843 extra->flags |= flag;
1844
1845 for (;;)
1846 {
1847 *limit = mid;
1848
1849 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1850 use_offsets, use_size_offsets);
1851
1852 if (count == errnumber)
1853 {
1854 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1855 min = mid;
1856 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1857 }
1858
1859 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1860 count == PCRE_ERROR_PARTIAL)
1861 {
1862 if (mid == min + 1)
1863 {
1864 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1865 break;
1866 }
1867 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1868 max = mid;
1869 mid = (min + mid)/2;
1870 }
1871 else break; /* Some other error */
1872 }
1873
1874 extra->flags &= ~flag;
1875 return count;
1876 }
1877
1878
1879
1880 /*************************************************
1881 * Case-independent strncmp() function *
1882 *************************************************/
1883
1884 /*
1885 Arguments:
1886 s first string
1887 t second string
1888 n number of characters to compare
1889
1890 Returns: < 0, = 0, or > 0, according to the comparison
1891 */
1892
1893 static int
1894 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1895 {
1896 while (n--)
1897 {
1898 int c = tolower(*s++) - tolower(*t++);
1899 if (c) return c;
1900 }
1901 return 0;
1902 }
1903
1904
1905
1906 /*************************************************
1907 * Check newline indicator *
1908 *************************************************/
1909
1910 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1911 a message and return 0 if there is no match.
1912
1913 Arguments:
1914 p points after the leading '<'
1915 f file for error message
1916
1917 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1918 */
1919
1920 static int
1921 check_newline(pcre_uint8 *p, FILE *f)
1922 {
1923 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1924 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1925 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1926 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1927 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1928 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1929 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1930 fprintf(f, "Unknown newline type at: <%s\n", p);
1931 return 0;
1932 }
1933
1934
1935
1936 /*************************************************
1937 * Usage function *
1938 *************************************************/
1939
/* Writes the command line summary to stdout. Which lines appear depends on
the features that were selected when the program was compiled. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
  fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
  fputs(" -16 use 16-bit interface\n", stdout);
#endif
  fputs(" -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s force each pattern to be studied at basic level\n"
        " -s+ force each pattern to be studied, using JIT if available\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1977
1978
1979
1980 /*************************************************
1981 * Main Program *
1982 *************************************************/
1983
1984 /* Read lines from named file or stdin and write to named file or stdout; lines
1985 consist of a regular expression, in delimiters and optionally followed by
1986 options, followed by a set of test data, terminated by an empty line. */
1987
1988 int main(int argc, char **argv)
1989 {
1990 FILE *infile = stdin;
1991 const char *version;
1992 int options = 0;
1993 int study_options = 0;
1994 int default_find_match_limit = FALSE;
1995 int op = 1;
1996 int timeit = 0;
1997 int timeitm = 0;
1998 int showinfo = 0;
1999 int showstore = 0;
2000 int force_study = -1;
2001 int force_study_options = 0;
2002 int quiet = 0;
2003 int size_offsets = 45;
2004 int size_offsets_max;
2005 int *offsets = NULL;
2006 #if !defined NOPOSIX
2007 int posix = 0;
2008 #endif
2009 int debug = 0;
2010 int done = 0;
2011 int all_use_dfa = 0;
2012 int yield = 0;
2013 int stack_size;
2014
2015 pcre_jit_stack *jit_stack = NULL;
2016
2017 /* These vectors store, end-to-end, a list of captured substring names. Assume
2018 that 1024 is plenty long enough for the few names we'll be testing. */
2019
2020 pcre_uchar copynames[1024];
2021 pcre_uchar getnames[1024];
2022
2023 pcre_uchar *copynamesptr;
2024 pcre_uchar *getnamesptr;
2025
2026 /* Get buffers from malloc() so that valgrind will check their misuse when
2027 debugging. They grow automatically when very long lines are read. The 16-bit
2028 buffer (buffer16) is obtained only if needed. */
2029
2030 buffer = (pcre_uint8 *)malloc(buffer_size);
2031 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2032 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2033
2034 /* The outfile variable is static so that new_malloc can use it. */
2035
2036 outfile = stdout;
2037
2038 /* The following _setmode() stuff is some Windows magic that tells its runtime
2039 library to translate CRLF into a single LF character. At least, that's what
2040 I've been told: never having used Windows I take this all on trust. Originally
2041 it set 0x8000, but then I was advised that _O_BINARY was better. */
2042
2043 #if defined(_WIN32) || defined(WIN32)
2044 _setmode( _fileno( stdout ), _O_BINARY );
2045 #endif
2046
2047 /* Get the version number: both pcre_version() and pcre16_version() give the
2048 same answer. We just need to ensure that we call one that is available. */
2049
2050 #ifdef SUPPORT_PCRE8
2051 version = pcre_version();
2052 #else
2053 version = pcre16_version();
2054 #endif
2055
2056 /* Scan options */
2057
2058 while (argc > 1 && argv[op][0] == '-')
2059 {
2060 pcre_uint8 *endptr;
2061
2062 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2063 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2064 else if (strcmp(argv[op], "-s+") == 0)
2065 {
2066 force_study = 1;
2067 force_study_options = PCRE_STUDY_JIT_COMPILE;
2068 }
2069 else if (strcmp(argv[op], "-16") == 0)
2070 {
2071 #ifdef SUPPORT_PCRE16
2072 use_pcre16 = 1;
2073 #else
2074 printf("** This version of PCRE was built without 16-bit support\n");
2075 exit(1);
2076 #endif
2077 }
2078 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2079 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2080 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2081 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2082 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2083 #if !defined NODFA
2084 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2085 #endif
2086 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2087 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2088 *endptr == 0))
2089 {
2090 op++;
2091 argc--;
2092 }
2093 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2094 {
2095 int both = argv[op][2] == 0;
2096 int temp;
2097 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2098 *endptr == 0))
2099 {
2100 timeitm = temp;
2101 op++;
2102 argc--;
2103 }
2104 else timeitm = LOOPREPEAT;
2105 if (both) timeit = timeitm;
2106 }
2107 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2108 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2109 *endptr == 0))
2110 {
2111 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2112 printf("PCRE: -S not supported on this OS\n");
2113 exit(1);
2114 #else
2115 int rc;
2116 struct rlimit rlim;
2117 getrlimit(RLIMIT_STACK, &rlim);
2118 rlim.rlim_cur = stack_size * 1024 * 1024;
2119 rc = setrlimit(RLIMIT_STACK, &rlim);
2120 if (rc != 0)
2121 {
2122 printf("PCRE: setrlimit() failed with error %d\n", rc);
2123 exit(1);
2124 }
2125 op++;
2126 argc--;
2127 #endif
2128 }
2129 #if !defined NOPOSIX
2130 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2131 #endif
2132 else if (strcmp(argv[op], "-C") == 0)
2133 {
2134 int rc;
2135 unsigned long int lrc;
2136 printf("PCRE version %s\n", version);
2137 printf("Compiled with\n");
2138
2139 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2140 are set, either both UTFs are supported or both are not supported. */
2141
2142 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2143 printf(" 8-bit and 16-bit support\n");
2144 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2145 if (rc)
2146 printf(" UTF-8 and UTF-16 support\n");
2147 else
2148 printf(" No UTF-8 or UTF-16 support\n");
2149 #elif defined SUPPORT_PCRE8
2150 printf(" 8-bit support only\n");
2151 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2152 printf(" %sUTF-8 support\n", rc? "" : "No ");
2153 #else
2154 printf(" 16-bit support only\n");
2155 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2156 printf(" %sUTF-16 support\n", rc? "" : "No ");
2157 #endif
2158
2159 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2160 printf(" %sUnicode properties support\n", rc? "" : "No ");
2161 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2162 if (rc)
2163 printf(" Just-in-time compiler support\n");
2164 else
2165 printf(" No just-in-time compiler support\n");
2166 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2167 /* Note that these values are always the ASCII values, even
2168 in EBCDIC environments. CR is 13 and NL is 10. */
2169 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2170 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2171 (rc == -2)? "ANYCRLF" :
2172 (rc == -1)? "ANY" : "???");
2173 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2174 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2175 "all Unicode newlines");
2176 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2177 printf(" Internal link size = %d\n", rc);
2178 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2179 printf(" POSIX malloc threshold = %d\n", rc);
2180 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2181 printf(" Default match limit = %ld\n", lrc);
2182 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2183 printf(" Default recursion depth limit = %ld\n", lrc);
2184 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2185 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2186 goto EXIT;
2187 }
2188 else if (strcmp(argv[op], "-help") == 0 ||
2189 strcmp(argv[op], "--help") == 0)
2190 {
2191 usage();
2192 goto EXIT;
2193 }
2194 else
2195 {
2196 printf("** Unknown or malformed option %s\n", argv[op]);
2197 usage();
2198 yield = 1;
2199 goto EXIT;
2200 }
2201 op++;
2202 argc--;
2203 }
2204
2205 /* Get the store for the offsets vector, and remember what it was */
2206
2207 size_offsets_max = size_offsets;
2208 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2209 if (offsets == NULL)
2210 {
2211 printf("** Failed to get %d bytes of memory for offsets vector\n",
2212 (int)(size_offsets_max * sizeof(int)));
2213 yield = 1;
2214 goto EXIT;
2215 }
2216
2217 /* Sort out the input and output files */
2218
2219 if (argc > 1)
2220 {
2221 infile = fopen(argv[op], INPUT_MODE);
2222 if (infile == NULL)
2223 {
2224 printf("** Failed to open %s\n", argv[op]);
2225 yield = 1;
2226 goto EXIT;
2227 }
2228 }
2229
2230 if (argc > 2)
2231 {
2232 outfile = fopen(argv[op+1], OUTPUT_MODE);
2233 if (outfile == NULL)
2234 {
2235 printf("** Failed to open %s\n", argv[op+1]);
2236 yield = 1;
2237 goto EXIT;
2238 }
2239 }
2240
2241 /* Set alternative malloc function */
2242
2243 #ifdef SUPPORT_PCRE8
2244 pcre_malloc = new_malloc;
2245 pcre_free = new_free;
2246 pcre_stack_malloc = stack_malloc;
2247 pcre_stack_free = stack_free;
2248 #endif
2249
2250 #ifdef SUPPORT_PCRE16
2251 pcre16_malloc = new_malloc;
2252 pcre16_free = new_free;
2253 pcre16_stack_malloc = stack_malloc;
2254 pcre16_stack_free = stack_free;
2255 #endif
2256
2257 /* Heading line unless quiet, then prompt for first regex if stdin */
2258
2259 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2260
2261 /* Main loop */
2262
2263 while (!done)
2264 {
2265 pcre *re = NULL;
2266 pcre_extra *extra = NULL;
2267
2268 #if !defined NOPOSIX /* There are still compilers that require no indent */
2269 regex_t preg;
2270 int do_posix = 0;
2271 #endif
2272
2273 const char *error;
2274 pcre_uint8 *markptr;
2275 pcre_uint8 *p, *pp, *ppp;
2276 pcre_uint8 *to_file = NULL;
2277 const pcre_uint8 *tables = NULL;
2278 unsigned long int true_size, true_study_size = 0;
2279 size_t size, regex_gotten_store;
2280 int do_allcaps = 0;
2281 int do_mark = 0;
2282 int do_study = 0;
2283 int no_force_study = 0;
2284 int do_debug = debug;
2285 int do_G = 0;
2286 int do_g = 0;
2287 int do_showinfo = showinfo;
2288 int do_showrest = 0;
2289 int do_showcaprest = 0;
2290 int do_flip = 0;
2291 int erroroffset, len, delimiter, poffset;
2292
2293 use_utf = 0;
2294 debug_lengths = 1;
2295
2296 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2297 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2298 fflush(outfile);
2299
2300 p = buffer;
2301 while (isspace(*p)) p++;
2302 if (*p == 0) continue;
2303
2304 /* See if the pattern is to be loaded pre-compiled from a file. */
2305
2306 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2307 {
2308 unsigned long int magic, get_options;
2309 pcre_uint8 sbuf[8];
2310 FILE *f;
2311
2312 p++;
2313 pp = p + (int)strlen((char *)p);
2314 while (isspace(pp[-1])) pp--;
2315 *pp = 0;
2316
2317 f = fopen((char *)p, "rb");
2318 if (f == NULL)
2319 {
2320 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2321 continue;
2322 }
2323
2324 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2325
2326 true_size =
2327 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2328 true_study_size =
2329 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2330
2331 re = (real_pcre *)new_malloc(true_size);
2332 regex_gotten_store = first_gotten_store;
2333
2334 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2335
2336 magic = ((real_pcre *)re)->magic_number;
2337 if (magic != MAGIC_NUMBER)
2338 {
2339 if (swap_uint32(magic) == MAGIC_NUMBER)
2340 {
2341 do_flip = 1;
2342 }
2343 else
2344 {
2345 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2346 fclose(f);
2347 continue;
2348 }
2349 }
2350
2351 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2352 do_flip? " (byte-inverted)" : "", p);
2353
2354 /* Now see if there is any following study data. */
2355
2356 if (true_study_size != 0)
2357 {
2358 pcre_study_data *psd;
2359
2360 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2361 extra->flags = PCRE_EXTRA_STUDY_DATA;
2362
2363 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2364 extra->study_data = psd;
2365
2366 if (fread(psd, 1, true_study_size, f) != true_study_size)
2367 {
2368 FAIL_READ:
2369 fprintf(outfile, "Failed to read data from %s\n", p);
2370 if (extra != NULL)
2371 {
2372 PCRE_FREE_STUDY(extra);
2373 }
2374 if (re != NULL) new_free(re);
2375 fclose(f);
2376 continue;
2377 }
2378 fprintf(outfile, "Study data loaded from %s\n", p);
2379 do_study = 1; /* To get the data output if requested */
2380 }
2381 else fprintf(outfile, "No study data\n");
2382
2383 /* Flip the necessary bytes. */
2384 if (do_flip)
2385 {
2386 PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2387 }
2388
2389 /* Need to know if UTF-8 for printing data strings */
2390
2391 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2392 use_utf = (get_options & PCRE_UTF8) != 0;
2393
2394 fclose(f);
2395 goto SHOW_INFO;
2396 }
2397
2398 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2399 the pattern; if it isn't complete, read more. */
2400
2401 delimiter = *p++;
2402
2403 if (isalnum(delimiter) || delimiter == '\\')
2404 {
2405 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2406 goto SKIP_DATA;
2407 }
2408
2409 pp = p;
2410 poffset = (int)(p - buffer);
2411
2412 for(;;)
2413 {
2414 while (*pp != 0)
2415 {
2416 if (*pp == '\\' && pp[1] != 0) pp++;
2417 else if (*pp == delimiter) break;
2418 pp++;
2419 }
2420 if (*pp != 0) break;
2421 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2422 {
2423 fprintf(outfile, "** Unexpected EOF\n");
2424 done = 1;
2425 goto CONTINUE;
2426 }
2427 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2428 }
2429
2430 /* The buffer may have moved while being extended; reset the start of data
2431 pointer to the correct relative point in the buffer. */
2432
2433 p = buffer + poffset;
2434
2435 /* If the first character after the delimiter is backslash, make
2436 the pattern end with backslash. This is purely to provide a way
2437 of testing for the error message when a pattern ends with backslash. */
2438
2439 if (pp[1] == '\\') *pp++ = '\\';
2440
2441 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2442 for callouts. */
2443
2444 *pp++ = 0;
2445 strcpy((char *)pbuffer, (char *)p);
2446
2447 /* Look for options after final delimiter */
2448
2449 options = 0;
2450 study_options = 0;
2451 log_store = showstore; /* default from command line */
2452
2453 while (*pp != 0)
2454 {
2455 switch (*pp++)
2456 {
2457 case 'f': options |= PCRE_FIRSTLINE; break;
2458 case 'g': do_g = 1; break;
2459 case 'i': options |= PCRE_CASELESS; break;
2460 case 'm': options |= PCRE_MULTILINE; break;
2461 case 's': options |= PCRE_DOTALL; break;
2462 case 'x': options |= PCRE_EXTENDED; break;
2463
2464 case '+':
2465 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2466 break;
2467
2468 case '=': do_allcaps = 1; break;
2469 case 'A': options |= PCRE_ANCHORED; break;
2470 case 'B': do_debug = 1; break;
2471 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2472 case 'D': do_debug = do_showinfo = 1; break;
2473 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2474 case 'F': do_flip = 1; break;
2475 case 'G': do_G = 1; break;
2476 case 'I': do_showinfo = 1; break;
2477 case 'J': options |= PCRE_DUPNAMES; break;
2478 case 'K': do_mark = 1; break;
2479 case 'M': log_store = 1; break;
2480 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2481
2482 #if !defined NOPOSIX
2483 case 'P': do_posix = 1; break;
2484 #endif
2485
2486 case 'S':
2487 if (do_study == 0)
2488 {
2489 do_study = 1;
2490 if (*pp == '+')
2491 {
2492 study_options |= PCRE_STUDY_JIT_COMPILE;
2493 pp++;
2494 }
2495 }
2496 else
2497 {
2498 do_study = 0;
2499 no_force_study = 1;
2500 }
2501 break;
2502
2503 case 'U': options |= PCRE_UNGREEDY; break;
2504 case 'W': options |= PCRE_UCP; break;
2505 case 'X': options |= PCRE_EXTRA; break;
2506 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2507 case 'Z': debug_lengths = 0; break;
2508 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2509 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2510
2511 case 'T':
2512 switch (*pp++)
2513 {
2514 case '0': tables = tables0; break;
2515 case '1': tables = tables1; break;
2516
2517 case '\r':
2518 case '\n':
2519 case ' ':
2520 case 0:
2521 fprintf(outfile, "** Missing table number after /T\n");
2522 goto SKIP_DATA;
2523
2524 default:
2525 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2526 goto SKIP_DATA;
2527 }
2528 break;
2529
2530 case 'L':
2531 ppp = pp;
2532 /* The '\r' test here is so that it works on Windows. */
2533 /* The '0' test is just in case this is an unterminated line. */
2534 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2535 *ppp = 0;
2536 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2537 {
2538 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2539 goto SKIP_DATA;
2540 }
2541 locale_set = 1;
2542 tables = PCRE_MAKETABLES;
2543 pp = ppp;
2544 break;
2545
2546 case '>':
2547 to_file = pp;
2548 while (*pp != 0) pp++;
2549 while (isspace(pp[-1])) pp--;
2550 *pp = 0;
2551 break;
2552
2553 case '<':
2554 {
2555 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2556 {
2557 options |= PCRE_JAVASCRIPT_COMPAT;
2558 pp += 3;
2559 }
2560 else
2561 {
2562 int x = check_newline(pp, outfile);
2563 if (x == 0) goto SKIP_DATA;
2564 options |= x;
2565 while (*pp++ != '>');
2566 }
2567 }
2568 break;
2569
2570 case '\r': /* So that it works in Windows */
2571 case '\n':
2572 case ' ':
2573 break;
2574
2575 default:
2576 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2577 goto SKIP_DATA;
2578 }
2579 }
2580
2581 /* Handle compiling via the POSIX interface, which doesn't support the
2582 timing, showing, or debugging options, nor the ability to pass over
2583 local character tables. Neither does it have 16-bit support. */
2584
2585 #if !defined NOPOSIX
2586 if (posix || do_posix)
2587 {
2588 int rc;
2589 int cflags = 0;
2590
2591 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2592 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2593 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2594 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2595 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2596 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2597 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2598
2599 first_gotten_store = 0;
2600 rc = regcomp(&preg, (char *)p, cflags);
2601
2602 /* Compilation failed; go back for another re, skipping to blank line
2603 if non-interactive. */
2604
2605 if (rc != 0)
2606 {
2607 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2608 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2609 goto SKIP_DATA;
2610 }
2611 }
2612
2613 /* Handle compiling via the native interface */
2614
2615 else
2616 #endif /* !defined NOPOSIX */
2617
2618 {
2619 unsigned long int get_options;
2620
2621 /* In 16-bit mode, convert the input. */
2622
2623 #ifdef SUPPORT_PCRE16
2624 if (use_pcre16)
2625 {
2626 if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2627 {
2628 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2629 "converted to UTF-16\n");
2630 goto SKIP_DATA;
2631 }
2632 p = (pcre_uint8 *)buffer16;
2633 }
2634 #endif
2635
2636 /* Compile many times when timing */
2637
2638 if (timeit > 0)
2639 {
2640 register int i;
2641 clock_t time_taken;
2642 clock_t start_time = clock();
2643 for (i = 0; i < timeit; i++)
2644 {
2645 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2646 if (re != NULL) free(re);
2647 }
2648 time_taken = clock() - start_time;
2649 fprintf(outfile, "Compile time %.4f milliseconds\n",
2650 (((double)time_taken * 1000.0) / (double)timeit) /
2651 (double)CLOCKS_PER_SEC);
2652 }
2653
2654 first_gotten_store = 0;
2655 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2656
2657 /* Compilation failed; go back for another re, skipping to blank line
2658 if non-interactive. */
2659
2660 if (re == NULL)
2661 {
2662 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2663 SKIP_DATA:
2664 if (infile != stdin)
2665 {
2666 for (;;)
2667 {
2668 if (extend_inputline(infile, buffer, NULL) == NULL)
2669 {
2670 done = 1;
2671 goto CONTINUE;
2672 }
2673 len = (int)strlen((char *)buffer);
2674 while (len > 0 && isspace(buffer[len-1])) len--;
2675 if (len == 0) break;
2676 }
2677 fprintf(outfile, "\n");
2678 }
2679 goto CONTINUE;
2680 }
2681
2682 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2683 within the regex; check for this so that we know how to process the data
2684 lines. */
2685
2686 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2687 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2688
2689 /* Extract the size for possible writing before possibly flipping it,
2690 and remember the store that was got. */
2691
2692 true_size = ((real_pcre *)re)->size;
2693 regex_gotten_store = first_gotten_store;
2694
2695 /* Output code size information if requested */
2696
2697 if (log_store)
2698 fprintf(outfile, "Memory allocation (code space): %d\n",
2699 (int)(first_gotten_store -
2700 sizeof(real_pcre) -
2701 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2702
2703 /* If -s or /S was present, study the regex to generate additional info to
2704 help with the matching, unless the pattern has the SS option, which
2705 suppresses the effect of /S (used for a few test patterns where studying is
2706 never sensible). */
2707
2708 if (do_study || (force_study >= 0 && !no_force_study))
2709 {
2710 if (timeit > 0)
2711 {
2712 register int i;
2713 clock_t time_taken;
2714 clock_t start_time = clock();
2715 for (i = 0; i < timeit; i++)
2716 {
2717 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2718 }
2719 time_taken = clock() - start_time;
2720 if (extra != NULL)
2721 {
2722 PCRE_FREE_STUDY(extra);
2723 }
2724 fprintf(outfile, " Study time %.4f milliseconds\n",
2725 (((double)time_taken * 1000.0) / (double)timeit) /
2726 (double)CLOCKS_PER_SEC);
2727 }
2728 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2729 if (error != NULL)
2730 fprintf(outfile, "Failed to study: %s\n", error);
2731 else if (extra != NULL)
2732 {
2733 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2734 if (log_store)
2735 {
2736 size_t jitsize;
2737 new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2738 if (jitsize != 0)
2739 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2740 }
2741 }
2742 }
2743
2744 /* If /K was present, we set up for handling MARK data. */
2745
2746 if (do_mark)
2747 {
2748 if (extra == NULL)
2749 {
2750 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2751 extra->flags = 0;
2752 }
2753 extra->mark = &markptr;
2754 extra->flags |= PCRE_EXTRA_MARK;
2755 }
2756
2757 /* Extract and display information from the compiled data if required. */
2758
2759 SHOW_INFO:
2760
2761 if (do_debug)
2762 {
2763 fprintf(outfile, "------------------------------------------------------------------\n");
2764 PCRE_PRINTINT(re, outfile, debug_lengths);
2765 }
2766
2767 /* We already have the options in get_options (see above) */
2768
2769 if (do_showinfo)
2770 {
2771 unsigned long int all_options;
2772 int count, backrefmax, first_char, need_char, okpartial, jchanged,
2773 hascrorlf;
2774 int nameentrysize, namecount;
2775 const pcre_uchar *nametable;
2776
2777 new_info(re, NULL, PCRE_INFO_SIZE, &size);
2778 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2779 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2780 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2781 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2782 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2783 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2784 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2785 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2786 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2787 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2788
2789 if (size != regex_gotten_store) fprintf(outfile,
2790 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2791 (int)size, (int)regex_gotten_store);
2792
2793 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2794 if (backrefmax > 0)
2795 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2796
2797 if (namecount > 0)
2798 {
2799 fprintf(outfile, "Named capturing subpatterns:\n");
2800 while (namecount-- > 0)
2801 {
2802 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2803 int imm2_size = use_pcre16 ? 1 : 2;
2804 #else
2805 int imm2_size = IMM2_SIZE;
2806 #endif
2807 int length = (int)STRLEN(nametable + imm2_size);
2808 fprintf(outfile, " ");
2809 PCHARSV(nametable + imm2_size, length, outfile);
2810 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
2811 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2812 fprintf(outfile, "%3d\n", use_pcre16?
2813 (int)nametable[0] : ((int)nametable[0] << 8) | (int)nametable[1]);
2814 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
2815 #else
2816 fprintf(outfile, "%3d\n", GET2(nametable, 0));
2817 nametable += nameentrysize;
2818 #endif
2819 }
2820 }
2821
2822 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2823 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2824
2825 all_options = ((real_pcre *)re)->options;
2826 if (do_flip) all_options = swap_uint32(all_options);
2827
2828 if (get_options == 0) fprintf(outfile, "No options\n");
2829 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2830 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2831 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2832 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2833 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2834 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2835 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2836 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2837 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2838 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2839 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2840 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2841 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2842 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2843 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2844 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2845 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2846 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2847
2848 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2849
2850 switch (get_options & PCRE_NEWLINE_BITS)
2851 {
2852 case PCRE_NEWLINE_CR:
2853 fprintf(outfile, "Forced newline sequence: CR\n");
2854 break;
2855
2856 case PCRE_NEWLINE_LF:
2857 fprintf(outfile, "Forced newline sequence: LF\n");
2858 break;
2859
2860 case PCRE_NEWLINE_CRLF:
2861 fprintf(outfile, "Forced newline sequence: CRLF\n");
2862 break;
2863
2864 case PCRE_NEWLINE_ANYCRLF:
2865 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2866 break;
2867
2868 case PCRE_NEWLINE_ANY:
2869 fprintf(outfile, "Forced newline sequence: ANY\n");
2870 break;
2871
2872 default:
2873 break;
2874 }
2875
2876 if (first_char == -1)
2877 {
2878 fprintf(outfile, "First char at start or follows newline\n");
2879 }
2880 else if (first_char < 0)
2881 {
2882 fprintf(outfile, "No first char\n");
2883 }
2884 else
2885 {
2886 const char *caseless =
2887 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2888 "" : " (caseless)";
2889
2890 if (PRINTOK(first_char))
2891 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2892 else
2893 {
2894 fprintf(outfile, "First char = ");
2895 pchar(first_char, outfile);
2896 fprintf(outfile, "%s\n", caseless);
2897 }
2898 }
2899
2900 if (need_char < 0)
2901 {
2902 fprintf(outfile, "No need char\n");
2903 }
2904 else
2905 {
2906 const char *caseless =
2907 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2908 "" : " (caseless)";
2909
2910 if (PRINTOK(need_char))
2911 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2912 else
2913 {
2914 fprintf(outfile, "Need char = ");
2915 pchar(need_char, outfile);
2916 fprintf(outfile, "%s\n", caseless);
2917 }
2918 }
2919
2920 /* Don't output study size; at present it is in any case a fixed
2921 value, but it varies, depending on the computer architecture, and
2922 so messes up the test suite. (And with the /F option, it might be
2923 flipped.) If study was forced by an external -s, don't show this
2924 information unless -i or -d was also present. This means that, except
2925 when auto-callouts are involved, the output from runs with and without
2926 -s should be identical. */
2927
2928 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2929 {
2930 if (extra == NULL)
2931 fprintf(outfile, "Study returned NULL\n");
2932 else
2933 {
2934 pcre_uint8 *start_bits = NULL;
2935 int minlength;
2936
2937 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2938 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2939
2940 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2941 if (start_bits == NULL)
2942 fprintf(outfile, "No set of starting bytes\n");
2943 else
2944 {
2945 int i;
2946 int c = 24;
2947 fprintf(outfile, "Starting byte set: ");
2948 for (i = 0; i < 256; i++)
2949 {
2950 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2951 {
2952 if (c > 75)
2953 {
2954 fprintf(outfile, "\n ");
2955 c = 2;
2956 }
2957 if (PRINTOK(i) && i != ' ')
2958 {
2959 fprintf(outfile, "%c ", i);
2960 c += 2;
2961 }
2962 else
2963 {
2964 fprintf(outfile, "\\x%02x ", i);
2965 c += 5;
2966 }
2967 }
2968 }
2969 fprintf(outfile, "\n");
2970 }
2971 }
2972
2973 /* Show this only if the JIT was set by /S, not by -s. */
2974
2975 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2976 {
2977 int jit;
2978 new_info(re, extra, PCRE_INFO_JIT, &jit);
2979 if (jit)
2980 fprintf(outfile, "JIT study was successful\n");
2981 else
2982 #ifdef SUPPORT_JIT
2983 fprintf(outfile, "JIT study was not successful\n");
2984 #else
2985 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2986 #endif
2987 }
2988 }
2989 }
2990
2991 /* If the '>' option was present, we write out the regex to a file, and
2992 that is all. The first 8 bytes of the file are the regex length and then
2993 the study length, in big-endian order. */
2994
2995 if (to_file != NULL)
2996 {
2997 FILE *f = fopen((char *)to_file, "wb");
2998 if (f == NULL)
2999 {
3000 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3001 }
3002 else
3003 {
3004 pcre_uint8 sbuf[8];
3005
3006 if (do_flip) regexflip(re, extra);
3007 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3008 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3009 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3010 sbuf[3] = (pcre_uint8)((true_size) & 255);
3011 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3012 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3013 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3014 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3015
3016 if (fwrite(sbuf, 1, 8, f) < 8 ||
3017 fwrite(re, 1, true_size, f) < true_size)
3018 {
3019 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3020 }
3021 else
3022 {
3023 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3024
3025 /* If there is study data, write it. */
3026
3027 if (extra != NULL)
3028 {
3029 if (fwrite(extra->study_data, 1, true_study_size, f) <
3030 true_study_size)
3031 {
3032 fprintf(outfile, "Write error on %s: %s\n", to_file,
3033 strerror(errno));
3034 }
3035 else fprintf(outfile, "Study data written to %s\n", to_file);
3036 }
3037 }
3038 fclose(f);
3039 }
3040
3041 new_free(re);
3042 if (extra != NULL)
3043 {
3044 PCRE_FREE_STUDY(extra);
3045 }
3046 if (locale_set)
3047 {
3048 new_free((void *)tables);
3049 setlocale(LC_CTYPE, "C");
3050 locale_set = 0;
3051 }
3052 continue; /* With next regex */
3053 }
3054 } /* End of non-POSIX compile */
3055
3056 /* Read data lines and test them */
3057
3058 for (;;)
3059 {
3060 pcre_uint8 *q;
3061 pcre_uint8 *bptr;
3062 int *use_offsets = offsets;
3063 int use_size_offsets = size_offsets;
3064 int callout_data = 0;
3065 int callout_data_set = 0;
3066 int count, c;
3067 int copystrings = 0;
3068 int find_match_limit = default_find_match_limit;
3069 int getstrings = 0;
3070 int getlist = 0;
3071 int gmatched = 0;
3072 int start_offset = 0;
3073 int start_offset_sign = 1;
3074 int g_notempty = 0;
3075 int use_dfa = 0;
3076
3077 options = 0;
3078
3079 *copynames = 0;
3080 copynames[1] = 0;
3081 *getnames = 0;
3082 getnames[1] = 0;
3083
3084 copynamesptr = copynames;
3085 getnamesptr = getnames;
3086
3087 SET_PCRE_CALLOUT(callout);
3088 first_callout = 1;
3089 last_callout_mark = NULL;
3090 callout_extra = 0;
3091 callout_count = 0;
3092 callout_fail_count = 999999;
3093 callout_fail_id = -1;
3094 show_malloc = 0;
3095
3096 if (extra != NULL) extra->flags &=
3097 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3098
3099 len = 0;
3100 for (;;)
3101 {
3102 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3103 {
3104 if (len > 0) /* Reached EOF without hitting a newline */
3105 {
3106 fprintf(outfile, "\n");
3107 break;
3108 }
3109 done = 1;
3110 goto CONTINUE;
3111 }
3112 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3113 len = (int)strlen((char *)buffer);
3114 if (buffer[len-1] == '\n') break;
3115 }
3116
3117 while (len > 0 && isspace(buffer[len-1])) len--;
3118 buffer[len] = 0;
3119 if (len == 0) break;
3120
3121 p = buffer;
3122 while (isspace(*p)) p++;
3123
3124 bptr = q = dbuffer;
3125 while ((c = *p++) != 0)
3126 {
3127 int i = 0;
3128 int n = 0;
3129
3130 if (c == '\\') switch ((c = *p++))
3131 {
3132 case 'a': c = 7; break;
3133 case 'b': c = '\b'; break;
3134 case 'e': c = 27; break;
3135 case 'f': c = '\f'; break;
3136 case 'n': c = '\n'; break;
3137 case 'r': c = '\r'; break;
3138 case 't': c = '\t'; break;
3139 case 'v': c = '\v'; break;
3140
3141 case '0': case '1': case '2': case '3':
3142 case '4': case '5': case '6': case '7':
3143 c -= '0';
3144 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3145 c = c * 8 + *p++ - '0';
3146
3147 #if !defined NOUTF8
3148 if (use_utf && c > 255)
3149 {
3150 pcre_uint8 buff8[8];
3151 int ii, utn;
3152 utn = ord2utf8(c, buff8);
3153 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
3154 c = buff8[ii]; /* Last byte */
3155 }
3156 #endif
3157 break;
3158
3159 case 'x':
3160
3161 /* Handle \x{..} specially - new Perl thing for utf8 */
3162
3163 #if !defined NOUTF8
3164 if (*p == '{')
3165 {
3166 pcre_uint8 *pt = p;
3167 c = 0;
3168
3169 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3170 when isxdigit() is a macro that refers to its argument more than
3171 once. This is banned by the C Standard, but apparently happens in at
3172 least one MacOS environment. */
3173
3174 for (pt++; isxdigit(*pt); pt++)
3175 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3176 if (*pt == '}')
3177 {
3178 pcre_uint8 buff8[8];
3179 int ii, utn;
3180 if (use_utf)
3181 {
3182 utn = ord2utf8(c, buff8);
3183 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
3184 c = buff8[ii]; /* Last byte */
3185 }
3186 else
3187 {
3188 if (c > 255)
3189 {
3190 if (use_pcre16)
3191 fprintf(outfile, "** Character \\x{%x} is greater than 255.\n"
3192 "** Because its input is first processed as 8-bit, pcretest "
3193 "does not\n** support such characters in 16-bit mode when "
3194 "UTF-16 is not set.\n", c);
3195 else
3196 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3197 "and UTF-8 mode is not enabled.\n", c);
3198
3199 fprintf(outfile, "** Truncation will probably give the wrong "
3200 "result.\n");
3201 }
3202 }
3203 p = pt + 1;
3204 break;
3205 }
3206 /* Not correct form; fall through */
3207 }
3208 #endif
3209
3210 /* Ordinary \x */
3211
3212 c = 0;
3213 while (i++ < 2 && isxdigit(*p))
3214 {
3215 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3216 p++;
3217 }
3218 break;
3219
3220 case 0: /* \ followed by EOF allows for an empty line */
3221 p--;
3222 continue;
3223
3224 case '>':
3225 if (*p == '-')
3226 {
3227 start_offset_sign = -1;
3228 p++;
3229 }
3230 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3231 start_offset *= start_offset_sign;
3232 continue;
3233
3234 case 'A': /* Option setting */
3235 options |= PCRE_ANCHORED;
3236 continue;
3237
3238 case 'B':
3239 options |= PCRE_NOTBOL;
3240 continue;
3241
3242 case 'C':
3243 if (isdigit(*p)) /* Set copy string */
3244 {
3245 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3246 copystrings |= 1 << n;
3247 }
3248 else if (isalnum(*p))
3249 {
3250 pcre_uchar *namestart = copynamesptr;
3251 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3252 if (use_pcre16)
3253 {
3254 PCRE_SCHAR16 *npp = (PCRE_SCHAR16 *)copynamesptr;
3255 while (isalnum(*p)) *npp++ = *p++;
3256 *npp++ = 0;
3257 *npp = 0;
3258 PCRE_GET_STRINGNUMBER(n, re, copynamesptr);
3259 copynamesptr = (pcre_uchar *)npp;
3260 }
3261 else
3262 {
3263 #endif
3264 pcre_uchar *npp = copynamesptr;
3265 while (isalnum(*p)) *npp++ = *p++;
3266 *npp++ = 0;
3267 *npp = 0;
3268 PCRE_GET_STRINGNUMBER(n, re, copynamesptr);
3269 copynamesptr = npp;
3270 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3271 }
3272 #endif
3273 if (n < 0)
3274 {
3275 fprintf(outfile, "no parentheses with name \"");
3276 PCHARSV(namestart, -1, outfile);
3277 fprintf(outfile, "\"\n");
3278 }
3279 }
3280 else if (*p == '+')
3281 {
3282 callout_extra = 1;
3283 p++;
3284 }
3285 else if (*p == '-')
3286 {
3287 SET_PCRE_CALLOUT(NULL);
3288 p++;
3289 }
3290 else if (*p == '!')
3291 {
3292 callout_fail_id = 0;
3293 p++;
3294 while(isdigit(*p))
3295 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3296 callout_fail_count = 0;
3297 if (*p == '!')
3298 {
3299 p++;
3300 while(isdigit(*p))
3301 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3302 }
3303 }
3304 else if (*p == '*')
3305 {
3306 int sign = 1;
3307 callout_data = 0;
3308 if (*(++p) == '-') { sign = -1; p++; }
3309 while(isdigit(*p))
3310 callout_data = callout_data * 10 + *p++ - '0';
3311 callout_data *= sign;
3312 callout_data_set = 1;
3313 }
3314 continue;
3315
3316 #if !defined NODFA
3317 case 'D':
3318 #if !defined NOPOSIX
3319 if (posix || do_posix)
3320 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3321 else
3322 #endif
3323 use_dfa = 1;
3324 continue;
3325 #endif
3326
3327 #if !defined NODFA
3328 case 'F':
3329 options |= PCRE_DFA_SHORTEST;
3330 continue;
3331 #endif
3332
3333 case 'G':
3334 if (isdigit(*p))
3335 {
3336 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3337 getstrings |= 1 << n;
3338 }
3339 else if (isalnum(*p))
3340 {
3341 pcre_uchar *namestart = getnamesptr;
3342 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3343 if (use_pcre16)
3344 {
3345 PCRE_SCHAR16 *npp = (PCRE_SCHAR16 *)getnamesptr;
3346 while (isalnum(*p)) *npp++ = *p++;
3347 *npp++ = 0;
3348 *npp = 0;
3349 PCRE_GET_STRINGNUMBER(n, re, getnamesptr);
3350 getnamesptr = (pcre_uchar *)npp;
3351 }
3352 else
3353 {
3354 #endif
3355 pcre_uchar *npp = getnamesptr;
3356 while (isalnum(*p)) *npp++ = *p++;
3357 *npp++ = 0;
3358 *npp = 0;
3359 PCRE_GET_STRINGNUMBER(n, re, getnamesptr);
3360 getnamesptr = npp;
3361 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3362 }
3363 #endif
3364 if (n < 0)
3365 {
3366 fprintf(outfile, "no parentheses with name \"");
3367 PCHARSV(namestart, -1, outfile);
3368 fprintf(outfile, "\"\n");
3369 }
3370 }
3371 continue;
3372
3373 case 'J':
3374 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3375 if (extra != NULL
3376 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3377 && extra->executable_jit != NULL)
3378 {
3379 if (jit_stack != NULL) PCRE_JIT_STACK_FREE(jit_stack);
3380 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3381 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3382 }
3383 continue;
3384
3385 case 'L':
3386 getlist = 1;
3387 continue;
3388
3389 case 'M':
3390 find_match_limit = 1;
3391 continue;
3392
3393 case 'N':
3394 if ((options & PCRE_NOTEMPTY) != 0)
3395 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3396 else
3397 options |= PCRE_NOTEMPTY;
3398 continue;
3399
3400 case 'O':
3401 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3402 if (n > size_offsets_max)
3403 {
3404 size_offsets_max = n;
3405 free(offsets);
3406 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3407 if (offsets == NULL)
3408 {
3409 printf("** Failed to get %d bytes of memory for offsets vector\n",
3410 (int)(size_offsets_max * sizeof(int)));
3411 yield = 1;
3412 goto EXIT;
3413 }
3414 }
3415 use_size_offsets = n;
3416 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3417 continue;
3418
3419 case 'P':
3420 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3421 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3422 continue;
3423
3424 case 'Q':
3425 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3426 if (extra == NULL)
3427 {
3428 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3429 extra->flags = 0;
3430 }
3431 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3432 extra->match_limit_recursion = n;
3433 continue;
3434
3435 case 'q':
3436 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3437 if (extra == NULL)
3438 {
3439 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3440 extra->flags = 0;
3441 }
3442 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3443 extra->match_limit = n;
3444 continue;
3445
3446 #if !defined NODFA
3447 case 'R':
3448 options |= PCRE_DFA_RESTART;
3449 continue;
3450 #endif
3451
3452 case 'S':
3453 show_malloc = 1;
3454 continue;
3455
3456 case 'Y':
3457 options |= PCRE_NO_START_OPTIMIZE;
3458 continue;
3459
3460 case 'Z':
3461 options |= PCRE_NOTEOL;
3462 continue;
3463
3464 case '?':
3465 options |= PCRE_NO_UTF8_CHECK;
3466 continue;
3467
3468 case '<':
3469 {
3470 int x = check_newline(p, outfile);
3471 if (x == 0) goto NEXT_DATA;
3472 options |= x;
3473 while (*p++ != '>');
3474 }
3475 continue;
3476 }
3477 *q++ = c;
3478 }
3479 *q = 0;
3480 len = (int)(q - dbuffer);
3481
3482 /* Move the data to the end of the buffer so that a read over the end of
3483 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3484 we are using the POSIX interface, we must include the terminating zero. */
3485
3486 #if !defined NOPOSIX
3487 if (posix || do_posix)
3488 {
3489 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3490 bptr += buffer_size - len - 1;
3491 }
3492 else
3493 #endif
3494 {
3495 memmove(bptr + buffer_size - len, bptr, len);
3496 bptr += buffer_size - len;
3497 }
3498
3499 if ((all_use_dfa || use_dfa) && find_match_limit)
3500 {
3501 printf("**Match limit not relevant for DFA matching: ignored\n");
3502 find_match_limit = 0;
3503 }
3504
3505 /* Handle matching via the POSIX interface, which does not
3506 support timing or playing with the match limit or callout data. */
3507
3508 #if !defined NOPOSIX
3509 if (posix || do_posix)
3510 {
3511 int rc;
3512 int eflags = 0;
3513 regmatch_t *pmatch = NULL;
3514 if (use_size_offsets > 0)
3515 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3516 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3517 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3518 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3519
3520 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3521
3522 if (rc != 0)
3523 {
3524 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3525 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3526 }
3527 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3528 != 0)
3529 {
3530 fprintf(outfile, "Matched with REG_NOSUB\n");
3531 }
3532 else
3533 {
3534 size_t i;
3535 for (i = 0; i < (size_t)use_size_offsets; i++)
3536 {
3537 if (pmatch[i].rm_so >= 0)
3538 {
3539 fprintf(outfile, "%2d: ", (int)i);
3540 PCHARSV(dbuffer + pmatch[i].rm_so,
3541 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3542 fprintf(outfile, "\n");
3543 if (do_showcaprest || (i == 0 && do_showrest))
3544 {
3545 fprintf(outfile, "%2d+ ", (int)i);
3546 PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3547 outfile);
3548 fprintf(outfile, "\n");
3549 }
3550 }
3551 }
3552 }
3553 free(pmatch);
3554 goto NEXT_DATA;
3555 }
3556
3557 #endif /* !defined NOPOSIX */
3558
3559 /* Handle matching via the native interface - repeats for /g and /G */
3560
3561 #ifdef SUPPORT_PCRE16
3562 if (use_pcre16)
3563 {
3564 len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3565 if (len < 0)
3566 {
3567 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3568 "converted to UTF-16\n");
3569 goto NEXT_DATA;
3570 }
3571 bptr = (pcre_uint8 *)buffer16;
3572 }
3573 #endif
3574
3575 for (;; gmatched++) /* Loop for /g or /G */
3576 {
3577 markptr = NULL;
3578
3579 if (timeitm > 0)
3580 {
3581 register int i;
3582 clock_t time_taken;
3583 clock_t start_time = clock();
3584
3585 #if !defined NODFA
3586 if (all_use_dfa || use_dfa)
3587 {
3588 int workspace[1000];
3589 for (i = 0; i < timeitm; i++)
3590 {
3591 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3592 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3593 (sizeof(workspace)/sizeof(int)));
3594 }
3595 }
3596 else
3597 #endif
3598
3599 for (i = 0; i < timeitm; i++)
3600 {
3601 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3602 (options | g_notempty), use_offsets, use_size_offsets);
3603 }
3604 time_taken = clock() - start_time;
3605 fprintf(outfile, "Execute time %.4f milliseconds\n",
3606 (((double)time_taken * 1000.0) / (double)timeitm) /
3607 (double)CLOCKS_PER_SEC);
3608 }
3609
3610 /* If find_match_limit is set, we want to do repeated matches with
3611 varying limits in order to find the minimum value for the match limit and
3612 for the recursion limit. The match limits are relevant only to the normal
3613 running of pcre_exec(), so disable the JIT optimization. This makes it
3614 possible to run the same set of tests with and without JIT externally
3615 requested. */
3616
3617 if (find_match_limit)
3618 {
3619 if (extra == NULL)
3620 {
3621 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3622 extra->flags = 0;
3623 }
3624 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3625
3626 (void)check_match_limit(re, extra, bptr, len, start_offset,
3627 options|g_notempty, use_offsets, use_size_offsets,
3628 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3629 PCRE_ERROR_MATCHLIMIT, "match()");
3630
3631 count = check_match_limit(re, extra, bptr, len, start_offset,
3632 options|g_notempty, use_offsets, use_size_offsets,
3633 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3634 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3635 }
3636
3637 /* If callout_data is set, use the interface with additional data */
3638
3639 else if (callout_data_set)
3640 {
3641 if (extra == NULL)
3642 {
3643 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3644 extra->flags = 0;
3645 }
3646 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3647 extra->callout_data = &callout_data;
3648 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3649 options | g_notempty, use_offsets, use_size_offsets);
3650 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3651 }
3652
3653 /* The normal case is just to do the match once, with the default
3654 value of match_limit. */
3655
3656 #if !defined NODFA
3657 else if (all_use_dfa || use_dfa)
3658 {
3659 int workspace[1000];
3660 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3661 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3662 (sizeof(workspace)/sizeof(int)));
3663 if (count == 0)
3664 {
3665 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3666 count = use_size_offsets/2;
3667 }
3668 }
3669 #endif
3670
3671 else
3672 {
3673 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3674 options | g_notempty, use_offsets, use_size_offsets);
3675 if (count == 0)
3676 {
3677 fprintf(outfile, "Matched, but too many substrings\n");
3678 count = use_size_offsets/3;
3679 }
3680 }
3681
3682 /* Matched */
3683
3684 if (count >= 0)
3685 {
3686 int i, maxcount;
3687
3688 #if !defined NODFA
3689 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3690 #endif
3691 maxcount = use_size_offsets/3;
3692
3693 /* This is a check against a lunatic return value. */
3694
3695 if (count > maxcount)
3696 {
3697 fprintf(outfile,
3698 "** PCRE error: returned count %d is too big for offset size %d\n",
3699 count, use_size_offsets);
3700 count = use_size_offsets/3;
3701 if (do_g || do_G)
3702 {
3703 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3704 do_g = do_G = FALSE; /* Break g/G loop */
3705 }
3706 }
3707
3708 /* do_allcaps requests showing of all captures in the pattern, to check
3709 unset ones at the end. */
3710
3711 if (do_allcaps)
3712 {
3713 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3714 count++; /* Allow for full match */
3715 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3716 }
3717
3718 /* Output the captured substrings */
3719
3720 for (i = 0; i < count * 2; i += 2)
3721 {
3722 if (use_offsets[i] < 0)
3723 {
3724 if (use_offsets[i] != -1)
3725 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3726 use_offsets[i], i);
3727 if (use_offsets[i+1] != -1)
3728 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3729 use_offsets[i+1], i+1);
3730 fprintf(outfile, "%2d: <unset>\n", i/2);
3731 }
3732 else
3733 {
3734 fprintf(outfile, "%2d: ", i/2);
3735 PCHARSV(bptr + use_offsets[i],
3736 use_offsets[i+1] - use_offsets[i], outfile);
3737 fprintf(outfile, "\n");
3738 if (do_showcaprest || (i == 0 && do_showrest))
3739 {
3740 fprintf(outfile, "%2d+ ", i/2);
3741 PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3742 outfile);
3743 fprintf(outfile, "\n");
3744 }
3745 }
3746 }
3747
3748 if (markptr != NULL)
3749 {
3750 fprintf(outfile, "MK: ");
3751 PCHARSV(markptr, -1, outfile);
3752 fprintf(outfile, "\n");
3753 }
3754
3755 for (i = 0; i < 32; i++)
3756 {
3757 if ((copystrings & (1 << i)) != 0)
3758 {
3759 int rc;
3760 char copybuffer[256];
3761 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
3762 copybuffer, sizeof(copybuffer));
3763 if (rc < 0)
3764 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3765 else
3766 {
3767 fprintf(outfile, "%2dC ", i);
3768 PCHARSV(copybuffer, rc, outfile);
3769 fprintf(outfile, " (%d)\n", rc);
3770 }
3771 }
3772 }
3773
3774 for (copynamesptr = copynames;
3775
3776 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3777 use_pcre16?
3778 (*(PCRE_SCHAR16*)copynamesptr) != 0 : *copynamesptr != 0;
3779 #else
3780 *copynamesptr != 0;
3781 #endif
3782 copynamesptr += (int)(STRLEN(copynamesptr) + 1) * CHAR_SIZE)
3783 {
3784 int rc;
3785 char copybuffer[256];
3786 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
3787 copynamesptr, copybuffer, sizeof(copybuffer));
3788 if (rc < 0)
3789 {
3790 fprintf(outfile, "copy substring ");
3791 PCHARSV(copynamesptr, -1, outfile);
3792 fprintf(outfile, " failed %d\n", rc);
3793 }
3794 else
3795 {
3796 fprintf(outfile, " C ");
3797 PCHARSV(copybuffer, rc, outfile);
3798 fprintf(outfile, " (%d) ", rc);
3799 PCHARSV(copynamesptr, -1, outfile);
3800 putc('\n', outfile);
3801 }
3802 }
3803
3804 for (i = 0; i < 32; i++)
3805 {
3806 if ((getstrings & (1 << i)) != 0)
3807 {
3808 int rc;
3809 const char *substring;
3810 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
3811 if (rc < 0)
3812 fprintf(outfile, "get substring %d failed %d\n", i, rc);
3813 else
3814 {
3815 fprintf(outfile, "%2dG ", i);
3816 PCHARSV(substring, rc, outfile);
3817 fprintf(outfile, " (%d)\n", rc);
3818 PCRE_FREE_SUBSTRING(substring);
3819 }
3820 }
3821 }
3822
3823 for (getnamesptr = getnames;
3824 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3825 use_pcre16?
3826 (*(PCRE_SCHAR16*)getnamesptr) != 0 : *getnamesptr != 0;
3827 #else
3828 *getnamesptr != 0;
3829 #endif
3830 getnamesptr += (int)(STRLEN(getnamesptr) + 1) * CHAR_SIZE)
3831 {
3832 int rc;
3833 const char *substring;
3834 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
3835 getnamesptr, &substring);
3836 if (rc < 0)
3837 {
3838 fprintf(outfile, "get substring ");
3839 PCHARSV(getnamesptr, -1, outfile);
3840 fprintf(outfile, " failed %d\n", rc);
3841 }
3842 else
3843 {
3844 fprintf(outfile, " G ");
3845 PCHARSV(substring, rc, outfile);
3846 fprintf(outfile, " (%d) ", rc);
3847 PCHARSV(getnamesptr, -1, outfile);
3848 PCRE_FREE_SUBSTRING(substring);
3849 putc('\n', outfile);
3850 }
3851 }
3852
3853 if (getlist)
3854 {
3855 int rc;
3856 const char **stringlist;
3857 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
3858 if (rc < 0)
3859 fprintf(outfile, "get substring list failed %d\n", rc);
3860 else
3861 {
3862 for (i = 0; i < count; i++)
3863 {
3864 fprintf(outfile, "%2dL ", i);
3865 PCHARSV(stringlist[i], -1, outfile);
3866 putc('\n', outfile);
3867 }
3868 if (stringlist[i] != NULL)
3869 fprintf(outfile, "string list not terminated by NULL\n");
3870 PCRE_FREE_SUBSTRING_LIST(stringlist);
3871 }
3872 }
3873 }
3874
3875 /* There was a partial match */
3876
3877 else if (count == PCRE_ERROR_PARTIAL)
3878 {
3879 if (markptr == NULL) fprintf(outfile, "Partial match");
3880 else fprintf(outfile, "Partial match, mark=%s", markptr);
3881 if (use_size_offsets > 1)
3882 {
3883 fprintf(outfile, ": ");
3884 PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3885 outfile);
3886 }
3887 fprintf(outfile, "\n");
3888 break; /* Out of the /g loop */
3889 }
3890
3891 /* Failed to match. If this is a /g or /G loop and we previously set
3892 g_notempty after a null match, this is not necessarily the end. We want
3893 to advance the start offset, and continue. We won't be at the end of the
3894 string - that was checked before setting g_notempty.
3895
3896 Complication arises in the case when the newline convention is "any",
3897 "crlf", or "anycrlf". If the previous match was at the end of a line
3898 terminated by CRLF, an advance of one character just passes the \r,
3899 whereas we should prefer the longer newline sequence, as does the code in
3900 pcre_exec(). Fudge the offset value to achieve this. We check for a
3901 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
3902 find the default.
3903
3904 Otherwise, in the case of UTF-8 matching, the advance must be one
3905 character, not one byte. */
3906
3907 else
3908 {
3909 if (g_notempty != 0)
3910 {
3911 int onechar = 1;
3912 unsigned int obits = ((real_pcre *)re)->options;
3913 use_offsets[0] = start_offset;
3914 if ((obits & PCRE_NEWLINE_BITS) == 0)
3915 {
3916 int d;
3917 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
3918 /* Note that these values are always the ASCII ones, even in
3919 EBCDIC environments. CR = 13, NL = 10. */
3920 obits = (d == 13)? PCRE_NEWLINE_CR :
3921 (d == 10)? PCRE_NEWLINE_LF :
3922 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3923 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3924 (d == -1)? PCRE_NEWLINE_ANY : 0;
3925 }
3926 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3927 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3928 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3929 &&
3930 start_offset < len - 1 &&
3931 bptr[start_offset * CHAR_SIZE] == '\r' &&
3932 bptr[(start_offset + 1) * CHAR_SIZE] == '\n')
3933 onechar++;
3934 else if (use_utf)
3935 {
3936 while (start_offset + onechar < len)
3937 {
3938 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3939 onechar++;
3940 }
3941 }
3942 use_offsets[1] = start_offset + onechar;
3943 }
3944 else
3945 {
3946 switch(count)
3947 {
3948 case PCRE_ERROR_NOMATCH:
3949 if (gmatched == 0)
3950 {
3951 if (markptr == NULL) fprintf(outfile, "No match\n");
3952 else fprintf(outfile, "No match, mark = %s\n", markptr);
3953 }
3954 break;
3955
3956 case PCRE_ERROR_BADUTF8:
3957 case PCRE_ERROR_SHORTUTF8:
3958 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3959 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3960 if (use_size_offsets >= 2)
3961 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3962 use_offsets[1]);
3963 fprintf(outfile, "\n");
3964 break;
3965
3966 default:
3967 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3968 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3969 else
3970 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3971 break;
3972 }
3973
3974 break; /* Out of the /g loop */
3975 }
3976 }
3977
3978 /* If not /g or /G we are done */
3979
3980 if (!do_g && !do_G) break;
3981
3982 /* If we have matched an empty string, first check to see if we are at
3983 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3984 Perl's /g option does. This turns out to be rather cunning. First we set
3985 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3986 same point. If this fails (picked up above) we advance to the next
3987 character. */
3988
3989 g_notempty = 0;
3990
3991 if (use_offsets[0] == use_offsets[1])
3992 {
3993 if (use_offsets[0] == len) break;
3994 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3995 }
3996
3997 /* For /g, update the start offset, leaving the rest alone */
3998
3999 if (do_g) start_offset = use_offsets[1];
4000
4001 /* For /G, update the pointer and length */
4002
4003 else
4004 {
4005 bptr += use_offsets[1] * CHAR_SIZE;
4006 len -= use_offsets[1];
4007 }
4008 } /* End of loop for /g and /G */
4009
4010 NEXT_DATA: continue;
4011 } /* End of loop for data lines */
4012
4013 CONTINUE:
4014
4015 #if !defined NOPOSIX
4016 if (posix || do_posix) regfree(&preg);
4017 #endif
4018
4019 if (re != NULL) new_free(re);
4020 if (extra != NULL)
4021 {
4022 PCRE_FREE_STUDY(extra);
4023 }
4024 if (locale_set)
4025 {
4026 new_free((void *)tables);
4027 setlocale(LC_CTYPE, "C");
4028 locale_set = 0;
4029 }
4030 if (jit_stack != NULL)
4031 {
4032 PCRE_JIT_STACK_FREE(jit_stack);
4033 jit_stack = NULL;
4034 }
4035 }
4036
4037 if (infile == stdin) fprintf(outfile, "\n");
4038
4039 EXIT:
4040
4041 if (infile != NULL && infile != stdin) fclose(infile);
4042 if (outfile != NULL && outfile != stdout) fclose(outfile);
4043
4044 free(buffer);
4045 free(dbuffer);
4046 free(pbuffer);
4047 free(offsets);
4048
4049 #ifdef SUPPORT_PCRE16
4050 if (buffer16 != NULL) free(buffer16);
4051 #endif
4052
4053 return yield;
4054 }
4055
4056 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5