/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 817 - (show annotations)
Thu Dec 22 07:03:34 2011 UTC (7 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 119984 byte(s)
More pcretest fixes for dual 8/16 bit mode
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <time.h>
49 #include <locale.h>
50 #include <errno.h>
51
52 #ifdef SUPPORT_LIBREADLINE
53 #ifdef HAVE_UNISTD_H
54 #include <unistd.h>
55 #endif
56 #include <readline/readline.h>
57 #include <readline/history.h>
58 #endif
59
60
61 /* A number of things vary for Windows builds. Originally, pcretest opened its
62 input and output without "b"; then I was told that "b" was needed in some
63 environments, so it was added for release 5.0 to both the input and output. (It
64 makes no difference on Unix-like systems.) Later I was told that it is wrong
65 for the input on Windows. I've now abstracted the modes into two macros that
66 are set here, to make it easier to fiddle with them, and removed "b" from the
67 input mode under Windows. */
68
/* Select the fopen() modes for pcretest's input and output and pull in the
platform-specific headers. Output is always opened with "b"; input is opened
with "b" everywhere except under Windows. */

#if !defined(_WIN32) && !defined(WIN32)

/* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"

#else

/* Windows */

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows spells these functions with a leading underscore, but some
environments already provide the plain names, hence the #ifndef guards. */

#ifndef isatty
#define isatty _isatty
#endif

#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#endif
97
98
99 /* We have to include pcre_internal.h because we need the internal info for
100 displaying the results of pcre_study() and we also need to know about the
101 internal macros, structures, and other internal data values; pcretest has
102 "inside information" compared to a program that strictly follows the PCRE API.
103
104 Although pcre_internal.h does itself include pcre.h, we explicitly include it
105 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106 appropriately for an application, not for building PCRE. */
107
108 #include "pcre.h"
109 #include "pcre_internal.h"
110
111 /* The pcre_printint() function, which prints the internal form of a compiled
112 regex, is held in a separate file so that (a) it can be compiled in either
113 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114 when that is compiled in debug mode. */
115
/* Prototypes for the pattern printers; one is declared for each character
width that has been compiled in. */

#if defined SUPPORT_PCRE8
extern void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#if defined SUPPORT_PCRE16
extern void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
122
/* pcretest needs some of PCRE's data tables. Rather than keeping a second
copy, the table source file is #included below; the external symbol names are
first mapped onto local names so that they cannot clash with the library's
own symbols. */

#define _pcre_OP_lengths        OP_lengths
#define _pcre_ucp_gentype       ucp_gentype
#define _pcre_ucp_typerange     ucp_typerange
#define _pcre_utf8_table1       utf8_table1
#define _pcre_utf8_table1_size  utf8_table1_size
#define _pcre_utf8_table2       utf8_table2
#define _pcre_utf8_table3       utf8_table3
#define _pcre_utf8_table4       utf8_table4
#define _pcre_utt               utt
#define _pcre_utt_names         utt_names
#define _pcre_utt_size          utt_size
138
139 #include "pcre_tables.c"
140
/* PRINTABLE(c) decides whether an output character is shown as-is or as a hex
value when compiled patterns are displayed; its definition is the same as the
one in the printint.src file. It is used whenever the locale has not been
explicitly changed, so that output stays consistent across systems whose
isprint() results differ even in the "C" locale. */

#ifndef EBCDIC
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#else
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#endif

/* PRINTOK(c) uses isprint() when locale_set is non-zero and falls back to
PRINTABLE() otherwise. The argument may be expanded more than once, so it must
not have side effects. */

#define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154
155 /* It is possible to compile this test program without including support for
156 testing the POSIX interface, though this is not available via the standard
157 Makefile. */
158
159 #if !defined NOPOSIX
160 #include "pcreposix.h"
161 #endif
162
/* Originally for the benefit of a version that was imported into Exim,
pcretest can also be built without support for UTF8 (define NOUTF8), without
the interface to the DFA matcher (NODFA), and without the doublecheck of the
old "info" function (define NOINFOCHECK). UTF8 support is cut out
automatically when PCRE itself is built without it. */

#if !defined SUPPORT_UTF8 && !defined NOUTF8
#define NOUTF8
#endif
174
175 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177 only from one place and is handled differently). I couldn't dream up any way of
178 using a single macro to do this in a generic way, because of the many different
179 argument requirements. We know that at least one of SUPPORT_PCRE8 and
180 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181 use these in the definitions of generic macros. */
182
183 #ifdef SUPPORT_PCRE8
184
/* 8-bit helpers. In each macro the cast is applied directly to the argument
token sequence (no extra parentheses), so any pointer arithmetic written in the
argument is carried out on the cast type. */

/* Call pchars() on p viewed as pcre_uint8 data; the result is assigned to lv. */
#define PCHARS8(lv, p, len, f)  lv = pchars((pcre_uint8 *)p, len, f)

/* As PCHARS8, but the result of pchars() is discarded. */
#define PCHARSV8(p, len, f)  (void)pchars((pcre_uint8 *)p, len, f)

/* strlen() of p, narrowed to int. */
#define STRLEN8(p)  ((int)strlen((char *)p))

/* Compile pat with pcre_compile(); the returned pointer is assigned to re. */
#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)
195
/* Call pcre_copy_named_substring() for the 8-bit library and assign its return
value to rc.

  rc        lvalue that receives the library's return value
  re        compiled pattern
  bptr      subject string (cast to char *)
  offsets   offsets vector from the match
  count     value returned by the matching function
  namesptr  name of the capturing group to copy (cast to char *)
  cbuffer   destination buffer
  size      size of cbuffer

The name is taken from the namesptr parameter, as in the 16-bit variant. The
macro previously referred to a variable called copynamesptr in the calling
scope, so it only worked when the caller's variable happened to have exactly
that name. */

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)
200
/* Remaining 8-bit wrappers. Each one forwards to the pcre_xxx() function of
the same name, casting string arguments to the char types that the 8-bit API
expects. Macros with a leading result parameter (rc, count, n, extra) assign
the function's return value to it. */

/* --- Substring extraction --- */

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

/* --- Matching --- */

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

/* --- Releasing memory --- */

#define PCRE_FREE_STUDY8(extra)  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring)  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr)  pcre_free_substring_list(listptr)

/* --- Named and numbered substrings --- */

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* NOTE(review): the expansion reads a variable named "re" from the calling
scope; the macro's own second parameter (rc) is not used. Confirm against the
callers before changing either name. */

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

/* --- Pattern handling --- */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
  pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

/* --- Callout hook --- */

#define SET_PCRE_CALLOUT8(callout)  pcre_callout = callout
245
246 #endif /* SUPPORT_PCRE8 */
247
248 /* -----------------------------------------------------------*/
249
250 #ifdef SUPPORT_PCRE16
251
/* 16-bit wrappers. Each one forwards to the corresponding pcre16_xxx()
function (or to the local pchars16()/strlen16() helpers), casting string
arguments to the 16-bit pointer types. The casts are applied directly to the
argument token sequence (no extra parentheses), so pointer arithmetic written
in an argument is carried out on the cast type. Macros with a leading result
parameter (lv, rc, count, n, extra, re) assign the function's return value to
it. */

/* --- Printing and length --- */

#define PCHARS16(lv, p, len, f)  lv = pchars16((PCRE_SPTR16)p, len, f)

#define PCHARSV16(p, len, f)  (void)pchars16((PCRE_SPTR16)p, len, f)

#define STRLEN16(p)  ((int)strlen16((PCRE_SPTR16)p))

/* --- Compilation --- */

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)

/* --- Copying substrings: the size passed on is size/2 --- */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, size/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SCHAR16 *)cbuffer, size/2)

/* --- Matching --- */

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
    options, offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
    options, offsets, size_offsets)

/* --- Releasing memory --- */

#define PCRE_FREE_STUDY16(extra)  pcre16_free_study(extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

/* --- Named and numbered substrings --- */

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)subsptr)

/* NOTE(review): the expansion reads a variable named "re" from the calling
scope; the macro's own second parameter (rc) is not used. Confirm against the
callers before changing either name. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)listptr)

/* --- Pattern handling --- */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
  pcre16_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = pcre16_study(re, options, error)

/* --- Callout hook --- */

#define SET_PCRE_CALLOUT16(callout)  pcre16_callout = callout
315
316 #endif /* SUPPORT_PCRE16 */
317
318
319 /* ----- Both modes are supported; a runtime test is needed ----- */
320
321 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
322
/* Dual-mode dispatch: each generic macro tests use_pcre16 at run time and
expands to the 16-bit or the 8-bit variant. All of them except CHAR_SIZE and
STRLEN expand to an if/else statement whose last arm has no terminating
semicolon, so they must be used as complete statements followed by ';'. */

/* Bytes per code unit in the currently selected mode. */
#define CHAR_SIZE (use_pcre16? 2:1)

/* --- Printing and length --- */

#define PCHARS(lv, p, len, f) \
  if (use_pcre16) \
    PCHARS16(lv, p, len, f); \
  else \
    PCHARS8(lv, p, len, f)

#define PCHARSV(p, len, f) \
  if (use_pcre16) \
    PCHARSV16(p, len, f); \
  else \
    PCHARSV8(p, len, f)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

/* --- Compilation --- */

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  if (use_pcre16) \
    PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
  else \
    PCRE_COMPILE8(re, pat, options, error, erroffset, tables)

/* --- Copying substrings --- */

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  if (use_pcre16) \
    PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else \
    PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  if (use_pcre16) \
    PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
  else \
    PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)

/* --- Matching --- */

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  if (use_pcre16) \
    PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else \
    PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  if (use_pcre16) \
    PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else \
    PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets)

/* --- Releasing memory --- */

#define PCRE_FREE_STUDY(extra) \
  if (use_pcre16) \
    PCRE_FREE_STUDY16(extra); \
  else \
    PCRE_FREE_STUDY8(extra)

#define PCRE_FREE_SUBSTRING(substring) \
  if (use_pcre16) \
    PCRE_FREE_SUBSTRING16(substring); \
  else \
    PCRE_FREE_SUBSTRING8(substring)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  if (use_pcre16) \
    PCRE_FREE_SUBSTRING_LIST16(listptr); \
  else \
    PCRE_FREE_SUBSTRING_LIST8(listptr)

/* --- Named and numbered substrings --- */

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  if (use_pcre16) \
    PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else \
    PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  if (use_pcre16) \
    PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
  else \
    PCRE_GET_STRINGNUMBER8(n, rc, ptr)

#define PCRE_GET_SUBSTRING(rc, bptr, offsets, count, i, subsptr) \
  if (use_pcre16) \
    PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr); \
  else \
    PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  if (use_pcre16) \
    PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
  else \
    PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)

/* --- Pattern handling --- */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
  if (use_pcre16) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
  else \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)

#define PCRE_STUDY(extra, re, options, error) \
  if (use_pcre16) \
    PCRE_STUDY16(extra, re, options, error); \
  else \
    PCRE_STUDY8(extra, re, options, error)

/* --- Callout hook --- */

#define SET_PCRE_CALLOUT(callout) \
  if (use_pcre16) \
    SET_PCRE_CALLOUT16(callout); \
  else \
    SET_PCRE_CALLOUT8(callout)
440
441 /* ----- Only 8-bit mode is supported ----- */
442
443 #elif defined SUPPORT_PCRE8
/* With only the 8-bit library available, every generic name is a plain alias
for its 8-bit variant and a code unit is one byte. */

#define CHAR_SIZE                        1
#define PCHARS                           PCHARS8
#define PCHARSV                          PCHARSV8
#define STRLEN                           STRLEN8
#define PCRE_COMPILE                     PCRE_COMPILE8
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC8
#define PCRE_EXEC                        PCRE_EXEC8
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST8
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_STUDY                       PCRE_STUDY8
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT8
463
464 /* ----- Only 16-bit mode is supported ----- */
465
466 #else
/* With only the 16-bit library available, every generic name is a plain alias
for its 16-bit variant. A code unit occupies two bytes in this mode, matching
the value that the dual-mode CHAR_SIZE yields when use_pcre16 is set;
CHAR_SIZE was previously defined as 1 here, which is the 8-bit value. */

#define CHAR_SIZE                        2
#define PCHARS                           PCHARS16
#define PCHARSV                          PCHARSV16
#define STRLEN                           STRLEN16
#define PCRE_COMPILE                     PCRE_COMPILE16
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC16
#define PCRE_EXEC                        PCRE_EXEC16
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST16
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_STUDY                       PCRE_STUDY16
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT16
486 #endif
487
488 /* ----- End of mode-specific function call macros ----- */
489
490
/* Other parameters */

/* Make sure CLOCKS_PER_SEC exists: use CLK_TCK when that is available and
fall back to 100 otherwise. */

#if !defined CLOCKS_PER_SEC
#  if defined CLK_TCK
#    define CLOCKS_PER_SEC CLK_TCK
#  else
#    define CLOCKS_PER_SEC 100
#  endif
#endif

/* Default number of repetitions of the timing loop. */

#define LOOPREPEAT 500000
504
/* Static variables. Objects with static storage duration start out as zero
(or NULL for pointers) when they have no initializer; the initial values are
written out explicitly here. */

static FILE *outfile = NULL;

/* Flags and counters */

static int log_store = 0;
static int callout_count = 0;
static int callout_extra = 0;
static int callout_fail_count = 0;
static int callout_fail_id = 0;
static int debug_lengths = 0;
static int first_callout = 0;
static int locale_set = 0;
static int show_malloc = 0;
static int use_utf = 0;

/* Sizes */

static size_t gotten_store = 0;
static size_t first_gotten_store = 0;

static const unsigned char *last_callout_mark = NULL;
521
522 /* The buffers grow automatically if very long input lines are encountered. */
523
524 static int buffer_size = 50000;
525 static pcre_uint8 *buffer = NULL;
526 static pcre_uint8 *dbuffer = NULL;
527 static pcre_uint8 *pbuffer = NULL;
528
/* Another buffer is needed for translation to 16-bit character strings. It
will be obtained and extended as required. */

#ifdef SUPPORT_PCRE16

static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;

/* The table of operator lengths used for 16-bit compiling is needed in order
to swap bytes in a pattern when testing saving/reloading. Luckily the data is
defined as a macro (OP_LENGTHS), but LINK_SIZE has to be adjusted for the
16-bit world before the macro is expanded. As a safety check, make sure that
COMPILE_PCRE16 is *not* set. */

#if defined COMPILE_PCRE16
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

/* Map the byte-based LINK_SIZE onto 16-bit units: 2 becomes 1, and 3 or 4
become 2. Any other value is rejected. */

#if LINK_SIZE != 2 && LINK_SIZE != 3 && LINK_SIZE != 4
#error LINK_SIZE must be either 2, 3, or 4
#elif LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#else
#undef LINK_SIZE
#define LINK_SIZE 2
#endif

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };

#endif  /* SUPPORT_PCRE16 */
559
/* Default character width. Without 8-bit support there must be 16-bit
support, so use_pcre16 starts at 1. With 8-bit support it starts at 0; if
16-bit support is present as well, an option can change it. */

#ifndef SUPPORT_PCRE8
static int use_pcre16 = 1;
#else
static int use_pcre16 = 0;
#endif
569
/* Textual explanations for runtime error codes. NULL entries mark codes that
have no text here because they are handled specially or are never returned by
the matching functions. The position of each entry is shown on the right;
presumably the table is indexed by the negated error code -- confirm against
pcre.h and the code that reads the table. */

static const char *errtexts[] = {
  NULL,                                             /*  0: no error */
  NULL,                                             /*  1: NOMATCH is handled specially */
  "NULL argument passed",                           /*  2 */
  "bad option value",                               /*  3 */
  "magic number missing",                           /*  4 */
  "unknown opcode - pattern overwritten?",          /*  5 */
  "no more memory",                                 /*  6 */
  NULL,                                             /*  7: never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",                           /*  8 */
  "callout error code",                             /*  9 */
  NULL,                                             /* 10: BADUTF8 is handled specially */
  "bad UTF-8 offset",                               /* 11 */
  NULL,                                             /* 12: PARTIAL is handled specially */
  "not used - internal error",                      /* 13 */
  "internal error - pattern overwritten?",          /* 14 */
  "bad count value",                                /* 15 */
  "item unsupported for DFA matching",              /* 16 */
  "backreference condition or recursion test not supported for DFA matching",  /* 17 */
  "match limit not supported for DFA matching",     /* 18 */
  "workspace size exceeded in DFA matching",        /* 19 */
  "too much recursion for DFA matching",            /* 20 */
  "recursion limit exceeded",                       /* 21 */
  "not used - internal error",                      /* 22 */
  "invalid combination of newline options",         /* 23 */
  "bad offset value",                               /* 24 */
  NULL,                                             /* 25: SHORTUTF8 is handled specially */
  "nested recursion at the same subject position",  /* 26 */
  "JIT stack limit reached",                        /* 27 */
  "pattern compiled in wrong mode (8-bit/16-bit error)"  /* 28 */
};
603
604
605 /*************************************************
606 * Alternate character tables *
607 *************************************************/
608
609 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
610 using the default tables of the library. However, the T option can be used to
611 select alternate sets of tables, for different kinds of testing. Note also that
612 the L (locale) option also adjusts the tables. */
613
614 /* This is the set of tables distributed as default with PCRE. It recognizes
615 only ASCII characters. */
616
617 static const pcre_uint8 tables0[] = {
618
619 /* This table is a lower casing table. */
620
621 0, 1, 2, 3, 4, 5, 6, 7,
622 8, 9, 10, 11, 12, 13, 14, 15,
623 16, 17, 18, 19, 20, 21, 22, 23,
624 24, 25, 26, 27, 28, 29, 30, 31,
625 32, 33, 34, 35, 36, 37, 38, 39,
626 40, 41, 42, 43, 44, 45, 46, 47,
627 48, 49, 50, 51, 52, 53, 54, 55,
628 56, 57, 58, 59, 60, 61, 62, 63,
629 64, 97, 98, 99,100,101,102,103,
630 104,105,106,107,108,109,110,111,
631 112,113,114,115,116,117,118,119,
632 120,121,122, 91, 92, 93, 94, 95,
633 96, 97, 98, 99,100,101,102,103,
634 104,105,106,107,108,109,110,111,
635 112,113,114,115,116,117,118,119,
636 120,121,122,123,124,125,126,127,
637 128,129,130,131,132,133,134,135,
638 136,137,138,139,140,141,142,143,
639 144,145,146,147,148,149,150,151,
640 152,153,154,155,156,157,158,159,
641 160,161,162,163,164,165,166,167,
642 168,169,170,171,172,173,174,175,
643 176,177,178,179,180,181,182,183,
644 184,185,186,187,188,189,190,191,
645 192,193,194,195,196,197,198,199,
646 200,201,202,203,204,205,206,207,
647 208,209,210,211,212,213,214,215,
648 216,217,218,219,220,221,222,223,
649 224,225,226,227,228,229,230,231,
650 232,233,234,235,236,237,238,239,
651 240,241,242,243,244,245,246,247,
652 248,249,250,251,252,253,254,255,
653
654 /* This table is a case flipping table. */
655
656 0, 1, 2, 3, 4, 5, 6, 7,
657 8, 9, 10, 11, 12, 13, 14, 15,
658 16, 17, 18, 19, 20, 21, 22, 23,
659 24, 25, 26, 27, 28, 29, 30, 31,
660 32, 33, 34, 35, 36, 37, 38, 39,
661 40, 41, 42, 43, 44, 45, 46, 47,
662 48, 49, 50, 51, 52, 53, 54, 55,
663 56, 57, 58, 59, 60, 61, 62, 63,
664 64, 97, 98, 99,100,101,102,103,
665 104,105,106,107,108,109,110,111,
666 112,113,114,115,116,117,118,119,
667 120,121,122, 91, 92, 93, 94, 95,
668 96, 65, 66, 67, 68, 69, 70, 71,
669 72, 73, 74, 75, 76, 77, 78, 79,
670 80, 81, 82, 83, 84, 85, 86, 87,
671 88, 89, 90,123,124,125,126,127,
672 128,129,130,131,132,133,134,135,
673 136,137,138,139,140,141,142,143,
674 144,145,146,147,148,149,150,151,
675 152,153,154,155,156,157,158,159,
676 160,161,162,163,164,165,166,167,
677 168,169,170,171,172,173,174,175,
678 176,177,178,179,180,181,182,183,
679 184,185,186,187,188,189,190,191,
680 192,193,194,195,196,197,198,199,
681 200,201,202,203,204,205,206,207,
682 208,209,210,211,212,213,214,215,
683 216,217,218,219,220,221,222,223,
684 224,225,226,227,228,229,230,231,
685 232,233,234,235,236,237,238,239,
686 240,241,242,243,244,245,246,247,
687 248,249,250,251,252,253,254,255,
688
689 /* This table contains bit maps for various character classes. Each map is 32
690 bytes long and the bits run from the least significant end of each byte. The
691 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
692 graph, print, punct, and cntrl. Other classes are built from combinations. */
693
694 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
695 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
696 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
697 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
698
699 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
700 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
701 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
702 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
703
704 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
705 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
706 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
707 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
708
709 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
710 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
711 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
712 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
713
714 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
715 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
716 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
717 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
718
719 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
720 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
721 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
722 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
723
724 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
725 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
726 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
727 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
728
729 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
730 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
731 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
732 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
733
734 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
735 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
736 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
737 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
738
739 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
740 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
741 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
742 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
743
744 /* This table identifies various classes of character by individual bits:
745 0x01 white space character
746 0x02 letter
747 0x04 decimal digit
748 0x08 hexadecimal digit
749 0x10 alphanumeric or '_'
750 0x80 regular expression metacharacter or binary zero
751 */
752
753 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
754 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
755 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
756 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
757 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
758 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
759 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
760 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
761 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
762 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
763 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
764 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
765 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
766 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
767 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
768 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
769 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
770 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
771 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
772 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
773 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
774 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
775 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
776 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
777 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
778 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
779 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
780 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
781 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
782 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
783 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
784 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
785
786 /* This is a set of tables that came originally from a Windows user. It seems to
787 be at least an approximation of ISO 8859. In particular, there are characters
788 greater than 128 that are marked as spaces, letters, etc. */
789
/* Alternative character tables held as one flat array of 1088 bytes. The data
falls into four consecutive sections, marked below.
NOTE(review): the section layout looks like the one PCRE's own character
tables use (lower-case map, case-flip map, class bitmaps, type flags) -
confirm against the library's table offsets before relying on it. */
790 static const pcre_uint8 tables1[] = {
/* Bytes 0-255: lower-casing map. Identity, except that 65-90 map to 97-122
and 192-222 (other than 215) map to 224-254. */
791 0,1,2,3,4,5,6,7,
792 8,9,10,11,12,13,14,15,
793 16,17,18,19,20,21,22,23,
794 24,25,26,27,28,29,30,31,
795 32,33,34,35,36,37,38,39,
796 40,41,42,43,44,45,46,47,
797 48,49,50,51,52,53,54,55,
798 56,57,58,59,60,61,62,63,
799 64,97,98,99,100,101,102,103,
800 104,105,106,107,108,109,110,111,
801 112,113,114,115,116,117,118,119,
802 120,121,122,91,92,93,94,95,
803 96,97,98,99,100,101,102,103,
804 104,105,106,107,108,109,110,111,
805 112,113,114,115,116,117,118,119,
806 120,121,122,123,124,125,126,127,
807 128,129,130,131,132,133,134,135,
808 136,137,138,139,140,141,142,143,
809 144,145,146,147,148,149,150,151,
810 152,153,154,155,156,157,158,159,
811 160,161,162,163,164,165,166,167,
812 168,169,170,171,172,173,174,175,
813 176,177,178,179,180,181,182,183,
814 184,185,186,187,188,189,190,191,
815 224,225,226,227,228,229,230,231,
816 232,233,234,235,236,237,238,239,
817 240,241,242,243,244,245,246,215,
818 248,249,250,251,252,253,254,223,
819 224,225,226,227,228,229,230,231,
820 232,233,234,235,236,237,238,239,
821 240,241,242,243,244,245,246,247,
822 248,249,250,251,252,253,254,255,
/* Bytes 256-511: case-flipping map. 65-90 and 97-122 are exchanged, as are
192-222 and 224-254; 215, 223, 247 and 255 map to themselves. */
823 0,1,2,3,4,5,6,7,
824 8,9,10,11,12,13,14,15,
825 16,17,18,19,20,21,22,23,
826 24,25,26,27,28,29,30,31,
827 32,33,34,35,36,37,38,39,
828 40,41,42,43,44,45,46,47,
829 48,49,50,51,52,53,54,55,
830 56,57,58,59,60,61,62,63,
831 64,97,98,99,100,101,102,103,
832 104,105,106,107,108,109,110,111,
833 112,113,114,115,116,117,118,119,
834 120,121,122,91,92,93,94,95,
835 96,65,66,67,68,69,70,71,
836 72,73,74,75,76,77,78,79,
837 80,81,82,83,84,85,86,87,
838 88,89,90,123,124,125,126,127,
839 128,129,130,131,132,133,134,135,
840 136,137,138,139,140,141,142,143,
841 144,145,146,147,148,149,150,151,
842 152,153,154,155,156,157,158,159,
843 160,161,162,163,164,165,166,167,
844 168,169,170,171,172,173,174,175,
845 176,177,178,179,180,181,182,183,
846 184,185,186,187,188,189,190,191,
847 224,225,226,227,228,229,230,231,
848 232,233,234,235,236,237,238,239,
849 240,241,242,243,244,245,246,215,
850 248,249,250,251,252,253,254,223,
851 192,193,194,195,196,197,198,199,
852 200,201,202,203,204,205,206,207,
853 208,209,210,211,212,213,214,247,
854 216,217,218,219,220,221,222,255,
/* Bytes 512-831: 320 bytes.
NOTE(review): presumably ten 32-byte character-class bitmaps - confirm. */
855 0,62,0,0,1,0,0,0,
856 0,0,0,0,0,0,0,0,
857 32,0,0,0,1,0,0,0,
858 0,0,0,0,0,0,0,0,
859 0,0,0,0,0,0,255,3,
860 126,0,0,0,126,0,0,0,
861 0,0,0,0,0,0,0,0,
862 0,0,0,0,0,0,0,0,
863 0,0,0,0,0,0,255,3,
864 0,0,0,0,0,0,0,0,
865 0,0,0,0,0,0,12,2,
866 0,0,0,0,0,0,0,0,
867 0,0,0,0,0,0,0,0,
868 254,255,255,7,0,0,0,0,
869 0,0,0,0,0,0,0,0,
870 255,255,127,127,0,0,0,0,
871 0,0,0,0,0,0,0,0,
872 0,0,0,0,254,255,255,7,
873 0,0,0,0,0,4,32,4,
874 0,0,0,128,255,255,127,255,
875 0,0,0,0,0,0,255,3,
876 254,255,255,135,254,255,255,7,
877 0,0,0,0,0,4,44,6,
878 255,255,127,255,255,255,127,255,
879 0,0,0,0,254,255,255,255,
880 255,255,255,255,255,255,255,127,
881 0,0,0,0,254,255,255,255,
882 255,255,255,255,255,255,255,255,
883 0,2,0,0,255,255,255,255,
884 255,255,255,255,255,255,255,127,
885 0,0,0,0,255,255,255,255,
886 255,255,255,255,255,255,255,255,
887 0,0,0,0,254,255,0,252,
888 1,0,0,248,1,0,0,120,
889 0,0,0,0,254,255,255,255,
890 0,0,128,0,0,0,128,0,
891 255,255,255,255,0,0,0,0,
892 0,0,0,0,0,0,0,128,
893 255,255,255,255,0,0,0,0,
894 0,0,0,0,0,0,0,0,
/* Bytes 832-1087: one byte per character value.
NOTE(review): presumably character-type flag bits - confirm. */
895 128,0,0,0,0,0,0,0,
896 0,1,1,0,1,1,0,0,
897 0,0,0,0,0,0,0,0,
898 0,0,0,0,0,0,0,0,
899 1,0,0,0,128,0,0,0,
900 128,128,128,128,0,0,128,0,
901 28,28,28,28,28,28,28,28,
902 28,28,0,0,0,0,0,128,
903 0,26,26,26,26,26,26,18,
904 18,18,18,18,18,18,18,18,
905 18,18,18,18,18,18,18,18,
906 18,18,18,128,128,0,128,16,
907 0,26,26,26,26,26,26,18,
908 18,18,18,18,18,18,18,18,
909 18,18,18,18,18,18,18,18,
910 18,18,18,128,128,0,0,0,
911 0,0,0,0,0,1,0,0,
912 0,0,0,0,0,0,0,0,
913 0,0,0,0,0,0,0,0,
914 0,0,0,0,0,0,0,0,
915 1,0,0,0,0,0,0,0,
916 0,0,18,0,0,0,0,0,
917 0,0,20,20,0,18,0,0,
918 0,20,18,0,0,0,0,0,
919 18,18,18,18,18,18,18,18,
920 18,18,18,18,18,18,18,18,
921 18,18,18,18,18,18,18,0,
922 18,18,18,18,18,18,18,18,
923 18,18,18,18,18,18,18,18,
924 18,18,18,18,18,18,18,18,
925 18,18,18,18,18,18,18,0,
926 18,18,18,18,18,18,18,18
927 };
928
929
930
931
932 #ifndef HAVE_STRERROR
933 /*************************************************
934 * Provide strerror() for non-ANSI libraries *
935 *************************************************/
936
937 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
938 in their libraries, but can provide the same facility by this simple
939 alternative function. */
940
941 extern int sys_nerr;
942 extern char *sys_errlist[];
943
944 char *
945 strerror(int n)
946 {
947 if (n < 0 || n >= sys_nerr) return "unknown error number";
948 return sys_errlist[n];
949 }
950 #endif /* HAVE_STRERROR */
951
952
953 /*************************************************
954 * JIT memory callback *
955 *************************************************/
956
957 static pcre_jit_stack* jit_callback(void *arg)
958 {
959 return (pcre_jit_stack *)arg;
960 }
961
962
963 /*************************************************
964 * Convert UTF-8 string to value *
965 *************************************************/
966
967 /* This function takes one or more bytes that represents a UTF-8 character,
968 and returns the value of the character.
969
970 Argument:
971 utf8bytes a pointer to the byte vector
972 vptr a pointer to an int to receive the value
973
974 Returns: > 0 => the number of bytes consumed
975 -6 to 0 => malformed UTF-8 character at offset = (-return)
976 */
977
978 #if !defined NOUTF8
979
980 static int
981 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
982 {
983 int c = *utf8bytes++;
984 int d = c;
985 int i, j, s;
986
987 for (i = -1; i < 6; i++) /* i is number of additional bytes */
988 {
989 if ((d & 0x80) == 0) break;
990 d <<= 1;
991 }
992
993 if (i == -1) { *vptr = c; return 1; } /* ascii character */
994 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
995
996 /* i now has a value in the range 1-5 */
997
998 s = 6*i;
999 d = (c & utf8_table3[i]) << s;
1000
1001 for (j = 0; j < i; j++)
1002 {
1003 c = *utf8bytes++;
1004 if ((c & 0xc0) != 0x80) return -(j+1);
1005 s -= 6;
1006 d |= (c & 0x3f) << s;
1007 }
1008
1009 /* Check that encoding was the correct unique one */
1010
1011 for (j = 0; j < utf8_table1_size; j++)
1012 if (d <= utf8_table1[j]) break;
1013 if (j != i) return -(i+1);
1014
1015 /* Valid value */
1016
1017 *vptr = d;
1018 return i+1;
1019 }
1020
1021 #endif
1022
1023
1024
1025 /*************************************************
1026 * Convert character value to UTF-8 *
1027 *************************************************/
1028
1029 /* This function takes an integer value in the range 0 - 0x7fffffff
1030 and encodes it as a UTF-8 character in 1 to 6 bytes.
1031
1032 Arguments:
1033 cvalue the character value
1034 utf8bytes pointer to buffer for result - at least 6 bytes long
1035
1036 Returns: number of bytes placed in the buffer
1037 */
1038
1039 #if !defined NOUTF8
1040
1041 static int
1042 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1043 {
1044 register int i, j;
1045 for (i = 0; i < utf8_table1_size; i++)
1046 if (cvalue <= utf8_table1[i]) break;
1047 utf8bytes += i;
1048 for (j = i; j > 0; j--)
1049 {
1050 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1051 cvalue >>= 6;
1052 }
1053 *utf8bytes = utf8_table2[i] | cvalue;
1054 return i + 1;
1055 }
1056
1057 #endif
1058
1059
1060
1061 #ifdef SUPPORT_PCRE16
1062 /*************************************************
1063 * Convert a string to 16-bit *
1064 *************************************************/
1065
1066 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1067 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1068 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1069 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1070 result is always left in buffer16.
1071
1072 Arguments:
1073 p points to a byte string
1074 utf true if UTF-8 (to be converted to UTF-16)
1075 len number of bytes in the string (excluding trailing zero)
1076
1077 Returns: number of 16-bit data items used (excluding trailing zero)
1078 OR -1 if a UTF-8 string is malformed
1079 */
1080
1081 static int
1082 to16(pcre_uint8 *p, int utf, int len)
1083 {
1084 pcre_uint16 *pp;
1085
1086 if (buffer16_size < 2*len + 2)
1087 {
1088 if (buffer16 != NULL) free(buffer16);
1089 buffer16_size = 2*len + 2;
1090 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1091 if (buffer16 == NULL)
1092 {
1093 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1094 exit(1);
1095 }
1096 }
1097
1098 pp = buffer16;
1099
1100 if (!utf)
1101 {
1102 while (len-- > 0) *pp++ = *p++;
1103 }
1104
1105 else
1106 {
1107 int c;
1108 while (len > 0)
1109 {
1110 int chlen = utf82ord(p, &c);
1111 if (chlen <= 0) return -1;
1112 p += chlen;
1113 len -= chlen;
1114 if (c < 0x10000) *pp++ = c; else
1115 {
1116 c -= 0x10000;
1117 *pp++ = 0xD800 | (c >> 10);
1118 *pp++ = 0xDC00 | (c & 0x3ff);
1119 }
1120 }
1121 }
1122
1123 *pp = 0;
1124 return pp - buffer16;
1125 }
1126 #endif
1127
1128
1129 /*************************************************
1130 * Read or extend an input line *
1131 *************************************************/
1132
1133 /* Input lines are read into buffer, but both patterns and data lines can be
1134 continued over multiple input lines. In addition, if the buffer fills up, we
1135 want to automatically expand it so as to be able to handle extremely large
1136 lines that are needed for certain stress tests. When the input buffer is
1137 expanded, the other two buffers must also be expanded likewise, and the
1138 contents of pbuffer, which are a copy of the input for callouts, must be
1139 preserved (for when expansion happens for a data line). This is not the most
1140 optimal way of handling this, but hey, this is just a test program!
1141
1142 Arguments:
1143 f the file to read
1144 start where in buffer to start (this *must* be within buffer)
1145 prompt for stdin or readline()
1146
1147 Returns: pointer to the start of new data
1148 could be a copy of start, or could be moved
1149 NULL if no data read and EOF reached
1150 */
1151
1152 static pcre_uint8 *
1153 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1154 {
1155 pcre_uint8 *here = start;
1156
1157 for (;;)
1158 {
1159 int rlen = (int)(buffer_size - (here - buffer));
1160
1161 if (rlen > 1000)
1162 {
1163 int dlen;
1164
1165 /* If libreadline support is required, use readline() to read a line if the
1166 input is a terminal. Note that readline() removes the trailing newline, so
1167 we must put it back again, to be compatible with fgets(). */
1168
1169 #ifdef SUPPORT_LIBREADLINE
1170 if (isatty(fileno(f)))
1171 {
1172 size_t len;
1173 char *s = readline(prompt);
1174 if (s == NULL) return (here == start)? NULL : start;
1175 len = strlen(s);
1176 if (len > 0) add_history(s);
1177 if (len > rlen - 1) len = rlen - 1;
1178 memcpy(here, s, len);
1179 here[len] = '\n';
1180 here[len+1] = 0;
1181 free(s);
1182 }
1183 else
1184 #endif
1185
1186 /* Read the next line by normal means, prompting if the file is stdin. */
1187
1188 {
1189 if (f == stdin) printf("%s", prompt);
1190 if (fgets((char *)here, rlen, f) == NULL)
1191 return (here == start)? NULL : start;
1192 }
1193
1194 dlen = (int)strlen((char *)here);
1195 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1196 here += dlen;
1197 }
1198
1199 else
1200 {
1201 int new_buffer_size = 2*buffer_size;
1202 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1203 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1204 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1205
1206 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1207 {
1208 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1209 exit(1);
1210 }
1211
1212 memcpy(new_buffer, buffer, buffer_size);
1213 memcpy(new_pbuffer, pbuffer, buffer_size);
1214
1215 buffer_size = new_buffer_size;
1216
1217 start = new_buffer + (start - buffer);
1218 here = new_buffer + (here - buffer);
1219
1220 free(buffer);
1221 free(dbuffer);
1222 free(pbuffer);
1223
1224 buffer = new_buffer;
1225 dbuffer = new_dbuffer;
1226 pbuffer = new_pbuffer;
1227 }
1228 }
1229
1230 return NULL; /* Control never gets here */
1231 }
1232
1233
1234
1235 /*************************************************
1236 * Read number from string *
1237 *************************************************/
1238
1239 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1240 around with conditional compilation, just do the job by hand. It is only used
1241 for unpicking arguments, so just keep it simple.
1242
1243 Arguments:
1244 str string to be converted
1245 endptr where to put the end pointer
1246
1247 Returns: the converted value, as an int
1248 */
1249
1250 static int
1251 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1252 {
1253 int result = 0;
1254 while(*str != 0 && isspace(*str)) str++;
1255 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1256 *endptr = str;
1257 return(result);
1258 }
1259
1260
1261
1262 /*************************************************
1263 * Print one character *
1264 *************************************************/
1265
1266 /* Print a single character either literally, or as a hex escape. */
1267
1268 static int pchar(int c, FILE *f)
1269 {
1270 if (PRINTOK(c))
1271 {
1272 if (f != NULL) fprintf(f, "%c", c);
1273 return 1;
1274 }
1275
1276 if (c < 0x100)
1277 {
1278 if (use_utf)
1279 {
1280 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1281 return 6;
1282 }
1283 else
1284 {
1285 if (f != NULL) fprintf(f, "\\x%02x", c);
1286 return 4;
1287 }
1288 }
1289
1290 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1291 return (c <= 0x000000ff)? 6 :
1292 (c <= 0x00000fff)? 7 :
1293 (c <= 0x0000ffff)? 8 :
1294 (c <= 0x000fffff)? 9 : 10;
1295 }
1296
1297
1298
1299 #ifdef SUPPORT_PCRE8
1300 /*************************************************
1301 * Print 8-bit character string *
1302 *************************************************/
1303
1304 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1305 If handed a NULL file, just counts chars without printing. */
1306
1307 static int pchars(pcre_uint8 *p, int length, FILE *f)
1308 {
1309 int c = 0;
1310 int yield = 0;
1311
1312 if (length < 0)
1313 length = strlen((char *)p);
1314
1315 while (length-- > 0)
1316 {
1317 #if !defined NOUTF8
1318 if (use_utf)
1319 {
1320 int rc = utf82ord(p, &c);
1321 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1322 {
1323 length -= rc - 1;
1324 p += rc;
1325 yield += pchar(c, f);
1326 continue;
1327 }
1328 }
1329 #endif
1330 c = *p++;
1331 yield += pchar(c, f);
1332 }
1333
1334 return yield;
1335 }
1336 #endif
1337
1338
1339
1340 #ifdef SUPPORT_PCRE16
1341 /*************************************************
1342 * Find length of 0-terminated 16-bit string *
1343 *************************************************/
1344
1345 static int strlen16(PCRE_SPTR16 p)
1346 {
1347 int len = 0;
1348 while (*p++ != 0) len++;
1349 return len;
1350 }
1351
1352
1353
1354 /*************************************************
1355 * Print 16-bit character string *
1356 *************************************************/
1357
1358 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1359 If handed a NULL file, just counts chars without printing. */
1360
1361 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1362 {
1363 int yield = 0;
1364
1365 if (length < 0)
1366 length = strlen16(p);
1367
1368 while (length-- > 0)
1369 {
1370 int c = *p++ & 0xffff;
1371 #if !defined NOUTF8
1372 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1373 {
1374 int d = *p & 0xffff;
1375 if (d >= 0xDC00 && d < 0xDFFF)
1376 {
1377 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1378 length--;
1379 p++;
1380 }
1381 }
1382 #endif
1383 yield += pchar(c, f);
1384 }
1385
1386 return yield;
1387 }
1388 #endif
1389
1390
1391
1392 /*************************************************
1393 * Callout function *
1394 *************************************************/
1395
1396 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1397 the match. Yield zero unless more callouts than the fail count, or the callout
1398 data is not zero. */
1399
1400 static int callout(pcre_callout_block *cb)
1401 {
1402 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1403 int i, pre_start, post_start, subject_length;
1404
1405 if (callout_extra)
1406 {
1407 fprintf(f, "Callout %d: last capture = %d\n",
1408 cb->callout_number, cb->capture_last);
1409
1410 for (i = 0; i < cb->capture_top * 2; i += 2)
1411 {
1412 if (cb->offset_vector[i] < 0)
1413 fprintf(f, "%2d: <unset>\n", i/2);
1414 else
1415 {
1416 fprintf(f, "%2d: ", i/2);
1417 PCHARSV(cb->subject + cb->offset_vector[i],
1418 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1419 fprintf(f, "\n");
1420 }
1421 }
1422 }
1423
1424 /* Re-print the subject in canonical form, the first time or if giving full
1425 datails. On subsequent calls in the same match, we use pchars just to find the
1426 printed lengths of the substrings. */
1427
1428 if (f != NULL) fprintf(f, "--->");
1429
1430 PCHARS(pre_start, cb->subject, cb->start_match, f);
1431 PCHARS(post_start, cb->subject + cb->start_match,
1432 cb->current_position - cb->start_match, f);
1433
1434 PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1435
1436 PCHARSV(cb->subject + cb->current_position,
1437 cb->subject_length - cb->current_position, f);
1438
1439 if (f != NULL) fprintf(f, "\n");
1440
1441 /* Always print appropriate indicators, with callout number if not already
1442 shown. For automatic callouts, show the pattern offset. */
1443
1444 if (cb->callout_number == 255)
1445 {
1446 fprintf(outfile, "%+3d ", cb->pattern_position);
1447 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1448 }
1449 else
1450 {
1451 if (callout_extra) fprintf(outfile, " ");
1452 else fprintf(outfile, "%3d ", cb->callout_number);
1453 }
1454
1455 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1456 fprintf(outfile, "^");
1457
1458 if (post_start > 0)
1459 {
1460 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1461 fprintf(outfile, "^");
1462 }
1463
1464 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1465 fprintf(outfile, " ");
1466
1467 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1468 pbuffer + cb->pattern_position);
1469
1470 fprintf(outfile, "\n");
1471 first_callout = 0;
1472
1473 if (cb->mark != last_callout_mark)
1474 {
1475 fprintf(outfile, "Latest Mark: %s\n",
1476 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1477 last_callout_mark = cb->mark;
1478 }
1479
1480 if (cb->callout_data != NULL)
1481 {
1482 int callout_data = *((int *)(cb->callout_data));
1483 if (callout_data != 0)
1484 {
1485 fprintf(outfile, "Callout data = %d\n", callout_data);
1486 return callout_data;
1487 }
1488 }
1489
1490 return (cb->callout_number != callout_fail_id)? 0 :
1491 (++callout_count >= callout_fail_count)? 1 : 0;
1492 }
1493
1494
1495 /*************************************************
1496 * Local malloc functions *
1497 *************************************************/
1498
1499 /* Alternative malloc function, to test functionality and save the size of a
1500 compiled re, which is the first store request that pcre_compile() makes. The
1501 show_malloc variable is set only during matching. */
1502
1503 static void *new_malloc(size_t size)
1504 {
1505 void *block = malloc(size);
1506 gotten_store = size;
1507 if (first_gotten_store == 0) first_gotten_store = size;
1508 if (show_malloc)
1509 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1510 return block;
1511 }
1512
1513 static void new_free(void *block)
1514 {
1515 if (show_malloc)
1516 fprintf(outfile, "free %p\n", block);
1517 free(block);
1518 }
1519
1520 /* For recursion malloc/free, to test stacking calls */
1521
1522 static void *stack_malloc(size_t size)
1523 {
1524 void *block = malloc(size);
1525 if (show_malloc)
1526 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1527 return block;
1528 }
1529
1530 static void stack_free(void *block)
1531 {
1532 if (show_malloc)
1533 fprintf(outfile, "stack_free %p\n", block);
1534 free(block);
1535 }
1536
1537
1538 /*************************************************
1539 * Call pcre_fullinfo() *
1540 *************************************************/
1541
1542 /* Get one piece of information from the pcre_fullinfo() function. When only
1543 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1544 value, but the code is defensive. */
1545
1546 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1547 {
1548 int rc;
1549
1550 if (use_pcre16)
1551 #ifdef SUPPORT_PCRE16
1552 rc = pcre16_fullinfo(re, study, option, ptr);
1553 #else
1554 rc = PCRE_ERROR_BADMODE;
1555 #endif
1556 else
1557 #ifdef SUPPORT_PCRE8
1558 rc = pcre_fullinfo(re, study, option, ptr);
1559 #else
1560 rc = PCRE_ERROR_BADMODE;
1561 #endif
1562
1563 if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1564 use_pcre16? "16" : "", option);
1565 }
1566
1567
1568
1569 /*************************************************
1570 * Swap byte functions *
1571 *************************************************/
1572
1573 /* The following functions swap the bytes of a pcre_uint16
1574 and pcre_uint32 value.
1575
1576 Arguments:
1577 value any number
1578
1579 Returns: the byte swapped value
1580 */
1581
1582 static pcre_uint32
1583 swap_uint32(pcre_uint32 value)
1584 {
1585 return ((value & 0x000000ff) << 24) |
1586 ((value & 0x0000ff00) << 8) |
1587 ((value & 0x00ff0000) >> 8) |
1588 (value >> 24);
1589 }
1590
1591 static pcre_uint16
1592 swap_uint16(pcre_uint16 value)
1593 {
1594 return (value >> 8) | (value << 8);
1595 }
1596
1597
1598
1599 /*************************************************
1600 * Flip bytes in a compiled pattern *
1601 *************************************************/
1602
1603 /* This function is called if the 'F' option was present on a pattern that is
1604 to be written to a file. We flip the bytes of all the integer fields in the
1605 regex data block and the study block. In 16-bit mode this also flips relevant
1606 bytes in the pattern itself. This is to make it possible to test PCRE's
1607 ability to reload byte-flipped patterns, e.g. those compiled on a different
1608 architecture. */
1609
1610 static void
1611 regexflip(pcre *ere, pcre_extra *extra)
1612 {
1613 real_pcre *re = (real_pcre *)ere;
1614 int op;
1615
1616 #ifdef SUPPORT_PCRE16
1617 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1618 int length = re->name_count * re->name_entry_size;
1619 #ifdef SUPPORT_UTF
1620 BOOL utf = (re->options & PCRE_UTF16) != 0;
1621 BOOL utf16_char = FALSE;
1622 #endif /* SUPPORT_UTF */
1623 #endif /* SUPPORT_PCRE16 */
1624
1625 /* Always flip the bytes in the main data block and study blocks. */
1626
1627 re->magic_number = REVERSED_MAGIC_NUMBER;
1628 re->size = swap_uint32(re->size);
1629 re->options = swap_uint32(re->options);
1630 re->flags = swap_uint16(re->flags);
1631 re->top_bracket = swap_uint16(re->top_bracket);
1632 re->top_backref = swap_uint16(re->top_backref);
1633 re->first_char = swap_uint16(re->first_char);
1634 re->req_char = swap_uint16(re->req_char);
1635 re->name_table_offset = swap_uint16(re->name_table_offset);
1636 re->name_entry_size = swap_uint16(re->name_entry_size);
1637 re->name_count = swap_uint16(re->name_count);
1638
1639 if (extra != NULL)
1640 {
1641 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1642 rsd->size = swap_uint32(rsd->size);
1643 rsd->flags = swap_uint32(rsd->flags);
1644 rsd->minlength = swap_uint32(rsd->minlength);
1645 }
1646
1647 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1648 in the name table, if present, and then in the pattern itself. */
1649
1650 #ifdef SUPPORT_PCRE16
1651 if (!use_pcre16) return;
1652
1653 while(TRUE)
1654 {
1655 /* Swap previous characters. */
1656 while (length-- > 0)
1657 {
1658 *ptr = swap_uint16(*ptr);
1659 ptr++;
1660 }
1661 #ifdef SUPPORT_UTF
1662 if (utf16_char)
1663 {
1664 if ((ptr[-1] & 0xfc00) == 0xd800)
1665 {
1666 /* We know that there is only one extra character in UTF-16. */
1667 *ptr = swap_uint16(*ptr);
1668 ptr++;
1669 }
1670 }
1671 utf16_char = FALSE;
1672 #endif /* SUPPORT_UTF */
1673
1674 /* Get next opcode. */
1675
1676 length = 0;
1677 op = *ptr;
1678 *ptr++ = swap_uint16(op);
1679
1680 switch (op)
1681 {
1682 case OP_END:
1683 return;
1684
1685 #ifdef SUPPORT_UTF
1686 case OP_CHAR:
1687 case OP_CHARI:
1688 case OP_NOT:
1689 case OP_NOTI:
1690 case OP_STAR:
1691 case OP_MINSTAR:
1692 case OP_PLUS:
1693 case OP_MINPLUS:
1694 case OP_QUERY:
1695 case OP_MINQUERY:
1696 case OP_UPTO:
1697 case OP_MINUPTO:
1698 case OP_EXACT:
1699 case OP_POSSTAR:
1700 case OP_POSPLUS:
1701 case OP_POSQUERY:
1702 case OP_POSUPTO:
1703 case OP_STARI:
1704 case OP_MINSTARI:
1705 case OP_PLUSI:
1706 case OP_MINPLUSI:
1707 case OP_QUERYI:
1708 case OP_MINQUERYI:
1709 case OP_UPTOI:
1710 case OP_MINUPTOI:
1711 case OP_EXACTI:
1712 case OP_POSSTARI:
1713 case OP_POSPLUSI:
1714 case OP_POSQUERYI:
1715 case OP_POSUPTOI:
1716 case OP_NOTSTAR:
1717 case OP_NOTMINSTAR:
1718 case OP_NOTPLUS:
1719 case OP_NOTMINPLUS:
1720 case OP_NOTQUERY:
1721 case OP_NOTMINQUERY:
1722 case OP_NOTUPTO:
1723 case OP_NOTMINUPTO:
1724 case OP_NOTEXACT:
1725 case OP_NOTPOSSTAR:
1726 case OP_NOTPOSPLUS:
1727 case OP_NOTPOSQUERY:
1728 case OP_NOTPOSUPTO:
1729 case OP_NOTSTARI:
1730 case OP_NOTMINSTARI:
1731 case OP_NOTPLUSI:
1732 case OP_NOTMINPLUSI:
1733 case OP_NOTQUERYI:
1734 case OP_NOTMINQUERYI:
1735 case OP_NOTUPTOI:
1736 case OP_NOTMINUPTOI:
1737 case OP_NOTEXACTI:
1738 case OP_NOTPOSSTARI:
1739 case OP_NOTPOSPLUSI:
1740 case OP_NOTPOSQUERYI:
1741 case OP_NOTPOSUPTOI:
1742 if (utf) utf16_char = TRUE;
1743 #endif
1744 /* Fall through. */
1745
1746 default:
1747 length = OP_lengths16[op] - 1;
1748 break;
1749
1750 case OP_CLASS:
1751 case OP_NCLASS:
1752 /* Skip the character bit map. */
1753 ptr += 32/sizeof(pcre_uint16);
1754 length = 0;
1755 break;
1756
1757 case OP_XCLASS:
1758 /* Reverse the size of the XCLASS instance. */
1759 ptr++;
1760 *ptr = swap_uint16(*ptr);
1761 if (LINK_SIZE > 1)
1762 {
1763 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1764 ptr++;
1765 *ptr = swap_uint16(*ptr);
1766 }
1767 ptr++;
1768
1769 if (LINK_SIZE > 1)
1770 length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1771 (1 + LINK_SIZE + 1);
1772 else
1773 length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1774
1775 op = *ptr;
1776 *ptr = swap_uint16(op);
1777 if ((op & XCL_MAP) != 0)
1778 {
1779 /* Skip the character bit map. */
1780 ptr += 32/sizeof(pcre_uint16);
1781 length -= 32/sizeof(pcre_uint16);
1782 }
1783 break;
1784 }
1785 }
1786 /* Control should never reach here in 16 bit mode. */
1787 #endif /* SUPPORT_PCRE16 */
1788 }
1789
1790
1791
1792 /*************************************************
1793 * Check match or recursion limit *
1794 *************************************************/
1795
1796 static int
1797 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1798 int start_offset, int options, int *use_offsets, int use_size_offsets,
1799 int flag, unsigned long int *limit, int errnumber, const char *msg)
1800 {
1801 int count;
1802 int min = 0;
1803 int mid = 64;
1804 int max = -1;
1805
1806 extra->flags |= flag;
1807
1808 for (;;)
1809 {
1810 *limit = mid;
1811
1812 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1813 use_offsets, use_size_offsets);
1814
1815 if (count == errnumber)
1816 {
1817 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1818 min = mid;
1819 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1820 }
1821
1822 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1823 count == PCRE_ERROR_PARTIAL)
1824 {
1825 if (mid == min + 1)
1826 {
1827 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1828 break;
1829 }
1830 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1831 max = mid;
1832 mid = (min + mid)/2;
1833 }
1834 else break; /* Some other error */
1835 }
1836
1837 extra->flags &= ~flag;
1838 return count;
1839 }
1840
1841
1842
1843 /*************************************************
1844 * Case-independent strncmp() function *
1845 *************************************************/
1846
1847 /*
1848 Arguments:
1849 s first string
1850 t second string
1851 n number of characters to compare
1852
1853 Returns: < 0, = 0, or > 0, according to the comparison
1854 */
1855
1856 static int
1857 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1858 {
1859 while (n--)
1860 {
1861 int c = tolower(*s++) - tolower(*t++);
1862 if (c) return c;
1863 }
1864 return 0;
1865 }
1866
1867
1868
1869 /*************************************************
1870 * Check newline indicator *
1871 *************************************************/
1872
1873 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1874 a message and return 0 if there is no match.
1875
1876 Arguments:
1877 p points after the leading '<'
1878 f file for error message
1879
1880 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1881 */
1882
1883 static int
1884 check_newline(pcre_uint8 *p, FILE *f)
1885 {
1886 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1887 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1888 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1889 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1890 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1891 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1892 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1893 fprintf(f, "Unknown newline type at: <%s\n", p);
1894 return 0;
1895 }
1896
1897
1898
1899 /*************************************************
1900 * Usage function *
1901 *************************************************/
1902
/* Write the command-line help text to stdout. Which lines appear depends on
the features that were compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
fputs(" -16 use 16-bit interface\n", stdout);
#endif
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s force each pattern to be studied at basic level\n", stdout);
fputs(" -s+ force each pattern to be studied, using JIT if available\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1940
1941
1942
1943 /*************************************************
1944 * Main Program *
1945 *************************************************/
1946
1947 /* Read lines from named file or stdin and write to named file or stdout; lines
1948 consist of a regular expression, in delimiters and optionally followed by
1949 options, followed by a set of test data, terminated by an empty line. */
1950
1951 int main(int argc, char **argv)
1952 {
1953 FILE *infile = stdin;
1954 const char *version;
1955 int options = 0;
1956 int study_options = 0;
1957 int default_find_match_limit = FALSE;
1958 int op = 1;
1959 int timeit = 0;
1960 int timeitm = 0;
1961 int showinfo = 0;
1962 int showstore = 0;
1963 int force_study = -1;
1964 int force_study_options = 0;
1965 int quiet = 0;
1966 int size_offsets = 45;
1967 int size_offsets_max;
1968 int *offsets = NULL;
1969 #if !defined NOPOSIX
1970 int posix = 0;
1971 #endif
1972 int debug = 0;
1973 int done = 0;
1974 int all_use_dfa = 0;
1975 int yield = 0;
1976 int stack_size;
1977
1978 pcre_jit_stack *jit_stack = NULL;
1979
1980 /* These vectors store, end-to-end, a list of captured substring names. Assume
1981 that 1024 is plenty long enough for the few names we'll be testing. */
1982
1983 pcre_uchar copynames[1024];
1984 pcre_uchar getnames[1024];
1985
1986 pcre_uchar *copynamesptr;
1987 pcre_uchar *getnamesptr;
1988
1989 /* Get buffers from malloc() so that valgrind will check their misuse when
1990 debugging. They grow automatically when very long lines are read. The 16-bit
1991 buffer (buffer16) is obtained only if needed. */
1992
1993 buffer = (pcre_uint8 *)malloc(buffer_size);
1994 dbuffer = (pcre_uint8 *)malloc(buffer_size);
1995 pbuffer = (pcre_uint8 *)malloc(buffer_size);
1996
1997 /* The outfile variable is static so that new_malloc can use it. */
1998
1999 outfile = stdout;
2000
2001 /* The following _setmode() stuff is some Windows magic that tells its runtime
2002 library to translate CRLF into a single LF character. At least, that's what
2003 I've been told: never having used Windows I take this all on trust. Originally
2004 it set 0x8000, but then I was advised that _O_BINARY was better. */
2005
2006 #if defined(_WIN32) || defined(WIN32)
2007 _setmode( _fileno( stdout ), _O_BINARY );
2008 #endif
2009
2010 /* Get the version number: both pcre_version() and pcre16_version() give the
2011 same answer. We just need to ensure that we call one that is available. */
2012
2013 #ifdef SUPPORT_PCRE8
2014 version = pcre_version();
2015 #else
2016 version = pcre16_version();
2017 #endif
2018
2019 /* Scan options */
2020
2021 while (argc > 1 && argv[op][0] == '-')
2022 {
2023 pcre_uint8 *endptr;
2024
2025 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2026 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2027 else if (strcmp(argv[op], "-s+") == 0)
2028 {
2029 force_study = 1;
2030 force_study_options = PCRE_STUDY_JIT_COMPILE;
2031 }
2032 #ifdef SUPPORT_PCRE16
2033 else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
2034 #endif
2035
2036 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2037 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2038 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2039 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2040 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2041 #if !defined NODFA
2042 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2043 #endif
2044 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2045 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2046 *endptr == 0))
2047 {
2048 op++;
2049 argc--;
2050 }
2051 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2052 {
2053 int both = argv[op][2] == 0;
2054 int temp;
2055 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2056 *endptr == 0))
2057 {
2058 timeitm = temp;
2059 op++;
2060 argc--;
2061 }
2062 else timeitm = LOOPREPEAT;
2063 if (both) timeit = timeitm;
2064 }
2065 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2066 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2067 *endptr == 0))
2068 {
2069 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2070 printf("PCRE: -S not supported on this OS\n");
2071 exit(1);
2072 #else
2073 int rc;
2074 struct rlimit rlim;
2075 getrlimit(RLIMIT_STACK, &rlim);
2076 rlim.rlim_cur = stack_size * 1024 * 1024;
2077 rc = setrlimit(RLIMIT_STACK, &rlim);
2078 if (rc != 0)
2079 {
2080 printf("PCRE: setrlimit() failed with error %d\n", rc);
2081 exit(1);
2082 }
2083 op++;
2084 argc--;
2085 #endif
2086 }
2087 #if !defined NOPOSIX
2088 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2089 #endif
2090 else if (strcmp(argv[op], "-C") == 0)
2091 {
2092 int rc;
2093 unsigned long int lrc;
2094 printf("PCRE version %s\n", version);
2095 printf("Compiled with\n");
2096
2097 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2098 are set, either both UTFs are supported or both are not supported. */
2099
2100 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2101 printf(" 8-bit and 16-bit support\n");
2102 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2103 if (rc)
2104 printf(" UTF-8 and UTF-16 support\n");
2105 else
2106 printf(" No UTF-8 or UTF-16 support\n");
2107 #elif defined SUPPORT_PCRE8
2108 printf(" 8-bit support only\n");
2109 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2110 printf(" %sUTF-8 support\n", rc? "" : "No ");
2111 #else
2112 printf(" 16-bit support only\n");
2113 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2114 printf(" %sUTF-16 support\n", rc? "" : "No ");
2115 #endif
2116
2117 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2118 printf(" %sUnicode properties support\n", rc? "" : "No ");
2119 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
2120 if (rc)
2121 printf(" Just-in-time compiler support\n");
2122 else
2123 printf(" No just-in-time compiler support\n");
2124 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
2125 /* Note that these values are always the ASCII values, even
2126 in EBCDIC environments. CR is 13 and NL is 10. */
2127 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2128 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2129 (rc == -2)? "ANYCRLF" :
2130 (rc == -1)? "ANY" : "???");
2131 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
2132 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2133 "all Unicode newlines");
2134 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
2135 printf(" Internal link size = %d\n", rc);
2136 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2137 printf(" POSIX malloc threshold = %d\n", rc);
2138 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2139 printf(" Default match limit = %ld\n", lrc);
2140 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2141 printf(" Default recursion depth limit = %ld\n", lrc);
2142 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
2143 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2144 goto EXIT;
2145 }
2146 else if (strcmp(argv[op], "-help") == 0 ||
2147 strcmp(argv[op], "--help") == 0)
2148 {
2149 usage();
2150 goto EXIT;
2151 }
2152 else
2153 {
2154 printf("** Unknown or malformed option %s\n", argv[op]);
2155 usage();
2156 yield = 1;
2157 goto EXIT;
2158 }
2159 op++;
2160 argc--;
2161 }
2162
2163 /* Get the store for the offsets vector, and remember what it was */
2164
2165 size_offsets_max = size_offsets;
2166 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2167 if (offsets == NULL)
2168 {
2169 printf("** Failed to get %d bytes of memory for offsets vector\n",
2170 (int)(size_offsets_max * sizeof(int)));
2171 yield = 1;
2172 goto EXIT;
2173 }
2174
2175 /* Sort out the input and output files */
2176
2177 if (argc > 1)
2178 {
2179 infile = fopen(argv[op], INPUT_MODE);
2180 if (infile == NULL)
2181 {
2182 printf("** Failed to open %s\n", argv[op]);
2183 yield = 1;
2184 goto EXIT;
2185 }
2186 }
2187
2188 if (argc > 2)
2189 {
2190 outfile = fopen(argv[op+1], OUTPUT_MODE);
2191 if (outfile == NULL)
2192 {
2193 printf("** Failed to open %s\n", argv[op+1]);
2194 yield = 1;
2195 goto EXIT;
2196 }
2197 }
2198
2199 /* Set alternative malloc function */
2200
2201 #ifdef SUPPORT_PCRE8
2202 pcre_malloc = new_malloc;
2203 pcre_free = new_free;
2204 pcre_stack_malloc = stack_malloc;
2205 pcre_stack_free = stack_free;
2206 #endif
2207
2208 #ifdef SUPPORT_PCRE16
2209 pcre16_malloc = new_malloc;
2210 pcre16_free = new_free;
2211 pcre16_stack_malloc = stack_malloc;
2212 pcre16_stack_free = stack_free;
2213 #endif
2214
2215 /* Heading line unless quiet, then prompt for first regex if stdin */
2216
2217 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2218
2219 /* Main loop */
2220
2221 while (!done)
2222 {
2223 pcre *re = NULL;
2224 pcre_extra *extra = NULL;
2225
2226 #if !defined NOPOSIX /* There are still compilers that require no indent */
2227 regex_t preg;
2228 int do_posix = 0;
2229 #endif
2230
2231 const char *error;
2232 pcre_uint8 *markptr;
2233 pcre_uint8 *p, *pp, *ppp;
2234 pcre_uint8 *to_file = NULL;
2235 const pcre_uint8 *tables = NULL;
2236 unsigned long int true_size, true_study_size = 0;
2237 size_t size, regex_gotten_store;
2238 int do_allcaps = 0;
2239 int do_mark = 0;
2240 int do_study = 0;
2241 int no_force_study = 0;
2242 int do_debug = debug;
2243 int do_G = 0;
2244 int do_g = 0;
2245 int do_showinfo = showinfo;
2246 int do_showrest = 0;
2247 int do_showcaprest = 0;
2248 int do_flip = 0;
2249 int erroroffset, len, delimiter, poffset;
2250
2251 use_utf = 0;
2252 debug_lengths = 1;
2253
2254 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2255 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2256 fflush(outfile);
2257
2258 p = buffer;
2259 while (isspace(*p)) p++;
2260 if (*p == 0) continue;
2261
2262 /* See if the pattern is to be loaded pre-compiled from a file. */
2263
2264 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2265 {
2266 unsigned long int magic, get_options;
2267 pcre_uint8 sbuf[8];
2268 FILE *f;
2269
2270 p++;
2271 pp = p + (int)strlen((char *)p);
2272 while (isspace(pp[-1])) pp--;
2273 *pp = 0;
2274
2275 f = fopen((char *)p, "rb");
2276 if (f == NULL)
2277 {
2278 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2279 continue;
2280 }
2281
2282 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2283
2284 true_size =
2285 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2286 true_study_size =
2287 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2288
2289 re = (real_pcre *)new_malloc(true_size);
2290 regex_gotten_store = first_gotten_store;
2291
2292 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2293
2294 magic = ((real_pcre *)re)->magic_number;
2295 if (magic != MAGIC_NUMBER)
2296 {
2297 if (swap_uint32(magic) == MAGIC_NUMBER)
2298 {
2299 do_flip = 1;
2300 }
2301 else
2302 {
2303 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2304 fclose(f);
2305 continue;
2306 }
2307 }
2308
2309 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2310 do_flip? " (byte-inverted)" : "", p);
2311
2312 /* Now see if there is any following study data. */
2313
2314 if (true_study_size != 0)
2315 {
2316 pcre_study_data *psd;
2317
2318 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2319 extra->flags = PCRE_EXTRA_STUDY_DATA;
2320
2321 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2322 extra->study_data = psd;
2323
2324 if (fread(psd, 1, true_study_size, f) != true_study_size)
2325 {
2326 FAIL_READ:
2327 fprintf(outfile, "Failed to read data from %s\n", p);
2328 if (extra != NULL)
2329 {
2330 PCRE_FREE_STUDY(extra);
2331 }
2332 if (re != NULL) new_free(re);
2333 fclose(f);
2334 continue;
2335 }
2336 fprintf(outfile, "Study data loaded from %s\n", p);
2337 do_study = 1; /* To get the data output if requested */
2338 }
2339 else fprintf(outfile, "No study data\n");
2340
2341 /* Flip the necessary bytes. */
2342 if (do_flip)
2343 {
2344 PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2345 }
2346
2347 /* Need to know if UTF-8 for printing data strings */
2348
2349 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2350 use_utf = (get_options & PCRE_UTF8) != 0;
2351
2352 fclose(f);
2353 goto SHOW_INFO;
2354 }
2355
2356 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2357 the pattern; if it isn't complete, read more. */
2358
2359 delimiter = *p++;
2360
2361 if (isalnum(delimiter) || delimiter == '\\')
2362 {
2363 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2364 goto SKIP_DATA;
2365 }
2366
2367 pp = p;
2368 poffset = (int)(p - buffer);
2369
2370 for(;;)
2371 {
2372 while (*pp != 0)
2373 {
2374 if (*pp == '\\' && pp[1] != 0) pp++;
2375 else if (*pp == delimiter) break;
2376 pp++;
2377 }
2378 if (*pp != 0) break;
2379 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2380 {
2381 fprintf(outfile, "** Unexpected EOF\n");
2382 done = 1;
2383 goto CONTINUE;
2384 }
2385 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2386 }
2387
2388 /* The buffer may have moved while being extended; reset the start of data
2389 pointer to the correct relative point in the buffer. */
2390
2391 p = buffer + poffset;
2392
2393 /* If the first character after the delimiter is backslash, make
2394 the pattern end with backslash. This is purely to provide a way
2395 of testing for the error message when a pattern ends with backslash. */
2396
2397 if (pp[1] == '\\') *pp++ = '\\';
2398
2399 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2400 for callouts. */
2401
2402 *pp++ = 0;
2403 strcpy((char *)pbuffer, (char *)p);
2404
2405 /* Look for options after final delimiter */
2406
2407 options = 0;
2408 study_options = 0;
2409 log_store = showstore; /* default from command line */
2410
2411 while (*pp != 0)
2412 {
2413 switch (*pp++)
2414 {
2415 case 'f': options |= PCRE_FIRSTLINE; break;
2416 case 'g': do_g = 1; break;
2417 case 'i': options |= PCRE_CASELESS; break;
2418 case 'm': options |= PCRE_MULTILINE; break;
2419 case 's': options |= PCRE_DOTALL; break;
2420 case 'x': options |= PCRE_EXTENDED; break;
2421
2422 case '+':
2423 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2424 break;
2425
2426 case '=': do_allcaps = 1; break;
2427 case 'A': options |= PCRE_ANCHORED; break;
2428 case 'B': do_debug = 1; break;
2429 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2430 case 'D': do_debug = do_showinfo = 1; break;
2431 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2432 case 'F': do_flip = 1; break;
2433 case 'G': do_G = 1; break;
2434 case 'I': do_showinfo = 1; break;
2435 case 'J': options |= PCRE_DUPNAMES; break;
2436 case 'K': do_mark = 1; break;
2437 case 'M': log_store = 1; break;
2438 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2439
2440 #if !defined NOPOSIX
2441 case 'P': do_posix = 1; break;
2442 #endif
2443
2444 case 'S':
2445 if (do_study == 0)
2446 {
2447 do_study = 1;
2448 if (*pp == '+')
2449 {
2450 study_options |= PCRE_STUDY_JIT_COMPILE;
2451 pp++;
2452 }
2453 }
2454 else
2455 {
2456 do_study = 0;
2457 no_force_study = 1;
2458 }
2459 break;
2460
2461 case 'U': options |= PCRE_UNGREEDY; break;
2462 case 'W': options |= PCRE_UCP; break;
2463 case 'X': options |= PCRE_EXTRA; break;
2464 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2465 case 'Z': debug_lengths = 0; break;
2466 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2467 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2468
2469 case 'T':
2470 switch (*pp++)
2471 {
2472 case '0': tables = tables0; break;
2473 case '1': tables = tables1; break;
2474
2475 case '\r':
2476 case '\n':
2477 case ' ':
2478 case 0:
2479 fprintf(outfile, "** Missing table number after /T\n");
2480 goto SKIP_DATA;
2481
2482 default:
2483 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2484 goto SKIP_DATA;
2485 }
2486 break;
2487
2488 case 'L':
2489 ppp = pp;
2490 /* The '\r' test here is so that it works on Windows. */
2491 /* The 0 (NUL) test is just in case this is an unterminated line. */
2492 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2493 *ppp = 0;
2494 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2495 {
2496 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2497 goto SKIP_DATA;
2498 }
2499 locale_set = 1;
2500 tables = pcre_maketables();
2501 pp = ppp;
2502 break;
2503
2504 case '>':
2505 to_file = pp;
2506 while (*pp != 0) pp++;
2507 while (isspace(pp[-1])) pp--;
2508 *pp = 0;
2509 break;
2510
2511 case '<':
2512 {
2513 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2514 {
2515 options |= PCRE_JAVASCRIPT_COMPAT;
2516 pp += 3;
2517 }
2518 else
2519 {
2520 int x = check_newline(pp, outfile);
2521 if (x == 0) goto SKIP_DATA;
2522 options |= x;
2523 while (*pp++ != '>');
2524 }
2525 }
2526 break;
2527
2528 case '\r': /* So that it works in Windows */
2529 case '\n':
2530 case ' ':
2531 break;
2532
2533 default:
2534 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2535 goto SKIP_DATA;
2536 }
2537 }
2538
2539 /* Handle compiling via the POSIX interface, which doesn't support the
2540 timing, showing, or debugging options, nor the ability to pass over
2541 local character tables. Neither does it have 16-bit support. */
2542
2543 #if !defined NOPOSIX
2544 if (posix || do_posix)
2545 {
2546 int rc;
2547 int cflags = 0;
2548
2549 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2550 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2551 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2552 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2553 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2554 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2555 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2556
2557 first_gotten_store = 0;
2558 rc = regcomp(&preg, (char *)p, cflags);
2559
2560 /* Compilation failed; go back for another re, skipping to blank line
2561 if non-interactive. */
2562
2563 if (rc != 0)
2564 {
2565 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2566 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2567 goto SKIP_DATA;
2568 }
2569 }
2570
2571 /* Handle compiling via the native interface */
2572
2573 else
2574 #endif /* !defined NOPOSIX */
2575
2576 {
2577 unsigned long int get_options;
2578
2579 /* In 16-bit mode, convert the input. */
2580
2581 #ifdef SUPPORT_PCRE16
2582 if (use_pcre16)
2583 {
2584 if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2585 {
2586 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2587 "converted to UTF-16\n");
2588 goto SKIP_DATA;
2589 }
2590 p = (pcre_uint8 *)buffer16;
2591 }
2592 #endif
2593
2594 /* Compile many times when timing */
2595
2596 if (timeit > 0)
2597 {
2598 register int i;
2599 clock_t time_taken;
2600 clock_t start_time = clock();
2601 for (i = 0; i < timeit; i++)
2602 {
2603 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2604 if (re != NULL) free(re);
2605 }
2606 time_taken = clock() - start_time;
2607 fprintf(outfile, "Compile time %.4f milliseconds\n",
2608 (((double)time_taken * 1000.0) / (double)timeit) /
2609 (double)CLOCKS_PER_SEC);
2610 }
2611
2612 first_gotten_store = 0;
2613 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2614
2615 /* Compilation failed; go back for another re, skipping to blank line
2616 if non-interactive. */
2617
2618 if (re == NULL)
2619 {
2620 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2621 SKIP_DATA:
2622 if (infile != stdin)
2623 {
2624 for (;;)
2625 {
2626 if (extend_inputline(infile, buffer, NULL) == NULL)
2627 {
2628 done = 1;
2629 goto CONTINUE;
2630 }
2631 len = (int)strlen((char *)buffer);
2632 while (len > 0 && isspace(buffer[len-1])) len--;
2633 if (len == 0) break;
2634 }
2635 fprintf(outfile, "\n");
2636 }
2637 goto CONTINUE;
2638 }
2639
2640 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2641 within the regex; check for this so that we know how to process the data
2642 lines. */
2643
2644 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2645 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2646
2647 /* Extract the size for possible writing before possibly flipping it,
2648 and remember the store that was got. */
2649
2650 true_size = ((real_pcre *)re)->size;
2651 regex_gotten_store = first_gotten_store;
2652
2653 /* Output code size information if requested */
2654
2655 if (log_store)
2656 fprintf(outfile, "Memory allocation (code space): %d\n",
2657 (int)(first_gotten_store -
2658 sizeof(real_pcre) -
2659 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2660
2661 /* If -s or /S was present, study the regex to generate additional info to
2662 help with the matching, unless the pattern has the SS option, which
2663 suppresses the effect of /S (used for a few test patterns where studying is
2664 never sensible). */
2665
2666 if (do_study || (force_study >= 0 && !no_force_study))
2667 {
2668 if (timeit > 0)
2669 {
2670 register int i;
2671 clock_t time_taken;
2672 clock_t start_time = clock();
2673 for (i = 0; i < timeit; i++)
2674 {
2675 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2676 }
2677 time_taken = clock() - start_time;
2678 if (extra != NULL)
2679 {
2680 PCRE_FREE_STUDY(extra);
2681 }
2682 fprintf(outfile, " Study time %.4f milliseconds\n",
2683 (((double)time_taken * 1000.0) / (double)timeit) /
2684 (double)CLOCKS_PER_SEC);
2685 }
2686 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2687 if (error != NULL)
2688 fprintf(outfile, "Failed to study: %s\n", error);
2689 else if (extra != NULL)
2690 {
2691 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2692 if (log_store)
2693 {
2694 size_t jitsize;
2695 new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2696 if (jitsize != 0)
2697 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2698 }
2699 }
2700 }
2701
2702 /* If /K was present, we set up for handling MARK data. */
2703
2704 if (do_mark)
2705 {
2706 if (extra == NULL)
2707 {
2708 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2709 extra->flags = 0;
2710 }
2711 extra->mark = &markptr;
2712 extra->flags |= PCRE_EXTRA_MARK;
2713 }
2714
2715 /* Extract and display information from the compiled data if required. */
2716
2717 SHOW_INFO:
2718
2719 if (do_debug)
2720 {
2721 fprintf(outfile, "------------------------------------------------------------------\n");
2722 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2723 if (use_pcre16)
2724 pcre16_printint(re, outfile, debug_lengths);
2725 else
2726 pcre_printint(re, outfile, debug_lengths);
2727 #elif defined SUPPORT_PCRE8
2728 pcre_printint(re, outfile, debug_lengths);
2729 #else
2730 pcre16_printint(re, outfile, debug_lengths);
2731 #endif
2732 }
2733
2734 /* We already have the options in get_options (see above) */
2735
2736 if (do_showinfo)
2737 {
2738 unsigned long int all_options;
2739 #if !defined NOINFOCHECK
2740 int old_first_char, old_options, old_count;
2741 #endif
2742 int count, backrefmax, first_char, need_char, okpartial, jchanged,
2743 hascrorlf;
2744 int nameentrysize, namecount;
2745 const pcre_uchar *nametable;
2746
2747 new_info(re, NULL, PCRE_INFO_SIZE, &size);
2748 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2749 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2750 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2751 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2752 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2753 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2754 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2755 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2756 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2757 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2758
2759 /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2760 that it gives the same results as the new function. */
2761
2762 #if !defined NOINFOCHECK
2763 if (!use_pcre16)
2764 {
2765 old_count = pcre_info(re, &old_options, &old_first_char);
2766 if (count < 0) fprintf(outfile,
2767 "Error %d from pcre_info()\n", count);
2768 else
2769 {
2770 if (old_count != count) fprintf(outfile,
2771 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2772 old_count);
2773
2774 if (old_first_char != first_char) fprintf(outfile,
2775 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2776 first_char, old_first_char);
2777
2778 if (old_options != (int)get_options) fprintf(outfile,
2779 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2780 get_options, old_options);
2781 }
2782 }
2783 #endif
2784
2785 if (size != regex_gotten_store) fprintf(outfile,
2786 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2787 (int)size, (int)regex_gotten_store);
2788
2789 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2790 if (backrefmax > 0)
2791 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2792
2793 if (namecount > 0)
2794 {
2795 fprintf(outfile, "Named capturing subpatterns:\n");
2796 while (namecount-- > 0)
2797 {
2798 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2799 int imm2_size = use_pcre16 ? 1 : 2;
2800 #else
2801 int imm2_size = IMM2_SIZE;
2802 #endif
2803 int length = (int)STRLEN(nametable + imm2_size);
2804 fprintf(outfile, " ");
2805 PCHARSV(nametable + imm2_size, length, outfile);
2806 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
2807 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2808 fprintf(outfile, "%3d\n", use_pcre16?
2809 (int)nametable[0] : ((int)nametable[0] << 8) | (int)nametable[1]);
2810 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
2811 #else
2812 fprintf(outfile, "%3d\n", GET2(nametable, 0));
2813 nametable += nameentrysize;
2814 #endif
2815 }
2816 }
2817
2818 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2819 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2820
2821 all_options = ((real_pcre *)re)->options;
2822 if (do_flip) all_options = swap_uint32(all_options);
2823
2824 if (get_options == 0) fprintf(outfile, "No options\n");
2825 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2826 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2827 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2828 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2829 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2830 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2831 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2832 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2833 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2834 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2835 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2836 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2837 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2838 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2839 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2840 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2841 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2842 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2843
2844 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2845
2846 switch (get_options & PCRE_NEWLINE_BITS)
2847 {
2848 case PCRE_NEWLINE_CR:
2849 fprintf(outfile, "Forced newline sequence: CR\n");
2850 break;
2851
2852 case PCRE_NEWLINE_LF:
2853 fprintf(outfile, "Forced newline sequence: LF\n");
2854 break;
2855
2856 case PCRE_NEWLINE_CRLF:
2857 fprintf(outfile, "Forced newline sequence: CRLF\n");
2858 break;
2859
2860 case PCRE_NEWLINE_ANYCRLF:
2861 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2862 break;
2863
2864 case PCRE_NEWLINE_ANY:
2865 fprintf(outfile, "Forced newline sequence: ANY\n");
2866 break;
2867
2868 default:
2869 break;
2870 }
2871
2872 if (first_char == -1)
2873 {
2874 fprintf(outfile, "First char at start or follows newline\n");
2875 }
2876 else if (first_char < 0)
2877 {
2878 fprintf(outfile, "No first char\n");
2879 }
2880 else
2881 {
2882 const char *caseless =
2883 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2884 "" : " (caseless)";
2885
2886 if (PRINTOK(first_char))
2887 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2888 else
2889 {
2890 fprintf(outfile, "First char = ");
2891 pchar(first_char, outfile);
2892 fprintf(outfile, "%s\n", caseless);
2893 }
2894 }
2895
2896 if (need_char < 0)
2897 {
2898 fprintf(outfile, "No need char\n");
2899 }
2900 else
2901 {
2902 const char *caseless =
2903 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2904 "" : " (caseless)";
2905
2906 if (PRINTOK(need_char))
2907 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2908 else
2909 {
2910 fprintf(outfile, "Need char = ");
2911 pchar(need_char, outfile);
2912 fprintf(outfile, "%s\n", caseless);
2913 }
2914 }
2915
2916 /* Don't output study size; at present it is in any case a fixed
2917 value, but it varies, depending on the computer architecture, and
2918 so messes up the test suite. (And with the /F option, it might be
2919 flipped.) If study was forced by an external -s, don't show this
2920 information unless -i or -d was also present. This means that, except
2921 when auto-callouts are involved, the output from runs with and without
2922 -s should be identical. */
2923
2924 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2925 {
2926 if (extra == NULL)
2927 fprintf(outfile, "Study returned NULL\n");
2928 else
2929 {
2930 pcre_uint8 *start_bits = NULL;
2931 int minlength;
2932
2933 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2934 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2935
2936 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2937 if (start_bits == NULL)
2938 fprintf(outfile, "No set of starting bytes\n");
2939 else
2940 {
2941 int i;
2942 int c = 24;
2943 fprintf(outfile, "Starting byte set: ");
2944 for (i = 0; i < 256; i++)
2945 {
2946 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2947 {
2948 if (c > 75)
2949 {
2950 fprintf(outfile, "\n ");
2951 c = 2;
2952 }
2953 if (PRINTOK(i) && i != ' ')
2954 {
2955 fprintf(outfile, "%c ", i);
2956 c += 2;
2957 }
2958 else
2959 {
2960 fprintf(outfile, "\\x%02x ", i);
2961 c += 5;
2962 }
2963 }
2964 }
2965 fprintf(outfile, "\n");
2966 }
2967 }
2968
2969 /* Show this only if the JIT was set by /S, not by -s. */
2970
2971 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2972 {
2973 int jit;
2974 new_info(re, extra, PCRE_INFO_JIT, &jit);
2975 if (jit)
2976 fprintf(outfile, "JIT study was successful\n");
2977 else
2978 #ifdef SUPPORT_JIT
2979 fprintf(outfile, "JIT study was not successful\n");
2980 #else
2981 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2982 #endif
2983 }
2984 }
2985 }
2986
2987 /* If the '>' option was present, we write out the regex to a file, and
2988 that is all. The first 8 bytes of the file are the regex length and then
2989 the study length, in big-endian order. */
2990
2991 if (to_file != NULL)
2992 {
2993 FILE *f = fopen((char *)to_file, "wb");
2994 if (f == NULL)
2995 {
2996 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2997 }
2998 else
2999 {
3000 pcre_uint8 sbuf[8];
3001
3002 if (do_flip) regexflip(re, extra);
3003 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3004 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3005 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3006 sbuf[3] = (pcre_uint8)((true_size) & 255);
3007 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3008 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3009 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3010 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3011
3012 if (fwrite(sbuf, 1, 8, f) < 8 ||
3013 fwrite(re, 1, true_size, f) < true_size)
3014 {
3015 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3016 }
3017 else
3018 {
3019 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3020
3021 /* If there is study data, write it. */
3022
3023 if (extra != NULL)
3024 {
3025 if (fwrite(extra->study_data, 1, true_study_size, f) <
3026 true_study_size)
3027 {
3028 fprintf(outfile, "Write error on %s: %s\n", to_file,
3029 strerror(errno));
3030 }
3031 else fprintf(outfile, "Study data written to %s\n", to_file);
3032 }
3033 }
3034 fclose(f);
3035 }
3036
3037 new_free(re);
3038 if (extra != NULL)
3039 {
3040 PCRE_FREE_STUDY(extra);
3041 }
3042 if (locale_set)
3043 {
3044 new_free((void *)tables);
3045 setlocale(LC_CTYPE, "C");
3046 locale_set = 0;
3047 }
3048 continue; /* With next regex */
3049 }
3050 } /* End of non-POSIX compile */
3051
3052 /* Read data lines and test them */
3053
3054 for (;;)
3055 {
3056 pcre_uint8 *q;
3057 pcre_uint8 *bptr;
3058 int *use_offsets = offsets;
3059 int use_size_offsets = size_offsets;
3060 int callout_data = 0;
3061 int callout_data_set = 0;
3062 int count, c;
3063 int copystrings = 0;
3064 int find_match_limit = default_find_match_limit;
3065 int getstrings = 0;
3066 int getlist = 0;
3067 int gmatched = 0;
3068 int start_offset = 0;
3069 int start_offset_sign = 1;
3070 int g_notempty = 0;
3071 int use_dfa = 0;
3072
3073 options = 0;
3074
3075 *copynames = 0;
3076 *getnames = 0;
3077
3078 copynamesptr = copynames;
3079 getnamesptr = getnames;
3080
3081 SET_PCRE_CALLOUT(callout);
3082 first_callout = 1;
3083 last_callout_mark = NULL;
3084 callout_extra = 0;
3085 callout_count = 0;
3086 callout_fail_count = 999999;
3087 callout_fail_id = -1;
3088 show_malloc = 0;
3089
3090 if (extra != NULL) extra->flags &=
3091 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3092
3093 len = 0;
3094 for (;;)
3095 {
3096 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3097 {
3098 if (len > 0) /* Reached EOF without hitting a newline */
3099 {
3100 fprintf(outfile, "\n");
3101 break;
3102 }
3103 done = 1;
3104 goto CONTINUE;
3105 }
3106 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3107 len = (int)strlen((char *)buffer);
3108 if (buffer[len-1] == '\n') break;
3109 }
3110
3111 while (len > 0 && isspace(buffer[len-1])) len--;
3112 buffer[len] = 0;
3113 if (len == 0) break;
3114
3115 p = buffer;
3116 while (isspace(*p)) p++;
3117
3118 bptr = q = dbuffer;
3119 while ((c = *p++) != 0)
3120 {
3121 int i = 0;
3122 int n = 0;
3123
3124 if (c == '\\') switch ((c = *p++))
3125 {
3126 case 'a': c = 7; break;
3127 case 'b': c = '\b'; break;
3128 case 'e': c = 27; break;
3129 case 'f': c = '\f'; break;
3130 case 'n': c = '\n'; break;
3131 case 'r': c = '\r'; break;
3132 case 't': c = '\t'; break;
3133 case 'v': c = '\v'; break;
3134
3135 case '0': case '1': case '2': case '3':
3136 case '4': case '5': case '6': case '7':
3137 c -= '0';
3138 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3139 c = c * 8 + *p++ - '0';
3140
3141 #if !defined NOUTF8
3142 if (use_utf && c > 255)
3143 {
3144 pcre_uint8 buff8[8];
3145 int ii, utn;
3146 utn = ord2utf8(c, buff8);
3147 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
3148 c = buff8[ii]; /* Last byte */
3149 }
3150 #endif
3151 break;
3152
3153 case 'x':
3154
3155 /* Handle \x{..} specially - new Perl thing for utf8 */
3156
3157 #if !defined NOUTF8
3158 if (*p == '{')
3159 {
3160 pcre_uint8 *pt = p;
3161 c = 0;
3162
3163 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3164 when isxdigit() is a macro that refers to its argument more than
3165 once. This is banned by the C Standard, but apparently happens in at
3166 least one MacOS environment. */
3167
3168 for (pt++; isxdigit(*pt); pt++)
3169 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3170 if (*pt == '}')
3171 {
3172 pcre_uint8 buff8[8];
3173 int ii, utn;
3174 if (use_utf)
3175 {
3176 utn = ord2utf8(c, buff8);
3177 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
3178 c = buff8[ii]; /* Last byte */
3179 }
3180 else
3181 {
3182 if (c > 255)
3183 {
3184 if (use_pcre16)
3185 fprintf(outfile, "** Character \\x{%x} is greater than 255.\n"
3186 "** Because its input is first processed as 8-bit, pcretest "
3187 "does not\n** support such characters in 16-bit mode when "
3188 "UTF-16 is not set.\n", c);
3189 else
3190 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3191 "and UTF-8 mode is not enabled.\n", c);
3192
3193 fprintf(outfile, "** Truncation will probably give the wrong "
3194 "result.\n");
3195 }
3196 }
3197 p = pt + 1;
3198 break;
3199 }
3200 /* Not correct form; fall through */
3201 }
3202 #endif
3203
3204 /* Ordinary \x */
3205
3206 c = 0;
3207 while (i++ < 2 && isxdigit(*p))
3208 {
3209 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3210 p++;
3211 }
3212 break;
3213
3214 case 0: /* \ followed by EOF allows for an empty line */
3215 p--;
3216 continue;
3217
3218 case '>':
3219 if (*p == '-')
3220 {
3221 start_offset_sign = -1;
3222 p++;
3223 }
3224 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3225 start_offset *= start_offset_sign;
3226 continue;
3227
3228 case 'A': /* Option setting */
3229 options |= PCRE_ANCHORED;
3230 continue;
3231
3232 case 'B':
3233 options |= PCRE_NOTBOL;
3234 continue;
3235
3236 case 'C':
3237 if (isdigit(*p)) /* Set copy string */
3238 {
3239 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3240 copystrings |= 1 << n;
3241 }
3242 else if (isalnum(*p))
3243 {
3244 pcre_uchar *namestart = copynamesptr;
3245 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3246 if (use_pcre16)
3247 {
3248 PCRE_SCHAR16 *npp = (PCRE_SCHAR16 *)copynamesptr;
3249 while (isalnum(*p)) *npp++ = *p++;
3250 *npp++ = 0;
3251 *npp = 0;
3252 PCRE_GET_STRINGNUMBER(n, re, copynamesptr);
3253 copynamesptr = (pcre_uchar *)npp;
3254 }
3255 else
3256 {
3257 #endif
3258 pcre_uchar *npp = copynamesptr;
3259 while (isalnum(*p)) *npp++ = *p++;
3260 *npp++ = 0;
3261 *npp = 0;
3262 PCRE_GET_STRINGNUMBER(n, re, copynamesptr);
3263 copynamesptr = npp;
3264 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3265 }
3266 #endif
3267 if (n < 0)
3268 {
3269 fprintf(outfile, "no parentheses with name \"");
3270 PCHARSV(namestart, -1, outfile);
3271 fprintf(outfile, "\"\n");
3272 }
3273 }
3274 else if (*p == '+')
3275 {
3276 callout_extra = 1;
3277 p++;
3278 }
3279 else if (*p == '-')
3280 {
3281 SET_PCRE_CALLOUT(NULL);
3282 p++;
3283 }
3284 else if (*p == '!')
3285 {
3286 callout_fail_id = 0;
3287 p++;
3288 while(isdigit(*p))
3289 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3290 callout_fail_count = 0;
3291 if (*p == '!')
3292 {
3293 p++;
3294 while(isdigit(*p))
3295 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3296 }
3297 }
3298 else if (*p == '*')
3299 {
3300 int sign = 1;
3301 callout_data = 0;
3302 if (*(++p) == '-') { sign = -1; p++; }
3303 while(isdigit(*p))
3304 callout_data = callout_data * 10 + *p++ - '0';
3305 callout_data *= sign;
3306 callout_data_set = 1;
3307 }
3308 continue;
3309
3310 #if !defined NODFA
3311 case 'D':
3312 #if !defined NOPOSIX
3313 if (posix || do_posix)
3314 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3315 else
3316 #endif
3317 use_dfa = 1;
3318 continue;
3319 #endif
3320
3321 #if !defined NODFA
3322 case 'F':
3323 options |= PCRE_DFA_SHORTEST;
3324 continue;
3325 #endif
3326
3327 case 'G':
3328 if (isdigit(*p))
3329 {
3330 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3331 getstrings |= 1 << n;
3332 }
3333 else if (isalnum(*p))
3334 {
3335 pcre_uchar *namestart = getnamesptr;
3336 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3337 if (use_pcre16)
3338 {
3339 PCRE_SCHAR16 *npp = (PCRE_SCHAR16 *)getnamesptr;
3340 while (isalnum(*p)) *npp++ = *p++;
3341 *npp++ = 0;
3342 *npp = 0;
3343 PCRE_GET_STRINGNUMBER(n, re, getnamesptr);
3344 getnamesptr = (pcre_uchar *)npp;
3345 }
3346 else
3347 {
3348 #endif
3349 pcre_uchar *npp = getnamesptr;
3350 while (isalnum(*p)) *npp++ = *p++;
3351 *npp++ = 0;
3352 *npp = 0;
3353 PCRE_GET_STRINGNUMBER(n, re, getnamesptr);
3354 getnamesptr = npp;
3355 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3356 }
3357 #endif
3358 if (n < 0)
3359 {
3360 fprintf(outfile, "no parentheses with name \"");
3361 PCHARSV(namestart, -1, outfile);
3362 fprintf(outfile, "\"\n");
3363 }
3364 }
3365 continue;
3366
3367 case 'J':
3368 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3369 if (extra != NULL
3370 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3371 && extra->executable_jit != NULL)
3372 {
3373 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3374 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
3375 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
3376 }
3377 continue;
3378
3379 case 'L':
3380 getlist = 1;
3381 continue;
3382
3383 case 'M':
3384 find_match_limit = 1;
3385 continue;
3386
3387 case 'N':
3388 if ((options & PCRE_NOTEMPTY) != 0)
3389 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3390 else
3391 options |= PCRE_NOTEMPTY;
3392 continue;
3393
3394 case 'O':
3395 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3396 if (n > size_offsets_max)
3397 {
3398 size_offsets_max = n;
3399 free(offsets);
3400 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3401 if (offsets == NULL)
3402 {
3403 printf("** Failed to get %d bytes of memory for offsets vector\n",
3404 (int)(size_offsets_max * sizeof(int)));
3405 yield = 1;
3406 goto EXIT;
3407 }
3408 }
3409 use_size_offsets = n;
3410 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3411 continue;
3412
3413 case 'P':
3414 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3415 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3416 continue;
3417
3418 case 'Q':
3419 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3420 if (extra == NULL)
3421 {
3422 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3423 extra->flags = 0;
3424 }
3425 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3426 extra->match_limit_recursion = n;
3427 continue;
3428
3429 case 'q':
3430 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3431 if (extra == NULL)
3432 {
3433 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3434 extra->flags = 0;
3435 }
3436 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3437 extra->match_limit = n;
3438 continue;
3439
3440 #if !defined NODFA
3441 case 'R':
3442 options |= PCRE_DFA_RESTART;
3443 continue;
3444 #endif
3445
3446 case 'S':
3447 show_malloc = 1;
3448 continue;
3449
3450 case 'Y':
3451 options |= PCRE_NO_START_OPTIMIZE;
3452 continue;
3453
3454 case 'Z':
3455 options |= PCRE_NOTEOL;
3456 continue;
3457
3458 case '?':
3459 options |= PCRE_NO_UTF8_CHECK;
3460 continue;
3461
3462 case '<':
3463 {
3464 int x = check_newline(p, outfile);
3465 if (x == 0) goto NEXT_DATA;
3466 options |= x;
3467 while (*p++ != '>');
3468 }
3469 continue;
3470 }
3471 *q++ = c;
3472 }
3473 *q = 0;
3474 len = (int)(q - dbuffer);
3475
3476 /* Move the data to the end of the buffer so that a read over the end of
3477 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3478 we are using the POSIX interface, we must include the terminating zero. */
3479
3480 #if !defined NOPOSIX
3481 if (posix || do_posix)
3482 {
3483 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3484 bptr += buffer_size - len - 1;
3485 }
3486 else
3487 #endif
3488 {
3489 memmove(bptr + buffer_size - len, bptr, len);
3490 bptr += buffer_size - len;
3491 }
3492
3493 if ((all_use_dfa || use_dfa) && find_match_limit)
3494 {
3495 printf("**Match limit not relevant for DFA matching: ignored\n");
3496 find_match_limit = 0;
3497 }
3498
3499 /* Handle matching via the POSIX interface, which does not
3500 support timing or playing with the match limit or callout data. */
3501
3502 #if !defined NOPOSIX
3503 if (posix || do_posix)
3504 {
3505 int rc;
3506 int eflags = 0;
3507 regmatch_t *pmatch = NULL;
3508 if (use_size_offsets > 0)
3509 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3510 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3511 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3512 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3513
3514 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3515
3516 if (rc != 0)
3517 {
3518 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3519 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3520 }
3521 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3522 != 0)
3523 {
3524 fprintf(outfile, "Matched with REG_NOSUB\n");
3525 }
3526 else
3527 {
3528 size_t i;
3529 for (i = 0; i < (size_t)use_size_offsets; i++)
3530 {
3531 if (pmatch[i].rm_so >= 0)
3532 {
3533 fprintf(outfile, "%2d: ", (int)i);
3534 PCHARSV(dbuffer + pmatch[i].rm_so,
3535 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3536 fprintf(outfile, "\n");
3537 if (do_showcaprest || (i == 0 && do_showrest))
3538 {
3539 fprintf(outfile, "%2d+ ", (int)i);
3540 PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3541 outfile);
3542 fprintf(outfile, "\n");
3543 }
3544 }
3545 }
3546 }
3547 free(pmatch);
3548 goto NEXT_DATA;
3549 }
3550
3551 #endif /* !defined NOPOSIX */
3552
3553 /* Handle matching via the native interface - repeats for /g and /G */
3554
3555 #ifdef SUPPORT_PCRE16
3556 if (use_pcre16)
3557 {
3558 len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3559 if (len < 0)
3560 {
3561 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3562 "converted to UTF-16\n");
3563 goto NEXT_DATA;
3564 }
3565 bptr = (pcre_uint8 *)buffer16;
3566 }
3567 #endif
3568
3569 for (;; gmatched++) /* Loop for /g or /G */
3570 {
3571 markptr = NULL;
3572
3573 if (timeitm > 0)
3574 {
3575 register int i;
3576 clock_t time_taken;
3577 clock_t start_time = clock();
3578
3579 #if !defined NODFA
3580 if (all_use_dfa || use_dfa)
3581 {
3582 int workspace[1000];
3583 for (i = 0; i < timeitm; i++)
3584 {
3585 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3586 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3587 (sizeof(workspace)/sizeof(int)));
3588 }
3589 }
3590 else
3591 #endif
3592
3593 for (i = 0; i < timeitm; i++)
3594 {
3595 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3596 (options | g_notempty), use_offsets, use_size_offsets);
3597 }
3598 time_taken = clock() - start_time;
3599 fprintf(outfile, "Execute time %.4f milliseconds\n",
3600 (((double)time_taken * 1000.0) / (double)timeitm) /
3601 (double)CLOCKS_PER_SEC);
3602 }
3603
3604 /* If find_match_limit is set, we want to do repeated matches with
3605 varying limits in order to find the minimum value for the match limit and
3606 for the recursion limit. The match limits are relevant only to the normal
3607 running of pcre_exec(), so disable the JIT optimization. This makes it
3608 possible to run the same set of tests with and without JIT externally
3609 requested. */
3610
3611 if (find_match_limit)
3612 {
3613 if (extra == NULL)
3614 {
3615 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3616 extra->flags = 0;
3617 }
3618 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3619
3620 (void)check_match_limit(re, extra, bptr, len, start_offset,
3621 options|g_notempty, use_offsets, use_size_offsets,
3622 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3623 PCRE_ERROR_MATCHLIMIT, "match()");
3624
3625 count = check_match_limit(re, extra, bptr, len, start_offset,
3626 options|g_notempty, use_offsets, use_size_offsets,
3627 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3628 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3629 }
3630
3631 /* If callout_data is set, use the interface with additional data */
3632
3633 else if (callout_data_set)
3634 {
3635 if (extra == NULL)
3636 {
3637 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3638 extra->flags = 0;
3639 }
3640 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3641 extra->callout_data = &callout_data;
3642 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3643 options | g_notempty, use_offsets, use_size_offsets);
3644 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3645 }
3646
3647 /* The normal case is just to do the match once, with the default
3648 value of match_limit. */
3649
3650 #if !defined NODFA
3651 else if (all_use_dfa || use_dfa)
3652 {
3653 int workspace[1000];
3654 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3655 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3656 (sizeof(workspace)/sizeof(int)));
3657 if (count == 0)
3658 {
3659 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3660 count = use_size_offsets/2;
3661 }
3662 }
3663 #endif
3664
3665 else
3666 {
3667 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3668 options | g_notempty, use_offsets, use_size_offsets);
3669 if (count == 0)
3670 {
3671 fprintf(outfile, "Matched, but too many substrings\n");
3672 count = use_size_offsets/3;
3673 }
3674 }
3675
3676 /* Matched */
3677
3678 if (count >= 0)
3679 {
3680 int i, maxcount;
3681
3682 #if !defined NODFA
3683 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3684 #endif
3685 maxcount = use_size_offsets/3;
3686
3687 /* This is a check against a lunatic return value. */
3688
3689 if (count > maxcount)
3690 {
3691 fprintf(outfile,
3692 "** PCRE error: returned count %d is too big for offset size %d\n",
3693 count, use_size_offsets);
3694 count = use_size_offsets/3;
3695 if (do_g || do_G)
3696 {
3697 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3698 do_g = do_G = FALSE; /* Break g/G loop */
3699 }
3700 }
3701
3702 /* do_allcaps requests showing of all captures in the pattern, to check
3703 unset ones at the end. */
3704
3705 if (do_allcaps)
3706 {
3707 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3708 count++; /* Allow for full match */
3709 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3710 }
3711
3712 /* Output the captured substrings */
3713
3714 for (i = 0; i < count * 2; i += 2)
3715 {
3716 if (use_offsets[i] < 0)
3717 {
3718 if (use_offsets[i] != -1)
3719 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3720 use_offsets[i], i);
3721 if (use_offsets[i+1] != -1)
3722 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3723 use_offsets[i+1], i+1);
3724 fprintf(outfile, "%2d: <unset>\n", i/2);
3725 }
3726 else
3727 {
3728 fprintf(outfile, "%2d: ", i/2);
3729 PCHARSV(bptr + use_offsets[i],
3730 use_offsets[i+1] - use_offsets[i], outfile);
3731 fprintf(outfile, "\n");
3732 if (do_showcaprest || (i == 0 && do_showrest))
3733 {
3734 fprintf(outfile, "%2d+ ", i/2);
3735 PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3736 outfile);
3737 fprintf(outfile, "\n");
3738 }
3739 }
3740 }
3741
3742 if (markptr != NULL)
3743 {
3744 fprintf(outfile, "MK: ");
3745 PCHARSV(markptr, -1, outfile);
3746 fprintf(outfile, "\n");
3747 }
3748
3749 for (i = 0; i < 32; i++)
3750 {
3751 if ((copystrings & (1 << i)) != 0)
3752 {
3753 int rc;
3754 char copybuffer[256];
3755 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
3756 copybuffer, sizeof(copybuffer));
3757 if (rc < 0)
3758 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3759 else
3760 {
3761 fprintf(outfile, "%2dC ", i);
3762 PCHARSV(copybuffer, rc, outfile);
3763 fprintf(outfile, " (%d)\n", rc);
3764 }
3765 }
3766 }
3767
3768 for (copynamesptr = copynames;
3769 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3770 use_pcre16?
3771 (*(PCRE_SCHAR16*)copynamesptr) != 0 : *copynamesptr != 0;
3772 #else
3773 *copynamesptr != 0;
3774 #endif
3775 copynamesptr += (int)(STRLEN(copynamesptr) + 1) * CHAR_SIZE)
3776 {
3777 int rc;
3778 char copybuffer[256];
3779 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
3780 copynamesptr, copybuffer, sizeof(copybuffer));
3781 if (rc < 0)
3782 {
3783 fprintf(outfile, "copy substring ");
3784 PCHARSV(copynamesptr, -1, outfile);
3785 fprintf(outfile, " failed %d\n", rc);
3786 }
3787 else
3788 {
3789 fprintf(outfile, " C ");
3790 PCHARSV(copybuffer, rc, outfile);
3791 fprintf(outfile, " (%d) ", rc);
3792 PCHARSV(copynamesptr, -1, outfile);
3793 putc('\n', outfile);
3794 }
3795 }
3796
3797 for (i = 0; i < 32; i++)
3798 {
3799 if ((getstrings & (1 << i)) != 0)
3800 {
3801 int rc;
3802 const char *substring;
3803 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
3804 if (rc < 0)
3805 fprintf(outfile, "get substring %d failed %d\n", i, rc);
3806 else
3807 {
3808 fprintf(outfile, "%2dG ", i);
3809 PCHARSV(substring, rc, outfile);
3810 fprintf(outfile, " (%d)\n", rc);
3811 PCRE_FREE_SUBSTRING(substring);
3812 }
3813 }
3814 }
3815
3816 for (getnamesptr = getnames;
3817 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3818 use_pcre16?
3819 (*(PCRE_SCHAR16*)getnamesptr) != 0 : *getnamesptr != 0;
3820 #else
3821 *getnamesptr != 0;
3822 #endif
3823 getnamesptr += (int)(STRLEN(getnamesptr) + 1) * CHAR_SIZE)
3824 {
3825 int rc;
3826 const char *substring;
3827 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
3828 getnamesptr, &substring);
3829 if (rc < 0)
3830 {
3831 fprintf(outfile, "get substring ");
3832 PCHARSV(getnamesptr, -1, outfile);
3833 fprintf(outfile, " failed %d\n", rc);
3834 }
3835 else
3836 {
3837 fprintf(outfile, " G ");
3838 PCHARSV(substring, rc, outfile);
3839 fprintf(outfile, " (%d) ", rc);
3840 PCHARSV(getnamesptr, -1, outfile);
3841 PCRE_FREE_SUBSTRING(substring);
3842 putc('\n', outfile);
3843 }
3844 }
3845
3846 if (getlist)
3847 {
3848 int rc;
3849 const char **stringlist;
3850 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
3851 if (rc < 0)
3852 fprintf(outfile, "get substring list failed %d\n", rc);
3853 else
3854 {
3855 for (i = 0; i < count; i++)
3856 {
3857 fprintf(outfile, "%2dL ", i);
3858 PCHARSV(stringlist[i], -1, outfile);
3859 putc('\n', outfile);
3860 }
3861 if (stringlist[i] != NULL)
3862 fprintf(outfile, "string list not terminated by NULL\n");
3863 PCRE_FREE_SUBSTRING_LIST(stringlist);
3864 }
3865 }
3866 }
3867
3868 /* There was a partial match */
3869
3870 else if (count == PCRE_ERROR_PARTIAL)
3871 {
3872 if (markptr == NULL) fprintf(outfile, "Partial match");
3873 else fprintf(outfile, "Partial match, mark=%s", markptr);
3874 if (use_size_offsets > 1)
3875 {
3876 fprintf(outfile, ": ");
3877 PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3878 outfile);
3879 }
3880 fprintf(outfile, "\n");
3881 break; /* Out of the /g loop */
3882 }
3883
3884 /* Failed to match. If this is a /g or /G loop and we previously set
3885 g_notempty after a null match, this is not necessarily the end. We want
3886 to advance the start offset, and continue. We won't be at the end of the
3887 string - that was checked before setting g_notempty.
3888
3889 Complication arises in the case when the newline convention is "any",
3890 "crlf", or "anycrlf". If the previous match was at the end of a line
3891 terminated by CRLF, an advance of one character just passes the \r,
3892 whereas we should prefer the longer newline sequence, as does the code in
3893 pcre_exec(). Fudge the offset value to achieve this. We check for a
3894 newline setting in the pattern; if none was set, use pcre_config() to
3895 find the default.
3896
3897 Otherwise, in the case of UTF-8 matching, the advance must be one
3898 character, not one byte. */
3899
3900 else
3901 {
3902 if (g_notempty != 0)
3903 {
3904 int onechar = 1;
3905 unsigned int obits = ((real_pcre *)re)->options;
3906 use_offsets[0] = start_offset;
3907 if ((obits & PCRE_NEWLINE_BITS) == 0)
3908 {
3909 int d;
3910 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3911 /* Note that these values are always the ASCII ones, even in
3912 EBCDIC environments. CR = 13, NL = 10. */
3913 obits = (d == 13)? PCRE_NEWLINE_CR :
3914 (d == 10)? PCRE_NEWLINE_LF :
3915 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3916 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3917 (d == -1)? PCRE_NEWLINE_ANY : 0;
3918 }
3919 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3920 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3921 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3922 &&
3923 start_offset < len - 1 &&
3924 bptr[start_offset * CHAR_SIZE] == '\r' &&
3925 bptr[(start_offset + 1) * CHAR_SIZE] == '\n')
3926 onechar++;
3927 else if (use_utf)
3928 {
3929 while (start_offset + onechar < len)
3930 {
3931 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3932 onechar++;
3933 }
3934 }
3935 use_offsets[1] = start_offset + onechar;
3936 }
3937 else
3938 {
3939 switch(count)
3940 {
3941 case PCRE_ERROR_NOMATCH:
3942 if (gmatched == 0)
3943 {
3944 if (markptr == NULL) fprintf(outfile, "No match\n");
3945 else fprintf(outfile, "No match, mark = %s\n", markptr);
3946 }
3947 break;
3948
3949 case PCRE_ERROR_BADUTF8:
3950 case PCRE_ERROR_SHORTUTF8:
3951 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3952 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3953 if (use_size_offsets >= 2)
3954 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3955 use_offsets[1]);
3956 fprintf(outfile, "\n");
3957 break;
3958
3959 default:
3960 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3961 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3962 else
3963 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3964 break;
3965 }
3966
3967 break; /* Out of the /g loop */
3968 }
3969 }
3970
3971 /* If not /g or /G we are done */
3972
3973 if (!do_g && !do_G) break;
3974
3975 /* If we have matched an empty string, first check to see if we are at
3976 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3977 Perl's /g option does. This turns out to be rather cunning. First we set
3978 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3979 same point. If this fails (picked up above) we advance to the next
3980 character. */
3981
3982 g_notempty = 0;
3983
3984 if (use_offsets[0] == use_offsets[1])
3985 {
3986 if (use_offsets[0] == len) break;
3987 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3988 }
3989
3990 /* For /g, update the start offset, leaving the rest alone */
3991
3992 if (do_g) start_offset = use_offsets[1];
3993
3994 /* For /G, update the pointer and length */
3995
3996 else
3997 {
3998 bptr += use_offsets[1] * CHAR_SIZE;
3999 len -= use_offsets[1];
4000 }
4001 } /* End of loop for /g and /G */
4002
4003 NEXT_DATA: continue;
4004 } /* End of loop for data lines */
4005
4006 CONTINUE:
4007
4008 #if !defined NOPOSIX
4009 if (posix || do_posix) regfree(&preg);
4010 #endif
4011
4012 if (re != NULL) new_free(re);
4013 if (extra != NULL)
4014 {
4015 PCRE_FREE_STUDY(extra);
4016 }
4017 if (locale_set)
4018 {
4019 new_free((void *)tables);
4020 setlocale(LC_CTYPE, "C");
4021 locale_set = 0;
4022 }
4023 if (jit_stack != NULL)
4024 {
4025 pcre_jit_stack_free(jit_stack);
4026 jit_stack = NULL;
4027 }
4028 }
4029
4030 if (infile == stdin) fprintf(outfile, "\n");
4031
4032 EXIT:
4033
4034 if (infile != NULL && infile != stdin) fclose(infile);
4035 if (outfile != NULL && outfile != stdout) fclose(outfile);
4036
4037 free(buffer);
4038 free(dbuffer);
4039 free(pbuffer);
4040 free(offsets);
4041
4042 #ifdef SUPPORT_PCRE16
4043 if (buffer16 != NULL) free(buffer16);
4044 #endif
4045
4046 return yield;
4047 }
4048
4049 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5