/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 821 - (show annotations)
Fri Dec 23 16:38:13 2011 UTC (7 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 121366 byte(s)
More pcretest 16-bit updates; also a bug fix in pcre_exec.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <time.h>
49 #include <locale.h>
50 #include <errno.h>
51
52 #ifdef SUPPORT_LIBREADLINE
53 #ifdef HAVE_UNISTD_H
54 #include <unistd.h>
55 #endif
56 #include <readline/readline.h>
57 #include <readline/history.h>
58 #endif
59
60
61 /* A number of things vary for Windows builds. Originally, pcretest opened its
62 input and output without "b"; then I was told that "b" was needed in some
63 environments, so it was added for release 5.0 to both the input and output. (It
64 makes no difference on Unix-like systems.) Later I was told that it is wrong
65 for the input on Windows. I've now abstracted the modes into two macros that
66 are set here, to make it easier to fiddle with them, and removed "b" from the
67 input mode under Windows. */
68
69 #if defined(_WIN32) || defined(WIN32)
70 #include <io.h> /* For _setmode() */
71 #include <fcntl.h> /* For _O_BINARY */
72 #define INPUT_MODE "r"
73 #define OUTPUT_MODE "wb"
74
75 #ifndef isatty
76 #define isatty _isatty /* This is what Windows calls them, I'm told, */
77 #endif /* though in some environments they seem to */
78 /* be already defined, hence the #ifndefs. */
79 #ifndef fileno
80 #define fileno _fileno
81 #endif
82
83 /* A user sent this fix for Borland Builder 5 under Windows. */
84
85 #ifdef __BORLANDC__
86 #define _setmode(handle, mode) setmode(handle, mode)
87 #endif
88
89 /* Not Windows */
90
91 #else
92 #include <sys/time.h> /* These two includes are needed */
93 #include <sys/resource.h> /* for setrlimit(). */
94 #define INPUT_MODE "rb"
95 #define OUTPUT_MODE "wb"
96 #endif
97
98
99 /* We have to include pcre_internal.h because we need the internal info for
100 displaying the results of pcre_study() and we also need to know about the
101 internal macros, structures, and other internal data values; pcretest has
102 "inside information" compared to a program that strictly follows the PCRE API.
103
104 Although pcre_internal.h does itself include pcre.h, we explicitly include it
105 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106 appropriately for an application, not for building PCRE. */
107
108 #include "pcre.h"
109 #include "pcre_internal.h"
110
111 /* The pcre_printint() function, which prints the internal form of a compiled
112 regex, is held in a separate file so that (a) it can be compiled in either
113 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114 when that is compiled in debug mode. */
115
116 #ifdef SUPPORT_PCRE8
117 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118 #endif
119 #ifdef SUPPORT_PCRE16
120 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121 #endif
122
123 /* We need access to some of the data tables that PCRE uses. So as not to have
124 to keep two copies, we include the source file here, changing the names of the
125 external symbols to prevent clashes. */
126
127 #define _pcre_ucp_gentype ucp_gentype
128 #define _pcre_ucp_typerange ucp_typerange
129 #define _pcre_utf8_table1 utf8_table1
130 #define _pcre_utf8_table1_size utf8_table1_size
131 #define _pcre_utf8_table2 utf8_table2
132 #define _pcre_utf8_table3 utf8_table3
133 #define _pcre_utf8_table4 utf8_table4
134 #define _pcre_utt utt
135 #define _pcre_utt_size utt_size
136 #define _pcre_utt_names utt_names
137 #define _pcre_OP_lengths OP_lengths
138
139 #include "pcre_tables.c"
140
/* PRINTABLE(ch) decides whether an output character is shown as itself or as
a hex value when compiled patterns are displayed; the ranges are the same as
those in the printint.src file. We use it whenever the locale has not been
explicitly changed, so that output is consistent across systems whose
isprint() results differ even in the "C" locale. */

#if defined(EBCDIC)
#define PRINTABLE(ch) ((ch) >= 64 && (ch) < 255)
#else
#define PRINTABLE(ch) ((ch) >= 32 && (ch) < 127)
#endif

/* PRINTOK(ch) defers to isprint() once locale_set is non-zero; otherwise it
falls back to the fixed PRINTABLE() ranges above. */

#define PRINTOK(ch) (locale_set? isprint(ch) : PRINTABLE(ch))
154
155 /* It is possible to compile this test program without including support for
156 testing the POSIX interface, though this is not available via the standard
157 Makefile. */
158
159 #if !defined NOPOSIX
160 #include "pcreposix.h"
161 #endif
162
163 /* It is also possible, originally for the benefit of a version that was
164 imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165 or without the interface to the DFA matcher (NODFA). In fact, we automatically
166 cut out the UTF8 support if PCRE is built without it. */
167
168 #ifndef SUPPORT_UTF8
169 #ifndef NOUTF8
170 #define NOUTF8
171 #endif
172 #endif
173
174 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
175 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
176 only from one place and is handled differently). I couldn't dream up any way of
177 using a single macro to do this in a generic way, because of the many different
178 argument requirements. We know that at least one of SUPPORT_PCRE8 and
179 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
180 use these in the definitions of generic macros. */
181
182 #ifdef SUPPORT_PCRE8
183
/* ----- 8-bit versions of the per-mode helper macros ----- */

/* Call pchars() on the pcre_uint8 data that starts "off" elements into s.
PCHARS8 assigns the result to dest; PCHARSV8 discards it. */

#define PCHARS8(dest, s, off, n, fp) \
  dest = pchars((pcre_uint8 *)(s) + off, n, fp)

#define PCHARSV8(s, off, n, fp) \
  (void)pchars((pcre_uint8 *)(s) + off, n, fp)

/* The 16-bit name argument is accepted so that the 8-bit and 16-bit macros
take the same argument list; it is not used here. */

#define READ_CAPTURE_NAME8(ptr, name8, name16, code) \
  ptr = read_capture_name8(ptr, name8, code)

#define SET_PCRE_CALLOUT8(fn) \
  pcre_callout = fn

#define STRLEN8(s) ((int)strlen((char *)s))


/* Wrappers for the 8-bit library functions. Where there is a result, it is
assigned to the first macro argument. */

#define PCRE_COMPILE8(code, pattern, opts, errptr, erroff, tbl) \
  code = pcre_compile((char *)pattern, opts, errptr, erroff, tbl)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, code, subject, ovec, count, \
    name, buf, bufsize) \
  rc = pcre_copy_named_substring(code, (char *)subject, ovec, count, \
    (char *)name, buf, bufsize)

#define PCRE_COPY_SUBSTRING8(rc, subject, ovec, count, num, buf, bufsize) \
  rc = pcre_copy_substring((char *)subject, ovec, count, num, buf, bufsize)

#define PCRE_DFA_EXEC8(rc, code, extra, subject, len, start, opts, \
    ovec, ovecsize, wspace, wscount) \
  rc = pcre_dfa_exec(code, extra, (char *)subject, len, start, opts, \
    ovec, ovecsize, wspace, wscount)

#define PCRE_EXEC8(rc, code, extra, subject, len, start, opts, \
    ovec, ovecsize) \
  rc = pcre_exec(code, extra, (char *)subject, len, start, opts, \
    ovec, ovecsize)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(str) \
  pcre_free_substring(str)

#define PCRE_FREE_SUBSTRING_LIST8(list) \
  pcre_free_substring_list(list)

#define PCRE_GET_NAMED_SUBSTRING8(rc, code, subject, ovec, count, \
    name, strptr) \
  rc = pcre_get_named_substring(code, (char *)subject, ovec, count, \
    (char *)name, strptr)

/* NOTE(review): the second parameter (rc) is not used by the expansion, which
instead refers to whatever identifier "re" names at the point of use. */

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, subject, ovec, count, num, strptr) \
  rc = pcre_get_substring((char *)subject, ovec, count, num, strptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, subject, ovec, count, listptr) \
  rc = pcre_get_substring_list((const char *)subject, ovec, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(code, extra, tbl) \
  pcre_pattern_to_host_byte_order(code, extra, tbl)

#define PCRE_PRINTINT8(code, fp, lengths) \
  pcre_printint(code, fp, lengths)

#define PCRE_STUDY8(extra, code, opts, errptr) \
  extra = pcre_study(code, opts, errptr)
251
252 #endif /* SUPPORT_PCRE8 */
253
254 /* -----------------------------------------------------------*/
255
256 #ifdef SUPPORT_PCRE16
257
/* ----- 16-bit versions of the per-mode helper macros -----

Every macro argument that sits under a cast, or takes part in arithmetic, is
parenthesized. Without that, a compound argument is mis-parsed: "a + b" passed
as a size would expand to "a + b/2", and "ptr + n" passed as a string would
have only "ptr" cast to a 16-bit pointer, so n would then be scaled in 16-bit
units. For simple identifier arguments the expansion is unchanged. */

/* Call pchars16() on the 16-bit data that starts "offset" 16-bit units into
p. PCHARS16 assigns the result to lv; PCHARSV16 discards it. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* The 8-bit name argument is accepted so that the 8-bit and 16-bit macros
take the same argument list; it is not used here. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)(p)))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = callout


/* Wrappers for the 16-bit library functions. Where there is a result, it is
assigned to the first macro argument. */

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = pcre16_compile((PCRE_SPTR16)(pat), options, error, erroffset, tables)

/* In the two copy macros the buffer is reinterpreted as PCRE_SCHAR16 elements
and the size is halved (presumably callers give the size in bytes - TODO
confirm against the call sites). */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)(bptr), offsets, count, \
    (PCRE_SPTR16)(namesptr), (PCRE_SCHAR16 *)(cbuffer), (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)(bptr), offsets, count, i, \
    (PCRE_SCHAR16 *)(cbuffer), (size)/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)(bptr), len, start_offset, \
    options, offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec(re, extra, (PCRE_SPTR16)(bptr), len, start_offset, \
    options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study(extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)(substring))

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)(listptr))

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring(re, (PCRE_SPTR16)(bptr), offsets, count, \
    (PCRE_SPTR16)(getnamesptr), (PCRE_SPTR16 *)(void*)(subsptr))

/* NOTE(review): the second parameter (rc) is not used by the expansion, which
instead refers to whatever identifier "re" names at the point of use. Left
as it is because the call sites are outside this section. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)(ptr))

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)(bptr), offsets, count, i, \
    (PCRE_SPTR16 *)(void*)(subsptr))

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)(bptr), offsets, count, \
    (PCRE_SPTR16 **)(void*)(listptr))

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
  pcre16_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = pcre16_study(re, options, error)
328
329 #endif /* SUPPORT_PCRE16 */
330
331
332 /* ----- Both modes are supported; a runtime test is needed, except for
333 pcre_config(), and the JIT stack functions, when it doesn't matter which
334 version is called. ----- */
335
336 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
337
/* Size of one character: 1 in 8-bit mode, 2 in 16-bit mode. */

#define CHAR_SIZE (!use_pcre16? 1:2)

/* These do not depend on the mode, so the 8-bit names are used directly. */

#define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
#define PCRE_CONFIG pcre_config
#define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
#define PCRE_JIT_STACK_FREE pcre_jit_stack_free

/* Expression-valued macros. */

#define STRLEN(p) (!use_pcre16? STRLEN8(p) : STRLEN16(p))

#define PCRE_MAKETABLES \
  (!use_pcre16? pcre_maketables() : pcre16_maketables())

/* Statement-valued macros: each one tests use_pcre16 at run time and expands
to the 8-bit or the 16-bit variant. The expansion is a complete if/else with
no trailing semicolon; the caller supplies that. */

#define PCHARS(lv, p, offset, len, f) \
  if (!use_pcre16) \
    PCHARS8(lv, p, offset, len, f); \
  else \
    PCHARS16(lv, p, offset, len, f)

#define PCHARSV(p, offset, len, f) \
  if (!use_pcre16) \
    PCHARSV8(p, offset, len, f); \
  else \
    PCHARSV16(p, offset, len, f)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  if (!use_pcre16) \
    READ_CAPTURE_NAME8(p, cn8, cn16, re); \
  else \
    READ_CAPTURE_NAME16(p, cn8, cn16, re)

#define SET_PCRE_CALLOUT(callout) \
  if (!use_pcre16) \
    SET_PCRE_CALLOUT8(callout); \
  else \
    SET_PCRE_CALLOUT16(callout)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  if (!use_pcre16) \
    PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
  else \
    PCRE_COMPILE16(re, pat, options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  if (!use_pcre16) \
    PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else \
    PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  if (!use_pcre16) \
    PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
  else \
    PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  if (!use_pcre16) \
    PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else \
    PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  if (!use_pcre16) \
    PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else \
    PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets)

#define PCRE_FREE_STUDY(extra) \
  if (!use_pcre16) \
    PCRE_FREE_STUDY8(extra); \
  else \
    PCRE_FREE_STUDY16(extra)

#define PCRE_FREE_SUBSTRING(substring) \
  if (!use_pcre16) \
    PCRE_FREE_SUBSTRING8(substring); \
  else \
    PCRE_FREE_SUBSTRING16(substring)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  if (!use_pcre16) \
    PCRE_FREE_SUBSTRING_LIST8(listptr); \
  else \
    PCRE_FREE_SUBSTRING_LIST16(listptr)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  if (!use_pcre16) \
    PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else \
    PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  if (!use_pcre16) \
    PCRE_GET_STRINGNUMBER8(n, rc, ptr); \
  else \
    PCRE_GET_STRINGNUMBER16(n, rc, ptr)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  if (!use_pcre16) \
    PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
  else \
    PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  if (!use_pcre16) \
    PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
  else \
    PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
  if (!use_pcre16) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables); \
  else \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  if (!use_pcre16) \
    PCRE_PRINTINT8(re, outfile, debug_lengths); \
  else \
    PCRE_PRINTINT16(re, outfile, debug_lengths)

#define PCRE_STUDY(extra, re, options, error) \
  if (!use_pcre16) \
    PCRE_STUDY8(extra, re, options, error); \
  else \
    PCRE_STUDY16(extra, re, options, error)
477
478 /* ----- Only 8-bit mode is supported ----- */
479
480 #elif defined SUPPORT_PCRE8
/* One character occupies one byte in 8-bit mode. */
#define CHAR_SIZE                        1

/* pcretest's own helpers map straight onto their 8-bit forms. */
#define PCHARS                           PCHARS8
#define PCHARSV                          PCHARSV8
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT8
#define STRLEN                           STRLEN8

/* Library functions that are named directly rather than through a macro. */
#define PCRE_ASSIGN_JIT_STACK            pcre_assign_jit_stack
#define PCRE_CONFIG                      pcre_config
#define PCRE_JIT_STACK_ALLOC             pcre_jit_stack_alloc
#define PCRE_JIT_STACK_FREE              pcre_jit_stack_free
#define PCRE_MAKETABLES                  pcre_maketables()

/* Library wrappers map onto the 8-bit wrapper macros. */
#define PCRE_COMPILE                     PCRE_COMPILE8
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC8
#define PCRE_EXEC                        PCRE_EXEC8
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST8
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT                    PCRE_PRINTINT8
#define PCRE_STUDY                       PCRE_STUDY8
507
508 /* ----- Only 16-bit mode is supported ----- */
509
510 #else
/* One character occupies two bytes in 16-bit mode. This matches what the
both-modes definition, (use_pcre16? 2:1), yields when use_pcre16 is set, which
is always the case in a 16-bit-only build. */
#define CHAR_SIZE                        2

/* pcretest's own helpers map straight onto their 16-bit forms. */
#define PCHARS                           PCHARS16
#define PCHARSV                          PCHARSV16
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT16
#define STRLEN                           STRLEN16

/* Library functions that are named directly rather than through a macro. */
#define PCRE_ASSIGN_JIT_STACK            pcre16_assign_jit_stack
#define PCRE_CONFIG                      pcre16_config
#define PCRE_JIT_STACK_ALLOC             pcre16_jit_stack_alloc
#define PCRE_JIT_STACK_FREE              pcre16_jit_stack_free
#define PCRE_MAKETABLES                  pcre16_maketables()

/* Library wrappers map onto the 16-bit wrapper macros. */
#define PCRE_COMPILE                     PCRE_COMPILE16
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC16
#define PCRE_EXEC                        PCRE_EXEC16
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST16
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT                    PCRE_PRINTINT16
#define PCRE_STUDY                       PCRE_STUDY16
537 #endif
538
539 /* ----- End of mode-specific function call macros ----- */
540
541
/* Miscellaneous parameters */

/* If <time.h> did not supply CLOCKS_PER_SEC, take CLK_TCK when that exists,
and otherwise fall back to 100. */

#if !defined(CLOCKS_PER_SEC) && defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#elif !defined(CLOCKS_PER_SEC)
#define CLOCKS_PER_SEC 100
#endif

/* Default number of iterations of the loop used for timing. */

#define LOOPREPEAT 500000
555
/* File-scope state. The grouping below follows the variable names only; the
code that uses these variables lies outside this section. Variables with no
initializer start at zero/NULL, as all statics do. */

/* Output stream */
static FILE *outfile;

/* General flags and settings */
static int debug_lengths;
static int locale_set = 0;
static int log_store = 0;
static int show_malloc;
static int use_utf;

/* Callout-related */
static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int first_callout;
static const unsigned char *last_callout_mark = NULL;

/* Store (memory) accounting */
static size_t first_gotten_store = 0;
static size_t gotten_store;
572
573 /* The buffers grow automatically if very long input lines are encountered. */
574
575 static int buffer_size = 50000;
576 static pcre_uint8 *buffer = NULL;
577 static pcre_uint8 *dbuffer = NULL;
578 static pcre_uint8 *pbuffer = NULL;
579
/* Another buffer is needed for translation to 16-bit character strings. It
will be obtained and extended as required. */

#ifdef SUPPORT_PCRE16
static pcre_uint16 *buffer16 = NULL;
static int buffer16_size = 0;

/* To swap bytes in a pattern for save/reload testing we need the table of
operator lengths that is used for 16-bit compiling. Luckily the data is defined
as a macro (OP_LENGTHS), but LINK_SIZE must first be adjusted for the 16-bit
world: 2 becomes 1, and 3 or 4 becomes 2. As a safety check, make sure that
COMPILE_PCRE16 is *not* set. */

#if defined(COMPILE_PCRE16)
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

#if LINK_SIZE != 2 && LINK_SIZE != 3 && LINK_SIZE != 4
#error LINK_SIZE must be either 2, 3, or 4
#elif LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#else   /* LINK_SIZE is 3 or 4 */
#undef LINK_SIZE
#define LINK_SIZE 2
#endif

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };

#endif  /* SUPPORT_PCRE16 */
610
/* Default library mode. Without 8-bit support there must be 16-bit support,
so use_pcre16 starts as 1. With 8-bit support it starts as 0; if 16-bit support
is present as well, an option can change it. */

#if !defined(SUPPORT_PCRE8)
static int use_pcre16 = 1;
#else
static int use_pcre16 = 0;
#endif
620
/* Explanatory text for runtime error codes. The number beside each entry is
its array index. A NULL entry has no text here; the note beside it says why. */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8 is handled specially */
  /* 11 */ "bad UTF-8 offset",
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached",
  /* 28 */ "pattern compiled in wrong mode (8-bit/16-bit error)"
};
654
655
656 /*************************************************
657 * Alternate character tables *
658 *************************************************/
659
660 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
661 using the default tables of the library. However, the T option can be used to
662 select alternate sets of tables, for different kinds of testing. Note also that
663 the L (locale) option also adjusts the tables. */
664
665 /* This is the set of tables distributed as default with PCRE. It recognizes
666 only ASCII characters. */
667
668 static const pcre_uint8 tables0[] = {
669
670 /* Lower-casing table, 256 entries: values 65-90 ('A'-'Z' in ASCII) map to 97-122 ('a'-'z'); every other value maps to itself. */
671
672 0, 1, 2, 3, 4, 5, 6, 7,
673 8, 9, 10, 11, 12, 13, 14, 15,
674 16, 17, 18, 19, 20, 21, 22, 23,
675 24, 25, 26, 27, 28, 29, 30, 31,
676 32, 33, 34, 35, 36, 37, 38, 39,
677 40, 41, 42, 43, 44, 45, 46, 47,
678 48, 49, 50, 51, 52, 53, 54, 55,
679 56, 57, 58, 59, 60, 61, 62, 63,
680 64, 97, 98, 99,100,101,102,103,
681 104,105,106,107,108,109,110,111,
682 112,113,114,115,116,117,118,119,
683 120,121,122, 91, 92, 93, 94, 95,
684 96, 97, 98, 99,100,101,102,103,
685 104,105,106,107,108,109,110,111,
686 112,113,114,115,116,117,118,119,
687 120,121,122,123,124,125,126,127,
688 128,129,130,131,132,133,134,135,
689 136,137,138,139,140,141,142,143,
690 144,145,146,147,148,149,150,151,
691 152,153,154,155,156,157,158,159,
692 160,161,162,163,164,165,166,167,
693 168,169,170,171,172,173,174,175,
694 176,177,178,179,180,181,182,183,
695 184,185,186,187,188,189,190,191,
696 192,193,194,195,196,197,198,199,
697 200,201,202,203,204,205,206,207,
698 208,209,210,211,212,213,214,215,
699 216,217,218,219,220,221,222,223,
700 224,225,226,227,228,229,230,231,
701 232,233,234,235,236,237,238,239,
702 240,241,242,243,244,245,246,247,
703 248,249,250,251,252,253,254,255,
704
705 /* Case-flipping table, 256 entries: values 65-90 and 97-122 are exchanged (upper <-> lower case in ASCII); every other value maps to itself. */
706
707 0, 1, 2, 3, 4, 5, 6, 7,
708 8, 9, 10, 11, 12, 13, 14, 15,
709 16, 17, 18, 19, 20, 21, 22, 23,
710 24, 25, 26, 27, 28, 29, 30, 31,
711 32, 33, 34, 35, 36, 37, 38, 39,
712 40, 41, 42, 43, 44, 45, 46, 47,
713 48, 49, 50, 51, 52, 53, 54, 55,
714 56, 57, 58, 59, 60, 61, 62, 63,
715 64, 97, 98, 99,100,101,102,103,
716 104,105,106,107,108,109,110,111,
717 112,113,114,115,116,117,118,119,
718 120,121,122, 91, 92, 93, 94, 95,
719 96, 65, 66, 67, 68, 69, 70, 71,
720 72, 73, 74, 75, 76, 77, 78, 79,
721 80, 81, 82, 83, 84, 85, 86, 87,
722 88, 89, 90,123,124,125,126,127,
723 128,129,130,131,132,133,134,135,
724 136,137,138,139,140,141,142,143,
725 144,145,146,147,148,149,150,151,
726 152,153,154,155,156,157,158,159,
727 160,161,162,163,164,165,166,167,
728 168,169,170,171,172,173,174,175,
729 176,177,178,179,180,181,182,183,
730 184,185,186,187,188,189,190,191,
731 192,193,194,195,196,197,198,199,
732 200,201,202,203,204,205,206,207,
733 208,209,210,211,212,213,214,215,
734 216,217,218,219,220,221,222,223,
735 224,225,226,227,228,229,230,231,
736 232,233,234,235,236,237,238,239,
737 240,241,242,243,244,245,246,247,
738 248,249,250,251,252,253,254,255,
739
740 /* This table contains bit maps for various character classes. Each map is 32
741 bytes long and the bits run from the least significant end of each byte. The
742 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
743 graph, print, punct, and cntrl. Other classes are built from combinations. The ten maps follow in that order, one blank-line-separated group each (320 bytes in all). */
744
745 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
746 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
747 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
748 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
749
750 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
751 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
752 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
753 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
754
755 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
756 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
757 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
758 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
759
760 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
761 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
762 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
763 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
764
765 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
766 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
767 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
768 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
769
770 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
771 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
772 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
773 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
774
775 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
776 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
777 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
778 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
779
780 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
781 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
782 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
783 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
784
785 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
786 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
787 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
788 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
789
790 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
791 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
792 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
793 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
794
795 /* This table identifies various classes of character by individual bits (256 entries, one per character value; the bits may be combined):
796 0x01 white space character
797 0x02 letter
798 0x04 decimal digit
799 0x08 hexadecimal digit
800 0x10 alphanumeric or '_'
801 0x80 regular expression metacharacter or binary zero
802 */
803
804 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
805 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
806 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
807 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
808 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
809 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
810 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
811 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
812 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
813 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
814 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
815 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
816 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
817 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
818 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
819 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
820 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
821 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
822 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
823 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
824 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
825 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
826 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
827 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
828 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
829 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
830 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
831 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
832 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
833 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
834 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
835 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
836
/* This is a set of tables that came originally from a Windows user. It seems
to be at least an approximation of ISO 8859. In particular, there are
characters greater than 128 that are marked as spaces, letters, etc. */
840
841 static const pcre_uint8 tables1[] = {
842 0,1,2,3,4,5,6,7,
843 8,9,10,11,12,13,14,15,
844 16,17,18,19,20,21,22,23,
845 24,25,26,27,28,29,30,31,
846 32,33,34,35,36,37,38,39,
847 40,41,42,43,44,45,46,47,
848 48,49,50,51,52,53,54,55,
849 56,57,58,59,60,61,62,63,
850 64,97,98,99,100,101,102,103,
851 104,105,106,107,108,109,110,111,
852 112,113,114,115,116,117,118,119,
853 120,121,122,91,92,93,94,95,
854 96,97,98,99,100,101,102,103,
855 104,105,106,107,108,109,110,111,
856 112,113,114,115,116,117,118,119,
857 120,121,122,123,124,125,126,127,
858 128,129,130,131,132,133,134,135,
859 136,137,138,139,140,141,142,143,
860 144,145,146,147,148,149,150,151,
861 152,153,154,155,156,157,158,159,
862 160,161,162,163,164,165,166,167,
863 168,169,170,171,172,173,174,175,
864 176,177,178,179,180,181,182,183,
865 184,185,186,187,188,189,190,191,
866 224,225,226,227,228,229,230,231,
867 232,233,234,235,236,237,238,239,
868 240,241,242,243,244,245,246,215,
869 248,249,250,251,252,253,254,223,
870 224,225,226,227,228,229,230,231,
871 232,233,234,235,236,237,238,239,
872 240,241,242,243,244,245,246,247,
873 248,249,250,251,252,253,254,255,
874 0,1,2,3,4,5,6,7,
875 8,9,10,11,12,13,14,15,
876 16,17,18,19,20,21,22,23,
877 24,25,26,27,28,29,30,31,
878 32,33,34,35,36,37,38,39,
879 40,41,42,43,44,45,46,47,
880 48,49,50,51,52,53,54,55,
881 56,57,58,59,60,61,62,63,
882 64,97,98,99,100,101,102,103,
883 104,105,106,107,108,109,110,111,
884 112,113,114,115,116,117,118,119,
885 120,121,122,91,92,93,94,95,
886 96,65,66,67,68,69,70,71,
887 72,73,74,75,76,77,78,79,
888 80,81,82,83,84,85,86,87,
889 88,89,90,123,124,125,126,127,
890 128,129,130,131,132,133,134,135,
891 136,137,138,139,140,141,142,143,
892 144,145,146,147,148,149,150,151,
893 152,153,154,155,156,157,158,159,
894 160,161,162,163,164,165,166,167,
895 168,169,170,171,172,173,174,175,
896 176,177,178,179,180,181,182,183,
897 184,185,186,187,188,189,190,191,
898 224,225,226,227,228,229,230,231,
899 232,233,234,235,236,237,238,239,
900 240,241,242,243,244,245,246,215,
901 248,249,250,251,252,253,254,223,
902 192,193,194,195,196,197,198,199,
903 200,201,202,203,204,205,206,207,
904 208,209,210,211,212,213,214,247,
905 216,217,218,219,220,221,222,255,
906 0,62,0,0,1,0,0,0,
907 0,0,0,0,0,0,0,0,
908 32,0,0,0,1,0,0,0,
909 0,0,0,0,0,0,0,0,
910 0,0,0,0,0,0,255,3,
911 126,0,0,0,126,0,0,0,
912 0,0,0,0,0,0,0,0,
913 0,0,0,0,0,0,0,0,
914 0,0,0,0,0,0,255,3,
915 0,0,0,0,0,0,0,0,
916 0,0,0,0,0,0,12,2,
917 0,0,0,0,0,0,0,0,
918 0,0,0,0,0,0,0,0,
919 254,255,255,7,0,0,0,0,
920 0,0,0,0,0,0,0,0,
921 255,255,127,127,0,0,0,0,
922 0,0,0,0,0,0,0,0,
923 0,0,0,0,254,255,255,7,
924 0,0,0,0,0,4,32,4,
925 0,0,0,128,255,255,127,255,
926 0,0,0,0,0,0,255,3,
927 254,255,255,135,254,255,255,7,
928 0,0,0,0,0,4,44,6,
929 255,255,127,255,255,255,127,255,
930 0,0,0,0,254,255,255,255,
931 255,255,255,255,255,255,255,127,
932 0,0,0,0,254,255,255,255,
933 255,255,255,255,255,255,255,255,
934 0,2,0,0,255,255,255,255,
935 255,255,255,255,255,255,255,127,
936 0,0,0,0,255,255,255,255,
937 255,255,255,255,255,255,255,255,
938 0,0,0,0,254,255,0,252,
939 1,0,0,248,1,0,0,120,
940 0,0,0,0,254,255,255,255,
941 0,0,128,0,0,0,128,0,
942 255,255,255,255,0,0,0,0,
943 0,0,0,0,0,0,0,128,
944 255,255,255,255,0,0,0,0,
945 0,0,0,0,0,0,0,0,
946 128,0,0,0,0,0,0,0,
947 0,1,1,0,1,1,0,0,
948 0,0,0,0,0,0,0,0,
949 0,0,0,0,0,0,0,0,
950 1,0,0,0,128,0,0,0,
951 128,128,128,128,0,0,128,0,
952 28,28,28,28,28,28,28,28,
953 28,28,0,0,0,0,0,128,
954 0,26,26,26,26,26,26,18,
955 18,18,18,18,18,18,18,18,
956 18,18,18,18,18,18,18,18,
957 18,18,18,128,128,0,128,16,
958 0,26,26,26,26,26,26,18,
959 18,18,18,18,18,18,18,18,
960 18,18,18,18,18,18,18,18,
961 18,18,18,128,128,0,0,0,
962 0,0,0,0,0,1,0,0,
963 0,0,0,0,0,0,0,0,
964 0,0,0,0,0,0,0,0,
965 0,0,0,0,0,0,0,0,
966 1,0,0,0,0,0,0,0,
967 0,0,18,0,0,0,0,0,
968 0,0,20,20,0,18,0,0,
969 0,20,18,0,0,0,0,0,
970 18,18,18,18,18,18,18,18,
971 18,18,18,18,18,18,18,18,
972 18,18,18,18,18,18,18,0,
973 18,18,18,18,18,18,18,18,
974 18,18,18,18,18,18,18,18,
975 18,18,18,18,18,18,18,18,
976 18,18,18,18,18,18,18,0,
977 18,18,18,18,18,18,18,18
978 };
979
980
981
982
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that have no strerror() of their own (SunOS4 is
one example). The text is looked up in the traditional sys_errlist[] table,
whose number of entries is given by sys_nerr.

Argument:
  n           the error number to translate

Returns:      the sys_errlist[] entry for n, or a fixed "unknown" message when
              n lies outside the table
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  int in_table = (n >= 0 && n < sys_nerr);

  if (in_table) {
    return sys_errlist[n];
  }
  return "unknown error number";
}
#endif  /* HAVE_STRERROR */
1002
1003
1004 /*************************************************
1005 * JIT memory callback *
1006 *************************************************/
1007
1008 static pcre_jit_stack* jit_callback(void *arg)
1009 {
1010 return (pcre_jit_stack *)arg;
1011 }
1012
1013
1014 /*************************************************
1015 * Convert UTF-8 string to value *
1016 *************************************************/
1017
1018 /* This function takes one or more bytes that represents a UTF-8 character,
1019 and returns the value of the character.
1020
1021 Argument:
1022 utf8bytes a pointer to the byte vector
1023 vptr a pointer to an int to receive the value
1024
1025 Returns: > 0 => the number of bytes consumed
1026 -6 to 0 => malformed UTF-8 character at offset = (-return)
1027 */
1028
1029 #if !defined NOUTF8
1030
1031 static int
1032 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1033 {
1034 int c = *utf8bytes++;
1035 int d = c;
1036 int i, j, s;
1037
1038 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1039 {
1040 if ((d & 0x80) == 0) break;
1041 d <<= 1;
1042 }
1043
1044 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1045 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1046
1047 /* i now has a value in the range 1-5 */
1048
1049 s = 6*i;
1050 d = (c & utf8_table3[i]) << s;
1051
1052 for (j = 0; j < i; j++)
1053 {
1054 c = *utf8bytes++;
1055 if ((c & 0xc0) != 0x80) return -(j+1);
1056 s -= 6;
1057 d |= (c & 0x3f) << s;
1058 }
1059
1060 /* Check that encoding was the correct unique one */
1061
1062 for (j = 0; j < utf8_table1_size; j++)
1063 if (d <= utf8_table1[j]) break;
1064 if (j != i) return -(i+1);
1065
1066 /* Valid value */
1067
1068 *vptr = d;
1069 return i+1;
1070 }
1071
1072 #endif
1073
1074
1075
1076 /*************************************************
1077 * Convert character value to UTF-8 *
1078 *************************************************/
1079
1080 /* This function takes an integer value in the range 0 - 0x7fffffff
1081 and encodes it as a UTF-8 character in 0 to 6 bytes.
1082
1083 Arguments:
1084 cvalue the character value
1085 utf8bytes pointer to buffer for result - at least 6 bytes long
1086
1087 Returns: number of characters placed in the buffer
1088 */
1089
1090 #if !defined NOUTF8
1091
1092 static int
1093 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1094 {
1095 register int i, j;
1096 for (i = 0; i < utf8_table1_size; i++)
1097 if (cvalue <= utf8_table1[i]) break;
1098 utf8bytes += i;
1099 for (j = i; j > 0; j--)
1100 {
1101 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1102 cvalue >>= 6;
1103 }
1104 *utf8bytes = utf8_table2[i] | cvalue;
1105 return i + 1;
1106 }
1107
1108 #endif
1109
1110
1111
1112 #ifdef SUPPORT_PCRE16
1113 /*************************************************
1114 * Convert a string to 16-bit *
1115 *************************************************/
1116
1117 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1118 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1119 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1120 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1121 result is always left in buffer16.
1122
1123 Arguments:
1124 p points to a byte string
1125 utf true if UTF-8 (to be converted to UTF-16)
1126 len number of bytes in the string (excluding trailing zero)
1127
1128 Returns: number of 16-bit data items used (excluding trailing zero)
1129 OR -1 if a UTF-8 string is malformed
1130 */
1131
1132 static int
1133 to16(pcre_uint8 *p, int utf, int len)
1134 {
1135 pcre_uint16 *pp;
1136
1137 if (buffer16_size < 2*len + 2)
1138 {
1139 if (buffer16 != NULL) free(buffer16);
1140 buffer16_size = 2*len + 2;
1141 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1142 if (buffer16 == NULL)
1143 {
1144 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1145 exit(1);
1146 }
1147 }
1148
1149 pp = buffer16;
1150
1151 if (!utf)
1152 {
1153 while (len-- > 0) *pp++ = *p++;
1154 }
1155
1156 else
1157 {
1158 int c = 0;
1159 while (len > 0)
1160 {
1161 int chlen = utf82ord(p, &c);
1162 if (chlen <= 0) return -1;
1163 p += chlen;
1164 len -= chlen;
1165 if (c < 0x10000) *pp++ = c; else
1166 {
1167 c -= 0x10000;
1168 *pp++ = 0xD800 | (c >> 10);
1169 *pp++ = 0xDC00 | (c & 0x3ff);
1170 }
1171 }
1172 }
1173
1174 *pp = 0;
1175 return pp - buffer16;
1176 }
1177 #endif
1178
1179
1180 /*************************************************
1181 * Read or extend an input line *
1182 *************************************************/
1183
1184 /* Input lines are read into buffer, but both patterns and data lines can be
1185 continued over multiple input lines. In addition, if the buffer fills up, we
1186 want to automatically expand it so as to be able to handle extremely large
1187 lines that are needed for certain stress tests. When the input buffer is
1188 expanded, the other two buffers must also be expanded likewise, and the
1189 contents of pbuffer, which are a copy of the input for callouts, must be
1190 preserved (for when expansion happens for a data line). This is not the most
1191 optimal way of handling this, but hey, this is just a test program!
1192
1193 Arguments:
1194 f the file to read
1195 start where in buffer to start (this *must* be within buffer)
1196 prompt for stdin or readline()
1197
1198 Returns: pointer to the start of new data
1199 could be a copy of start, or could be moved
1200 NULL if no data read and EOF reached
1201 */
1202
1203 static pcre_uint8 *
1204 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1205 {
1206 pcre_uint8 *here = start;
1207
1208 for (;;)
1209 {
1210 int rlen = (int)(buffer_size - (here - buffer));
1211
1212 if (rlen > 1000)
1213 {
1214 int dlen;
1215
1216 /* If libreadline support is required, use readline() to read a line if the
1217 input is a terminal. Note that readline() removes the trailing newline, so
1218 we must put it back again, to be compatible with fgets(). */
1219
1220 #ifdef SUPPORT_LIBREADLINE
1221 if (isatty(fileno(f)))
1222 {
1223 size_t len;
1224 char *s = readline(prompt);
1225 if (s == NULL) return (here == start)? NULL : start;
1226 len = strlen(s);
1227 if (len > 0) add_history(s);
1228 if (len > rlen - 1) len = rlen - 1;
1229 memcpy(here, s, len);
1230 here[len] = '\n';
1231 here[len+1] = 0;
1232 free(s);
1233 }
1234 else
1235 #endif
1236
1237 /* Read the next line by normal means, prompting if the file is stdin. */
1238
1239 {
1240 if (f == stdin) printf("%s", prompt);
1241 if (fgets((char *)here, rlen, f) == NULL)
1242 return (here == start)? NULL : start;
1243 }
1244
1245 dlen = (int)strlen((char *)here);
1246 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1247 here += dlen;
1248 }
1249
1250 else
1251 {
1252 int new_buffer_size = 2*buffer_size;
1253 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1254 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1255 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1256
1257 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1258 {
1259 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1260 exit(1);
1261 }
1262
1263 memcpy(new_buffer, buffer, buffer_size);
1264 memcpy(new_pbuffer, pbuffer, buffer_size);
1265
1266 buffer_size = new_buffer_size;
1267
1268 start = new_buffer + (start - buffer);
1269 here = new_buffer + (here - buffer);
1270
1271 free(buffer);
1272 free(dbuffer);
1273 free(pbuffer);
1274
1275 buffer = new_buffer;
1276 dbuffer = new_dbuffer;
1277 pbuffer = new_pbuffer;
1278 }
1279 }
1280
1281 return NULL; /* Control never gets here */
1282 }
1283
1284
1285
1286 /*************************************************
1287 * Read number from string *
1288 *************************************************/
1289
1290 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1291 around with conditional compilation, just do the job by hand. It is only used
1292 for unpicking arguments, so just keep it simple.
1293
1294 Arguments:
1295 str string to be converted
1296 endptr where to put the end pointer
1297
1298 Returns: the unsigned long
1299 */
1300
1301 static int
1302 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1303 {
1304 int result = 0;
1305 while(*str != 0 && isspace(*str)) str++;
1306 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1307 *endptr = str;
1308 return(result);
1309 }
1310
1311
1312
1313 /*************************************************
1314 * Print one character *
1315 *************************************************/
1316
1317 /* Print a single character either literally, or as a hex escape. */
1318
1319 static int pchar(int c, FILE *f)
1320 {
1321 if (PRINTOK(c))
1322 {
1323 if (f != NULL) fprintf(f, "%c", c);
1324 return 1;
1325 }
1326
1327 if (c < 0x100)
1328 {
1329 if (use_utf)
1330 {
1331 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1332 return 6;
1333 }
1334 else
1335 {
1336 if (f != NULL) fprintf(f, "\\x%02x", c);
1337 return 4;
1338 }
1339 }
1340
1341 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1342 return (c <= 0x000000ff)? 6 :
1343 (c <= 0x00000fff)? 7 :
1344 (c <= 0x0000ffff)? 8 :
1345 (c <= 0x000fffff)? 9 : 10;
1346 }
1347
1348
1349
1350 #ifdef SUPPORT_PCRE8
1351 /*************************************************
1352 * Print 8-bit character string *
1353 *************************************************/
1354
1355 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1356 If handed a NULL file, just counts chars without printing. */
1357
1358 static int pchars(pcre_uint8 *p, int length, FILE *f)
1359 {
1360 int c = 0;
1361 int yield = 0;
1362
1363 if (length < 0)
1364 length = strlen((char *)p);
1365
1366 while (length-- > 0)
1367 {
1368 #if !defined NOUTF8
1369 if (use_utf)
1370 {
1371 int rc = utf82ord(p, &c);
1372 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1373 {
1374 length -= rc - 1;
1375 p += rc;
1376 yield += pchar(c, f);
1377 continue;
1378 }
1379 }
1380 #endif
1381 c = *p++;
1382 yield += pchar(c, f);
1383 }
1384
1385 return yield;
1386 }
1387 #endif
1388
1389
1390
1391 #ifdef SUPPORT_PCRE16
1392 /*************************************************
1393 * Find length of 0-terminated 16-bit string *
1394 *************************************************/
1395
1396 static int strlen16(PCRE_SPTR16 p)
1397 {
1398 int len = 0;
1399 while (*p++ != 0) len++;
1400 return len;
1401 }
1402
1403
1404
1405 /*************************************************
1406 * Print 16-bit character string *
1407 *************************************************/
1408
1409 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1410 If handed a NULL file, just counts chars without printing. */
1411
1412 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1413 {
1414 int yield = 0;
1415
1416 if (length < 0)
1417 length = strlen16(p);
1418
1419 while (length-- > 0)
1420 {
1421 int c = *p++ & 0xffff;
1422 #if !defined NOUTF8
1423 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1424 {
1425 int d = *p & 0xffff;
1426 if (d >= 0xDC00 && d < 0xDFFF)
1427 {
1428 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1429 length--;
1430 p++;
1431 }
1432 }
1433 #endif
1434 yield += pchar(c, f);
1435 }
1436
1437 return yield;
1438 }
1439 #endif
1440
1441
1442
1443 #ifdef SUPPORT_PCRE8
1444 /*************************************************
1445 * Read a capture name (8-bit) and check it *
1446 *************************************************/
1447
1448 static pcre_uint8 *
1449 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1450 {
1451 pcre_uint8 *npp = *pp;
1452 while (isalnum(*p)) *npp++ = *p++;
1453 *npp++ = 0;
1454 *npp = 0;
1455 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1456 {
1457 fprintf(outfile, "no parentheses with name \"");
1458 PCHARSV(*pp, 0, -1, outfile);
1459 fprintf(outfile, "\"\n");
1460 }
1461
1462 *pp = npp;
1463 return p;
1464 }
1465 #endif
1466
1467
1468
1469 #ifdef SUPPORT_PCRE16
1470 /*************************************************
1471 * Read a capture name (16-bit) and check it *
1472 *************************************************/
1473
1474 /* Note that the text being read is 8-bit. */
1475
1476 static pcre_uint8 *
1477 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1478 {
1479 pcre_uint16 *npp = *pp;
1480 while (isalnum(*p)) *npp++ = *p++;
1481 *npp++ = 0;
1482 *npp = 0;
1483 if (pcre16_get_stringnumber(re, (PCRE_SPTR16)(*pp)) < 0)
1484 {
1485 fprintf(outfile, "no parentheses with name \"");
1486 PCHARSV(*pp, 0, -1, outfile);
1487 fprintf(outfile, "\"\n");
1488 }
1489 *pp = npp;
1490 return p;
1491 }
1492 #endif
1493
1494
1495
1496 /*************************************************
1497 * Callout function *
1498 *************************************************/
1499
1500 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1501 the match. Yield zero unless more callouts than the fail count, or the callout
1502 data is not zero. */
1503
1504 static int callout(pcre_callout_block *cb)
1505 {
1506 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1507 int i, pre_start, post_start, subject_length;
1508
1509 if (callout_extra)
1510 {
1511 fprintf(f, "Callout %d: last capture = %d\n",
1512 cb->callout_number, cb->capture_last);
1513
1514 for (i = 0; i < cb->capture_top * 2; i += 2)
1515 {
1516 if (cb->offset_vector[i] < 0)
1517 fprintf(f, "%2d: <unset>\n", i/2);
1518 else
1519 {
1520 fprintf(f, "%2d: ", i/2);
1521 PCHARSV(cb->subject, cb->offset_vector[i],
1522 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1523 fprintf(f, "\n");
1524 }
1525 }
1526 }
1527
1528 /* Re-print the subject in canonical form, the first time or if giving full
1529 datails. On subsequent calls in the same match, we use pchars just to find the
1530 printed lengths of the substrings. */
1531
1532 if (f != NULL) fprintf(f, "--->");
1533
1534 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1535 PCHARS(post_start, cb->subject, cb->start_match,
1536 cb->current_position - cb->start_match, f);
1537
1538 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1539
1540 PCHARSV(cb->subject, cb->current_position,
1541 cb->subject_length - cb->current_position, f);
1542
1543 if (f != NULL) fprintf(f, "\n");
1544
1545 /* Always print appropriate indicators, with callout number if not already
1546 shown. For automatic callouts, show the pattern offset. */
1547
1548 if (cb->callout_number == 255)
1549 {
1550 fprintf(outfile, "%+3d ", cb->pattern_position);
1551 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1552 }
1553 else
1554 {
1555 if (callout_extra) fprintf(outfile, " ");
1556 else fprintf(outfile, "%3d ", cb->callout_number);
1557 }
1558
1559 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1560 fprintf(outfile, "^");
1561
1562 if (post_start > 0)
1563 {
1564 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1565 fprintf(outfile, "^");
1566 }
1567
1568 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1569 fprintf(outfile, " ");
1570
1571 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1572 pbuffer + cb->pattern_position);
1573
1574 fprintf(outfile, "\n");
1575 first_callout = 0;
1576
1577 if (cb->mark != last_callout_mark)
1578 {
1579 if (cb->mark == NULL)
1580 fprintf(outfile, "Latest Mark: <unset>\n");
1581 else
1582 {
1583 fprintf(outfile, "Latest Mark: ");
1584 PCHARSV(cb->mark, 0, -1, outfile);
1585 putc('\n', outfile);
1586 }
1587 last_callout_mark = cb->mark;
1588 }
1589
1590 if (cb->callout_data != NULL)
1591 {
1592 int callout_data = *((int *)(cb->callout_data));
1593 if (callout_data != 0)
1594 {
1595 fprintf(outfile, "Callout data = %d\n", callout_data);
1596 return callout_data;
1597 }
1598 }
1599
1600 return (cb->callout_number != callout_fail_id)? 0 :
1601 (++callout_count >= callout_fail_count)? 1 : 0;
1602 }
1603
1604
1605 /*************************************************
1606 * Local malloc functions *
1607 *************************************************/
1608
1609 /* Alternative malloc function, to test functionality and save the size of a
1610 compiled re, which is the first store request that pcre_compile() makes. The
1611 show_malloc variable is set only during matching. */
1612
1613 static void *new_malloc(size_t size)
1614 {
1615 void *block = malloc(size);
1616 gotten_store = size;
1617 if (first_gotten_store == 0) first_gotten_store = size;
1618 if (show_malloc)
1619 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1620 return block;
1621 }
1622
1623 static void new_free(void *block)
1624 {
1625 if (show_malloc)
1626 fprintf(outfile, "free %p\n", block);
1627 free(block);
1628 }
1629
1630 /* For recursion malloc/free, to test stacking calls */
1631
1632 static void *stack_malloc(size_t size)
1633 {
1634 void *block = malloc(size);
1635 if (show_malloc)
1636 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1637 return block;
1638 }
1639
1640 static void stack_free(void *block)
1641 {
1642 if (show_malloc)
1643 fprintf(outfile, "stack_free %p\n", block);
1644 free(block);
1645 }
1646
1647
1648 /*************************************************
1649 * Call pcre_fullinfo() *
1650 *************************************************/
1651
1652 /* Get one piece of information from the pcre_fullinfo() function. When only
1653 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1654 value, but the code is defensive. */
1655
1656 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1657 {
1658 int rc;
1659
1660 if (use_pcre16)
1661 #ifdef SUPPORT_PCRE16
1662 rc = pcre16_fullinfo(re, study, option, ptr);
1663 #else
1664 rc = PCRE_ERROR_BADMODE;
1665 #endif
1666 else
1667 #ifdef SUPPORT_PCRE8
1668 rc = pcre_fullinfo(re, study, option, ptr);
1669 #else
1670 rc = PCRE_ERROR_BADMODE;
1671 #endif
1672
1673 if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1674 use_pcre16? "16" : "", option);
1675 }
1676
1677
1678
1679 /*************************************************
1680 * Swap byte functions *
1681 *************************************************/
1682
1683 /* The following functions swap the bytes of a pcre_uint16
1684 and pcre_uint32 value.
1685
1686 Arguments:
1687 value any number
1688
1689 Returns: the byte swapped value
1690 */
1691
1692 static pcre_uint32
1693 swap_uint32(pcre_uint32 value)
1694 {
1695 return ((value & 0x000000ff) << 24) |
1696 ((value & 0x0000ff00) << 8) |
1697 ((value & 0x00ff0000) >> 8) |
1698 (value >> 24);
1699 }
1700
1701 static pcre_uint16
1702 swap_uint16(pcre_uint16 value)
1703 {
1704 return (value >> 8) | (value << 8);
1705 }
1706
1707
1708
1709 /*************************************************
1710 * Flip bytes in a compiled pattern *
1711 *************************************************/
1712
1713 /* This function is called if the 'F' option was present on a pattern that is
1714 to be written to a file. We flip the bytes of all the integer fields in the
1715 regex data block and the study block. In 16-bit mode this also flips relevant
1716 bytes in the pattern itself. This is to make it possible to test PCRE's
1717 ability to reload byte-flipped patterns, e.g. those compiled on a different
1718 architecture. */
1719
1720 static void
1721 regexflip(pcre *ere, pcre_extra *extra)
1722 {
1723 real_pcre *re = (real_pcre *)ere;
1724 int op;
1725
1726 #ifdef SUPPORT_PCRE16
1727 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1728 int length = re->name_count * re->name_entry_size;
1729 #ifdef SUPPORT_UTF
1730 BOOL utf = (re->options & PCRE_UTF16) != 0;
1731 BOOL utf16_char = FALSE;
1732 #endif /* SUPPORT_UTF */
1733 #endif /* SUPPORT_PCRE16 */
1734
1735 /* Always flip the bytes in the main data block and study blocks. */
1736
1737 re->magic_number = REVERSED_MAGIC_NUMBER;
1738 re->size = swap_uint32(re->size);
1739 re->options = swap_uint32(re->options);
1740 re->flags = swap_uint16(re->flags);
1741 re->top_bracket = swap_uint16(re->top_bracket);
1742 re->top_backref = swap_uint16(re->top_backref);
1743 re->first_char = swap_uint16(re->first_char);
1744 re->req_char = swap_uint16(re->req_char);
1745 re->name_table_offset = swap_uint16(re->name_table_offset);
1746 re->name_entry_size = swap_uint16(re->name_entry_size);
1747 re->name_count = swap_uint16(re->name_count);
1748
1749 if (extra != NULL)
1750 {
1751 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1752 rsd->size = swap_uint32(rsd->size);
1753 rsd->flags = swap_uint32(rsd->flags);
1754 rsd->minlength = swap_uint32(rsd->minlength);
1755 }
1756
1757 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1758 in the name table, if present, and then in the pattern itself. */
1759
1760 #ifdef SUPPORT_PCRE16
1761 if (!use_pcre16) return;
1762
1763 while(TRUE)
1764 {
1765 /* Swap previous characters. */
1766 while (length-- > 0)
1767 {
1768 *ptr = swap_uint16(*ptr);
1769 ptr++;
1770 }
1771 #ifdef SUPPORT_UTF
1772 if (utf16_char)
1773 {
1774 if ((ptr[-1] & 0xfc00) == 0xd800)
1775 {
1776 /* We know that there is only one extra character in UTF-16. */
1777 *ptr = swap_uint16(*ptr);
1778 ptr++;
1779 }
1780 }
1781 utf16_char = FALSE;
1782 #endif /* SUPPORT_UTF */
1783
1784 /* Get next opcode. */
1785
1786 length = 0;
1787 op = *ptr;
1788 *ptr++ = swap_uint16(op);
1789
1790 switch (op)
1791 {
1792 case OP_END:
1793 return;
1794
1795 #ifdef SUPPORT_UTF
1796 case OP_CHAR:
1797 case OP_CHARI:
1798 case OP_NOT:
1799 case OP_NOTI:
1800 case OP_STAR:
1801 case OP_MINSTAR:
1802 case OP_PLUS:
1803 case OP_MINPLUS:
1804 case OP_QUERY:
1805 case OP_MINQUERY:
1806 case OP_UPTO:
1807 case OP_MINUPTO:
1808 case OP_EXACT:
1809 case OP_POSSTAR:
1810 case OP_POSPLUS:
1811 case OP_POSQUERY:
1812 case OP_POSUPTO:
1813 case OP_STARI:
1814 case OP_MINSTARI:
1815 case OP_PLUSI:
1816 case OP_MINPLUSI:
1817 case OP_QUERYI:
1818 case OP_MINQUERYI:
1819 case OP_UPTOI:
1820 case OP_MINUPTOI:
1821 case OP_EXACTI:
1822 case OP_POSSTARI:
1823 case OP_POSPLUSI:
1824 case OP_POSQUERYI:
1825 case OP_POSUPTOI:
1826 case OP_NOTSTAR:
1827 case OP_NOTMINSTAR:
1828 case OP_NOTPLUS:
1829 case OP_NOTMINPLUS:
1830 case OP_NOTQUERY:
1831 case OP_NOTMINQUERY:
1832 case OP_NOTUPTO:
1833 case OP_NOTMINUPTO:
1834 case OP_NOTEXACT:
1835 case OP_NOTPOSSTAR:
1836 case OP_NOTPOSPLUS:
1837 case OP_NOTPOSQUERY:
1838 case OP_NOTPOSUPTO:
1839 case OP_NOTSTARI:
1840 case OP_NOTMINSTARI:
1841 case OP_NOTPLUSI:
1842 case OP_NOTMINPLUSI:
1843 case OP_NOTQUERYI:
1844 case OP_NOTMINQUERYI:
1845 case OP_NOTUPTOI:
1846 case OP_NOTMINUPTOI:
1847 case OP_NOTEXACTI:
1848 case OP_NOTPOSSTARI:
1849 case OP_NOTPOSPLUSI:
1850 case OP_NOTPOSQUERYI:
1851 case OP_NOTPOSUPTOI:
1852 if (utf) utf16_char = TRUE;
1853 #endif
1854 /* Fall through. */
1855
1856 default:
1857 length = OP_lengths16[op] - 1;
1858 break;
1859
1860 case OP_CLASS:
1861 case OP_NCLASS:
1862 /* Skip the character bit map. */
1863 ptr += 32/sizeof(pcre_uint16);
1864 length = 0;
1865 break;
1866
1867 case OP_XCLASS:
1868 /* Reverse the size of the XCLASS instance. */
1869 ptr++;
1870 *ptr = swap_uint16(*ptr);
1871 if (LINK_SIZE > 1)
1872 {
1873 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1874 ptr++;
1875 *ptr = swap_uint16(*ptr);
1876 }
1877 ptr++;
1878
1879 if (LINK_SIZE > 1)
1880 length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1881 (1 + LINK_SIZE + 1);
1882 else
1883 length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1884
1885 op = *ptr;
1886 *ptr = swap_uint16(op);
1887 if ((op & XCL_MAP) != 0)
1888 {
1889 /* Skip the character bit map. */
1890 ptr += 32/sizeof(pcre_uint16);
1891 length -= 32/sizeof(pcre_uint16);
1892 }
1893 break;
1894 }
1895 }
1896 /* Control should never reach here in 16 bit mode. */
1897 #endif /* SUPPORT_PCRE16 */
1898 }
1899
1900
1901
1902 /*************************************************
1903 * Check match or recursion limit *
1904 *************************************************/
1905
1906 static int
1907 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1908 int start_offset, int options, int *use_offsets, int use_size_offsets,
1909 int flag, unsigned long int *limit, int errnumber, const char *msg)
1910 {
1911 int count;
1912 int min = 0;
1913 int mid = 64;
1914 int max = -1;
1915
1916 extra->flags |= flag;
1917
1918 for (;;)
1919 {
1920 *limit = mid;
1921
1922 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1923 use_offsets, use_size_offsets);
1924
1925 if (count == errnumber)
1926 {
1927 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1928 min = mid;
1929 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1930 }
1931
1932 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1933 count == PCRE_ERROR_PARTIAL)
1934 {
1935 if (mid == min + 1)
1936 {
1937 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1938 break;
1939 }
1940 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1941 max = mid;
1942 mid = (min + mid)/2;
1943 }
1944 else break; /* Some other error */
1945 }
1946
1947 extra->flags &= ~flag;
1948 return count;
1949 }
1950
1951
1952
1953 /*************************************************
1954 * Case-independent strncmp() function *
1955 *************************************************/
1956
1957 /*
1958 Arguments:
1959 s first string
1960 t second string
1961 n number of characters to compare
1962
1963 Returns: < 0, = 0, or > 0, according to the comparison
1964 */
1965
1966 static int
1967 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1968 {
1969 while (n--)
1970 {
1971 int c = tolower(*s++) - tolower(*t++);
1972 if (c) return c;
1973 }
1974 return 0;
1975 }
1976
1977
1978
1979 /*************************************************
1980 * Check newline indicator *
1981 *************************************************/
1982
1983 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1984 a message and return 0 if there is no match.
1985
1986 Arguments:
1987 p points after the leading '<'
1988 f file for error message
1989
1990 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1991 */
1992
1993 static int
1994 check_newline(pcre_uint8 *p, FILE *f)
1995 {
1996 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1997 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1998 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1999 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2000 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2001 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2002 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2003 fprintf(f, "Unknown newline type at: <%s\n", p);
2004 return 0;
2005 }
2006
2007
2008
/*************************************************
*                Usage function                  *
*************************************************/

/* Write the command-line help text to the standard output. Some lines depend
on how the program was built: the readline() note, and the -16, -dfa, and -p
options, are selected by preprocessor conditionals. */

static void
usage(void)
{
/* Synopsis and the note about readline() */

fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

/* The list of options */

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use 16-bit interface\n", stdout);
#endif

fputs(" -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2050
2051
2052
2053 /*************************************************
2054 * Main Program *
2055 *************************************************/
2056
2057 /* Read lines from named file or stdin and write to named file or stdout; lines
2058 consist of a regular expression, in delimiters and optionally followed by
2059 options, followed by a set of test data, terminated by an empty line. */
2060
2061 int main(int argc, char **argv)
2062 {
2063 FILE *infile = stdin;
2064 const char *version;
2065 int options = 0;
2066 int study_options = 0;
2067 int default_find_match_limit = FALSE;
2068 int op = 1;
2069 int timeit = 0;
2070 int timeitm = 0;
2071 int showinfo = 0;
2072 int showstore = 0;
2073 int force_study = -1;
2074 int force_study_options = 0;
2075 int quiet = 0;
2076 int size_offsets = 45;
2077 int size_offsets_max;
2078 int *offsets = NULL;
2079 #if !defined NOPOSIX
2080 int posix = 0;
2081 #endif
2082 int debug = 0;
2083 int done = 0;
2084 int all_use_dfa = 0;
2085 int yield = 0;
2086 int stack_size;
2087
2088 pcre_jit_stack *jit_stack = NULL;
2089
2090 /* These vectors store, end-to-end, a list of zero-terminated captured
2091 substring names, each list itself being terminated by an empty name. Assume
2092 that 1024 is plenty long enough for the few names we'll be testing. It is
2093 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2094 for the actual memory, to ensure alignment. By defining these variables always
2095 (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2096 #ifdefs in the code. */
2097
2098 pcre_uint16 copynames[1024];
2099 pcre_uint16 getnames[1024];
2100
2101 pcre_uint16 *cn16ptr;
2102 pcre_uint16 *gn16ptr;
2103
2104 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2105 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2106 pcre_uint8 *cn8ptr;
2107 pcre_uint8 *gn8ptr;
2108
2109 /* Get buffers from malloc() so that valgrind will check their misuse when
2110 debugging. They grow automatically when very long lines are read. The 16-bit
2111 buffer (buffer16) is obtained only if needed. */
2112
2113 buffer = (pcre_uint8 *)malloc(buffer_size);
2114 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2115 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2116
2117 /* The outfile variable is static so that new_malloc can use it. */
2118
2119 outfile = stdout;
2120
2121 /* The following _setmode() stuff is some Windows magic that tells its runtime
2122 library to translate CRLF into a single LF character. At least, that's what
2123 I've been told: never having used Windows I take this all on trust. Originally
2124 it set 0x8000, but then I was advised that _O_BINARY was better. */
2125
2126 #if defined(_WIN32) || defined(WIN32)
2127 _setmode( _fileno( stdout ), _O_BINARY );
2128 #endif
2129
2130 /* Get the version number: both pcre_version() and pcre16_version() give the
2131 same answer. We just need to ensure that we call one that is available. */
2132
2133 #ifdef SUPPORT_PCRE8
2134 version = pcre_version();
2135 #else
2136 version = pcre16_version();
2137 #endif
2138
2139 /* Scan options */
2140
2141 while (argc > 1 && argv[op][0] == '-')
2142 {
2143 pcre_uint8 *endptr;
2144
2145 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2146 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2147 else if (strcmp(argv[op], "-s+") == 0)
2148 {
2149 force_study = 1;
2150 force_study_options = PCRE_STUDY_JIT_COMPILE;
2151 }
2152 else if (strcmp(argv[op], "-16") == 0)
2153 {
2154 #ifdef SUPPORT_PCRE16
2155 use_pcre16 = 1;
2156 #else
2157 printf("** This version of PCRE was built without 16-bit support\n");
2158 exit(1);
2159 #endif
2160 }
2161 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2162 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2163 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2164 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2165 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2166 #if !defined NODFA
2167 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2168 #endif
2169 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2170 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2171 *endptr == 0))
2172 {
2173 op++;
2174 argc--;
2175 }
2176 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2177 {
2178 int both = argv[op][2] == 0;
2179 int temp;
2180 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2181 *endptr == 0))
2182 {
2183 timeitm = temp;
2184 op++;
2185 argc--;
2186 }
2187 else timeitm = LOOPREPEAT;
2188 if (both) timeit = timeitm;
2189 }
2190 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2191 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2192 *endptr == 0))
2193 {
2194 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2195 printf("PCRE: -S not supported on this OS\n");
2196 exit(1);
2197 #else
2198 int rc;
2199 struct rlimit rlim;
2200 getrlimit(RLIMIT_STACK, &rlim);
2201 rlim.rlim_cur = stack_size * 1024 * 1024;
2202 rc = setrlimit(RLIMIT_STACK, &rlim);
2203 if (rc != 0)
2204 {
2205 printf("PCRE: setrlimit() failed with error %d\n", rc);
2206 exit(1);
2207 }
2208 op++;
2209 argc--;
2210 #endif
2211 }
2212 #if !defined NOPOSIX
2213 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2214 #endif
2215 else if (strcmp(argv[op], "-C") == 0)
2216 {
2217 int rc;
2218 unsigned long int lrc;
2219 printf("PCRE version %s\n", version);
2220 printf("Compiled with\n");
2221
2222 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2223 are set, either both UTFs are supported or both are not supported. */
2224
2225 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2226 printf(" 8-bit and 16-bit support\n");
2227 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2228 if (rc)
2229 printf(" UTF-8 and UTF-16 support\n");
2230 else
2231 printf(" No UTF-8 or UTF-16 support\n");
2232 #elif defined SUPPORT_PCRE8
2233 printf(" 8-bit support only\n");
2234 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2235 printf(" %sUTF-8 support\n", rc? "" : "No ");
2236 #else
2237 printf(" 16-bit support only\n");
2238 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2239 printf(" %sUTF-16 support\n", rc? "" : "No ");
2240 #endif
2241
2242 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2243 printf(" %sUnicode properties support\n", rc? "" : "No ");
2244 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2245 if (rc)
2246 printf(" Just-in-time compiler support\n");
2247 else
2248 printf(" No just-in-time compiler support\n");
2249 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2250 /* Note that these values are always the ASCII values, even
2251 in EBCDIC environments. CR is 13 and NL is 10. */
2252 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2253 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2254 (rc == -2)? "ANYCRLF" :
2255 (rc == -1)? "ANY" : "???");
2256 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2257 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2258 "all Unicode newlines");
2259 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2260 printf(" Internal link size = %d\n", rc);
2261 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2262 printf(" POSIX malloc threshold = %d\n", rc);
2263 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2264 printf(" Default match limit = %ld\n", lrc);
2265 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2266 printf(" Default recursion depth limit = %ld\n", lrc);
2267 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2268 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2269 goto EXIT;
2270 }
2271 else if (strcmp(argv[op], "-help") == 0 ||
2272 strcmp(argv[op], "--help") == 0)
2273 {
2274 usage();
2275 goto EXIT;
2276 }
2277 else
2278 {
2279 printf("** Unknown or malformed option %s\n", argv[op]);
2280 usage();
2281 yield = 1;
2282 goto EXIT;
2283 }
2284 op++;
2285 argc--;
2286 }
2287
2288 /* Get the store for the offsets vector, and remember what it was */
2289
2290 size_offsets_max = size_offsets;
2291 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2292 if (offsets == NULL)
2293 {
2294 printf("** Failed to get %d bytes of memory for offsets vector\n",
2295 (int)(size_offsets_max * sizeof(int)));
2296 yield = 1;
2297 goto EXIT;
2298 }
2299
2300 /* Sort out the input and output files */
2301
2302 if (argc > 1)
2303 {
2304 infile = fopen(argv[op], INPUT_MODE);
2305 if (infile == NULL)
2306 {
2307 printf("** Failed to open %s\n", argv[op]);
2308 yield = 1;
2309 goto EXIT;
2310 }
2311 }
2312
2313 if (argc > 2)
2314 {
2315 outfile = fopen(argv[op+1], OUTPUT_MODE);
2316 if (outfile == NULL)
2317 {
2318 printf("** Failed to open %s\n", argv[op+1]);
2319 yield = 1;
2320 goto EXIT;
2321 }
2322 }
2323
2324 /* Set alternative malloc function */
2325
2326 #ifdef SUPPORT_PCRE8
2327 pcre_malloc = new_malloc;
2328 pcre_free = new_free;
2329 pcre_stack_malloc = stack_malloc;
2330 pcre_stack_free = stack_free;
2331 #endif
2332
2333 #ifdef SUPPORT_PCRE16
2334 pcre16_malloc = new_malloc;
2335 pcre16_free = new_free;
2336 pcre16_stack_malloc = stack_malloc;
2337 pcre16_stack_free = stack_free;
2338 #endif
2339
2340 /* Heading line unless quiet, then prompt for first regex if stdin */
2341
2342 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2343
2344 /* Main loop */
2345
2346 while (!done)
2347 {
2348 pcre *re = NULL;
2349 pcre_extra *extra = NULL;
2350
2351 #if !defined NOPOSIX /* There are still compilers that require no indent */
2352 regex_t preg;
2353 int do_posix = 0;
2354 #endif
2355
2356 const char *error;
2357 pcre_uint8 *markptr;
2358 pcre_uint8 *p, *pp, *ppp;
2359 pcre_uint8 *to_file = NULL;
2360 const pcre_uint8 *tables = NULL;
2361 unsigned long int true_size, true_study_size = 0;
2362 size_t size, regex_gotten_store;
2363 int do_allcaps = 0;
2364 int do_mark = 0;
2365 int do_study = 0;
2366 int no_force_study = 0;
2367 int do_debug = debug;
2368 int do_G = 0;
2369 int do_g = 0;
2370 int do_showinfo = showinfo;
2371 int do_showrest = 0;
2372 int do_showcaprest = 0;
2373 int do_flip = 0;
2374 int erroroffset, len, delimiter, poffset;
2375
2376 use_utf = 0;
2377 debug_lengths = 1;
2378
2379 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2380 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2381 fflush(outfile);
2382
2383 p = buffer;
2384 while (isspace(*p)) p++;
2385 if (*p == 0) continue;
2386
2387 /* See if the pattern is to be loaded pre-compiled from a file. */
2388
2389 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2390 {
2391 unsigned long int magic, get_options;
2392 pcre_uint8 sbuf[8];
2393 FILE *f;
2394
2395 p++;
2396 pp = p + (int)strlen((char *)p);
2397 while (isspace(pp[-1])) pp--;
2398 *pp = 0;
2399
2400 f = fopen((char *)p, "rb");
2401 if (f == NULL)
2402 {
2403 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2404 continue;
2405 }
2406
2407 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2408
2409 true_size =
2410 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2411 true_study_size =
2412 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2413
2414 re = (real_pcre *)new_malloc(true_size);
2415 regex_gotten_store = first_gotten_store;
2416
2417 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2418
2419 magic = ((real_pcre *)re)->magic_number;
2420 if (magic != MAGIC_NUMBER)
2421 {
2422 if (swap_uint32(magic) == MAGIC_NUMBER)
2423 {
2424 do_flip = 1;
2425 }
2426 else
2427 {
2428 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2429 fclose(f);
2430 continue;
2431 }
2432 }
2433
2434 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2435 do_flip? " (byte-inverted)" : "", p);
2436
2437 /* Now see if there is any following study data. */
2438
2439 if (true_study_size != 0)
2440 {
2441 pcre_study_data *psd;
2442
2443 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2444 extra->flags = PCRE_EXTRA_STUDY_DATA;
2445
2446 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2447 extra->study_data = psd;
2448
2449 if (fread(psd, 1, true_study_size, f) != true_study_size)
2450 {
2451 FAIL_READ:
2452 fprintf(outfile, "Failed to read data from %s\n", p);
2453 if (extra != NULL)
2454 {
2455 PCRE_FREE_STUDY(extra);
2456 }
2457 if (re != NULL) new_free(re);
2458 fclose(f);
2459 continue;
2460 }
2461 fprintf(outfile, "Study data loaded from %s\n", p);
2462 do_study = 1; /* To get the data output if requested */
2463 }
2464 else fprintf(outfile, "No study data\n");
2465
2466 /* Flip the necessary bytes. */
2467 if (do_flip)
2468 {
2469 PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2470 }
2471
2472 /* Need to know if UTF-8 for printing data strings */
2473
2474 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2475 use_utf = (get_options & PCRE_UTF8) != 0;
2476
2477 fclose(f);
2478 goto SHOW_INFO;
2479 }
2480
2481 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2482 the pattern; if it isn't complete, read more. */
2483
2484 delimiter = *p++;
2485
2486 if (isalnum(delimiter) || delimiter == '\\')
2487 {
2488 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2489 goto SKIP_DATA;
2490 }
2491
2492 pp = p;
2493 poffset = (int)(p - buffer);
2494
2495 for(;;)
2496 {
2497 while (*pp != 0)
2498 {
2499 if (*pp == '\\' && pp[1] != 0) pp++;
2500 else if (*pp == delimiter) break;
2501 pp++;
2502 }
2503 if (*pp != 0) break;
2504 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2505 {
2506 fprintf(outfile, "** Unexpected EOF\n");
2507 done = 1;
2508 goto CONTINUE;
2509 }
2510 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2511 }
2512
2513 /* The buffer may have moved while being extended; reset the start of data
2514 pointer to the correct relative point in the buffer. */
2515
2516 p = buffer + poffset;
2517
2518 /* If the first character after the delimiter is backslash, make
2519 the pattern end with backslash. This is purely to provide a way
2520 of testing for the error message when a pattern ends with backslash. */
2521
2522 if (pp[1] == '\\') *pp++ = '\\';
2523
2524 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2525 for callouts. */
2526
2527 *pp++ = 0;
2528 strcpy((char *)pbuffer, (char *)p);
2529
2530 /* Look for options after final delimiter */
2531
2532 options = 0;
2533 study_options = 0;
2534 log_store = showstore; /* default from command line */
2535
2536 while (*pp != 0)
2537 {
2538 switch (*pp++)
2539 {
2540 case 'f': options |= PCRE_FIRSTLINE; break;
2541 case 'g': do_g = 1; break;
2542 case 'i': options |= PCRE_CASELESS; break;
2543 case 'm': options |= PCRE_MULTILINE; break;
2544 case 's': options |= PCRE_DOTALL; break;
2545 case 'x': options |= PCRE_EXTENDED; break;
2546
2547 case '+':
2548 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2549 break;
2550
2551 case '=': do_allcaps = 1; break;
2552 case 'A': options |= PCRE_ANCHORED; break;
2553 case 'B': do_debug = 1; break;
2554 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2555 case 'D': do_debug = do_showinfo = 1; break;
2556 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2557 case 'F': do_flip = 1; break;
2558 case 'G': do_G = 1; break;
2559 case 'I': do_showinfo = 1; break;
2560 case 'J': options |= PCRE_DUPNAMES; break;
2561 case 'K': do_mark = 1; break;
2562 case 'M': log_store = 1; break;
2563 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2564
2565 #if !defined NOPOSIX
2566 case 'P': do_posix = 1; break;
2567 #endif
2568
2569 case 'S':
2570 if (do_study == 0)
2571 {
2572 do_study = 1;
2573 if (*pp == '+')
2574 {
2575 study_options |= PCRE_STUDY_JIT_COMPILE;
2576 pp++;
2577 }
2578 }
2579 else
2580 {
2581 do_study = 0;
2582 no_force_study = 1;
2583 }
2584 break;
2585
2586 case 'U': options |= PCRE_UNGREEDY; break;
2587 case 'W': options |= PCRE_UCP; break;
2588 case 'X': options |= PCRE_EXTRA; break;
2589 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2590 case 'Z': debug_lengths = 0; break;
2591 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2592 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2593
2594 case 'T':
2595 switch (*pp++)
2596 {
2597 case '0': tables = tables0; break;
2598 case '1': tables = tables1; break;
2599
2600 case '\r':
2601 case '\n':
2602 case ' ':
2603 case 0:
2604 fprintf(outfile, "** Missing table number after /T\n");
2605 goto SKIP_DATA;
2606
2607 default:
2608 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2609 goto SKIP_DATA;
2610 }
2611 break;
2612
2613 case 'L':
2614 ppp = pp;
2615 /* The '\r' test here is so that it works on Windows. */
2616 /* The '0' test is just in case this is an unterminated line. */
2617 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2618 *ppp = 0;
2619 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2620 {
2621 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2622 goto SKIP_DATA;
2623 }
2624 locale_set = 1;
2625 tables = PCRE_MAKETABLES;
2626 pp = ppp;
2627 break;
2628
2629 case '>':
2630 to_file = pp;
2631 while (*pp != 0) pp++;
2632 while (isspace(pp[-1])) pp--;
2633 *pp = 0;
2634 break;
2635
2636 case '<':
2637 {
2638 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2639 {
2640 options |= PCRE_JAVASCRIPT_COMPAT;
2641 pp += 3;
2642 }
2643 else
2644 {
2645 int x = check_newline(pp, outfile);
2646 if (x == 0) goto SKIP_DATA;
2647 options |= x;
2648 while (*pp++ != '>');
2649 }
2650 }
2651 break;
2652
2653 case '\r': /* So that it works in Windows */
2654 case '\n':
2655 case ' ':
2656 break;
2657
2658 default:
2659 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2660 goto SKIP_DATA;
2661 }
2662 }
2663
2664 /* Handle compiling via the POSIX interface, which doesn't support the
2665 timing, showing, or debugging options, nor the ability to pass over
2666 local character tables. Neither does it have 16-bit support. */
2667
2668 #if !defined NOPOSIX
2669 if (posix || do_posix)
2670 {
2671 int rc;
2672 int cflags = 0;
2673
2674 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2675 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2676 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2677 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2678 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2679 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2680 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2681
2682 first_gotten_store = 0;
2683 rc = regcomp(&preg, (char *)p, cflags);
2684
2685 /* Compilation failed; go back for another re, skipping to blank line
2686 if non-interactive. */
2687
2688 if (rc != 0)
2689 {
2690 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2691 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2692 goto SKIP_DATA;
2693 }
2694 }
2695
2696 /* Handle compiling via the native interface */
2697
2698 else
2699 #endif /* !defined NOPOSIX */
2700
2701 {
2702 unsigned long int get_options;
2703
2704 /* In 16-bit mode, convert the input. */
2705
2706 #ifdef SUPPORT_PCRE16
2707 if (use_pcre16)
2708 {
2709 if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2710 {
2711 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2712 "converted to UTF-16\n");
2713 goto SKIP_DATA;
2714 }
2715 p = (pcre_uint8 *)buffer16;
2716 }
2717 #endif
2718
2719 /* Compile many times when timing */
2720
2721 if (timeit > 0)
2722 {
2723 register int i;
2724 clock_t time_taken;
2725 clock_t start_time = clock();
2726 for (i = 0; i < timeit; i++)
2727 {
2728 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2729 if (re != NULL) free(re);
2730 }
2731 time_taken = clock() - start_time;
2732 fprintf(outfile, "Compile time %.4f milliseconds\n",
2733 (((double)time_taken * 1000.0) / (double)timeit) /
2734 (double)CLOCKS_PER_SEC);
2735 }
2736
2737 first_gotten_store = 0;
2738 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2739
2740 /* Compilation failed; go back for another re, skipping to blank line
2741 if non-interactive. */
2742
2743 if (re == NULL)
2744 {
2745 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2746 SKIP_DATA:
2747 if (infile != stdin)
2748 {
2749 for (;;)
2750 {
2751 if (extend_inputline(infile, buffer, NULL) == NULL)
2752 {
2753 done = 1;
2754 goto CONTINUE;
2755 }
2756 len = (int)strlen((char *)buffer);
2757 while (len > 0 && isspace(buffer[len-1])) len--;
2758 if (len == 0) break;
2759 }
2760 fprintf(outfile, "\n");
2761 }
2762 goto CONTINUE;
2763 }
2764
2765 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2766 within the regex; check for this so that we know how to process the data
2767 lines. */
2768
2769 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2770 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2771
2772 /* Extract the size for possible writing before possibly flipping it,
2773 and remember the store that was got. */
2774
2775 true_size = ((real_pcre *)re)->size;
2776 regex_gotten_store = first_gotten_store;
2777
2778 /* Output code size information if requested */
2779
2780 if (log_store)
2781 fprintf(outfile, "Memory allocation (code space): %d\n",
2782 (int)(first_gotten_store -
2783 sizeof(real_pcre) -
2784 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2785
2786 /* If -s or /S was present, study the regex to generate additional info to
2787 help with the matching, unless the pattern has the SS option, which
2788 suppresses the effect of /S (used for a few test patterns where studying is
2789 never sensible). */
2790
2791 if (do_study || (force_study >= 0 && !no_force_study))
2792 {
2793 if (timeit > 0)
2794 {
2795 register int i;
2796 clock_t time_taken;
2797 clock_t start_time = clock();
2798 for (i = 0; i < timeit; i++)
2799 {
2800 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2801 }
2802 time_taken = clock() - start_time;
2803 if (extra != NULL)
2804 {
2805 PCRE_FREE_STUDY(extra);
2806 }
2807 fprintf(outfile, " Study time %.4f milliseconds\n",
2808 (((double)time_taken * 1000.0) / (double)timeit) /
2809 (double)CLOCKS_PER_SEC);
2810 }
2811 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2812 if (error != NULL)
2813 fprintf(outfile, "Failed to study: %s\n", error);
2814 else if (extra != NULL)
2815 {
2816 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2817 if (log_store)
2818 {
2819 size_t jitsize;
2820 new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2821 if (jitsize != 0)
2822 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2823 }
2824 }
2825 }
2826
2827 /* If /K was present, we set up for handling MARK data. */
2828
2829 if (do_mark)
2830 {
2831 if (extra == NULL)
2832 {
2833 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2834 extra->flags = 0;
2835 }
2836 extra->mark = &markptr;
2837 extra->flags |= PCRE_EXTRA_MARK;
2838 }
2839
2840 /* Extract and display information from the compiled data if required. */
2841
2842 SHOW_INFO:
2843
2844 if (do_debug)
2845 {
2846 fprintf(outfile, "------------------------------------------------------------------\n");
2847 PCRE_PRINTINT(re, outfile, debug_lengths);
2848 }
2849
2850 /* We already have the options in get_options (see above) */
2851
2852 if (do_showinfo)
2853 {
2854 unsigned long int all_options;
2855 int count, backrefmax, first_char, need_char, okpartial, jchanged,
2856 hascrorlf;
2857 int nameentrysize, namecount;
2858 const pcre_uchar *nametable;
2859
2860 new_info(re, NULL, PCRE_INFO_SIZE, &size);
2861 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2862 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2863 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2864 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2865 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2866 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2867 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2868 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2869 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2870 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2871
2872 if (size != regex_gotten_store) fprintf(outfile,
2873 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2874 (int)size, (int)regex_gotten_store);
2875
2876 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2877 if (backrefmax > 0)
2878 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2879
2880 if (namecount > 0)
2881 {
2882 fprintf(outfile, "Named capturing subpatterns:\n");
2883 while (namecount-- > 0)
2884 {
2885 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2886 int imm2_size = use_pcre16 ? 1 : 2;
2887 #else
2888 int imm2_size = IMM2_SIZE;
2889 #endif
2890 int length = (int)STRLEN(nametable + imm2_size);
2891 fprintf(outfile, " ");
2892 PCHARSV(nametable, imm2_size, length, outfile);
2893 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
2894 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2895 fprintf(outfile, "%3d\n", use_pcre16?
2896 (int)(((PCRE_SPTR16)nametable)[0])
2897 :((int)nametable[0] << 8) | (int)nametable[1]);
2898 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
2899 #else
2900 fprintf(outfile, "%3d\n", GET2(nametable, 0));
2901 nametable += nameentrysize;
2902 #endif
2903 }
2904 }
2905
2906 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2907 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2908
2909 all_options = ((real_pcre *)re)->options;
2910 if (do_flip) all_options = swap_uint32(all_options);
2911
2912 if (get_options == 0) fprintf(outfile, "No options\n");
2913 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2914 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2915 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2916 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2917 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2918 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2919 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2920 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2921 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2922 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2923 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2924 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2925 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2926 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2927 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2928 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2929 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2930 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2931
2932 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2933
2934 switch (get_options & PCRE_NEWLINE_BITS)
2935 {
2936 case PCRE_NEWLINE_CR:
2937 fprintf(outfile, "Forced newline sequence: CR\n");
2938 break;
2939
2940 case PCRE_NEWLINE_LF:
2941 fprintf(outfile, "Forced newline sequence: LF\n");
2942 break;
2943
2944 case PCRE_NEWLINE_CRLF:
2945 fprintf(outfile, "Forced newline sequence: CRLF\n");
2946 break;
2947
2948 case PCRE_NEWLINE_ANYCRLF:
2949 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2950 break;
2951
2952 case PCRE_NEWLINE_ANY:
2953 fprintf(outfile, "Forced newline sequence: ANY\n");
2954 break;
2955
2956 default:
2957 break;
2958 }
2959
2960 if (first_char == -1)
2961 {
2962 fprintf(outfile, "First char at start or follows newline\n");
2963 }
2964 else if (first_char < 0)
2965 {
2966 fprintf(outfile, "No first char\n");
2967 }
2968 else
2969 {
2970 const char *caseless =
2971 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2972 "" : " (caseless)";
2973
2974 if (PRINTOK(first_char))
2975 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2976 else
2977 {
2978 fprintf(outfile, "First char = ");
2979 pchar(first_char, outfile);
2980 fprintf(outfile, "%s\n", caseless);
2981 }
2982 }
2983
2984 if (need_char < 0)
2985 {
2986 fprintf(outfile, "No need char\n");
2987 }
2988 else
2989 {
2990 const char *caseless =
2991 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2992 "" : " (caseless)";
2993
2994 if (PRINTOK(need_char))
2995 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2996 else
2997 {
2998 fprintf(outfile, "Need char = ");
2999 pchar(need_char, outfile);
3000 fprintf(outfile, "%s\n", caseless);
3001 }
3002 }
3003
3004 /* Don't output study size; at present it is in any case a fixed
3005 value, but it varies, depending on the computer architecture, and
3006 so messes up the test suite. (And with the /F option, it might be
3007 flipped.) If study was forced by an external -s, don't show this
3008 information unless -i or -d was also present. This means that, except
3009 when auto-callouts are involved, the output from runs with and without
3010 -s should be identical. */
3011
3012 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3013 {
3014 if (extra == NULL)
3015 fprintf(outfile, "Study returned NULL\n");
3016 else
3017 {
3018 pcre_uint8 *start_bits = NULL;
3019 int minlength;
3020
3021 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
3022 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3023
3024 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
3025 if (start_bits == NULL)
3026 fprintf(outfile, "No set of starting bytes\n");
3027 else
3028 {
3029 int i;
3030 int c = 24;
3031 fprintf(outfile, "Starting byte set: ");
3032 for (i = 0; i < 256; i++)
3033 {
3034 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3035 {
3036 if (c > 75)
3037 {
3038 fprintf(outfile, "\n ");
3039 c = 2;
3040 }
3041 if (PRINTOK(i) && i != ' ')
3042 {
3043 fprintf(outfile, "%c ", i);
3044 c += 2;
3045 }
3046 else
3047 {
3048 fprintf(outfile, "\\x%02x ", i);
3049 c += 5;
3050 }
3051 }
3052 }
3053 fprintf(outfile, "\n");
3054 }
3055 }
3056
3057 /* Show this only if the JIT was set by /S, not by -s. */
3058
3059 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3060 {
3061 int jit;
3062 new_info(re, extra, PCRE_INFO_JIT, &jit);
3063 if (jit)
3064 fprintf(outfile, "JIT study was successful\n");
3065 else
3066 #ifdef SUPPORT_JIT
3067 fprintf(outfile, "JIT study was not successful\n");
3068 #else
3069 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3070 #endif
3071 }
3072 }
3073 }
3074
3075 /* If the '>' option was present, we write out the regex to a file, and
3076 that is all. The first 8 bytes of the file are the regex length and then
3077 the study length, in big-endian order. */
3078
3079 if (to_file != NULL)
3080 {
3081 FILE *f = fopen((char *)to_file, "wb");
3082 if (f == NULL)
3083 {
3084 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3085 }
3086 else
3087 {
3088 pcre_uint8 sbuf[8];
3089
3090 if (do_flip) regexflip(re, extra);
3091 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3092 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3093 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3094 sbuf[3] = (pcre_uint8)((true_size) & 255);
3095 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3096 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3097 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3098 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3099
3100 if (fwrite(sbuf, 1, 8, f) < 8 ||
3101 fwrite(re, 1, true_size, f) < true_size)
3102 {
3103 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3104 }
3105 else
3106 {
3107 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3108
3109 /* If there is study data, write it. */
3110
3111 if (extra != NULL)
3112 {
3113 if (fwrite(extra->study_data, 1, true_study_size, f) <
3114 true_study_size)
3115 {
3116 fprintf(outfile, "Write error on %s: %s\n", to_file,
3117 strerror(errno));
3118 }
3119 else fprintf(outfile, "Study data written to %s\n", to_file);
3120 }
3121 }
3122 fclose(f);
3123 }
3124
3125 new_free(re);
3126 if (extra != NULL)
3127 {
3128 PCRE_FREE_STUDY(extra);
3129 }
3130 if (locale_set)
3131 {
3132 new_free((void *)tables);
3133 setlocale(LC_CTYPE, "C");
3134 locale_set = 0;
3135 }
3136 continue; /* With next regex */
3137 }
3138 } /* End of non-POSIX compile */
3139
3140 /* Read data lines and test them */
3141
3142 for (;;)
3143 {
3144 pcre_uint8 *q;
3145 pcre_uint8 *bptr;
3146 int *use_offsets = offsets;
3147 int use_size_offsets = size_offsets;
3148 int callout_data = 0;
3149 int callout_data_set = 0;
3150 int count, c;
3151 int copystrings = 0;
3152 int find_match_limit = default_find_match_limit;
3153 int getstrings = 0;
3154 int getlist = 0;
3155 int gmatched = 0;
3156 int start_offset = 0;
3157 int start_offset_sign = 1;
3158 int g_notempty = 0;
3159 int use_dfa = 0;
3160
3161 *copynames = 0;
3162 *getnames = 0;
3163
3164 cn16ptr = copynames;
3165 gn16ptr = getnames;
3166 cn8ptr = copynames8;
3167 gn8ptr = getnames8;
3168
3169 SET_PCRE_CALLOUT(callout);
3170 first_callout = 1;
3171 last_callout_mark = NULL;
3172 callout_extra = 0;
3173 callout_count = 0;
3174 callout_fail_count = 999999;
3175 callout_fail_id = -1;
3176 show_malloc = 0;
3177 options = 0;
3178
3179 if (extra != NULL) extra->flags &=
3180 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3181
3182 len = 0;
3183 for (;;)
3184 {
3185 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3186 {
3187 if (len > 0) /* Reached EOF without hitting a newline */
3188 {
3189 fprintf(outfile, "\n");
3190 break;
3191 }
3192 done = 1;
3193 goto CONTINUE;
3194 }
3195 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3196 len = (int)strlen((char *)buffer);
3197 if (buffer[len-1] == '\n') break;
3198 }
3199
3200 while (len > 0 && isspace(buffer[len-1])) len--;
3201 buffer[len] = 0;
3202 if (len == 0) break;
3203
3204 p = buffer;
3205 while (isspace(*p)) p++;
3206
3207 bptr = q = dbuffer;
3208 while ((c = *p++) != 0)
3209 {
3210 int i = 0;
3211 int n = 0;
3212
3213 if (c == '\\') switch ((c = *p++))
3214 {
3215 case 'a': c = 7; break;
3216 case 'b': c = '\b'; break;
3217 case 'e': c = 27; break;
3218 case 'f': c = '\f'; break;
3219 case 'n': c = '\n'; break;
3220 case 'r': c = '\r'; break;
3221 case 't': c = '\t'; break;
3222 case 'v': c = '\v'; break;
3223
3224 case '0': case '1': case '2': case '3':
3225 case '4': case '5': case '6': case '7':
3226 c -= '0';
3227 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3228 c = c * 8 + *p++ - '0';
3229
3230 #if !defined NOUTF8
3231 if (use_utf && c > 255)
3232 {
3233 pcre_uint8 buff8[8];
3234 int ii, utn;
3235 utn = ord2utf8(c, buff8);
3236 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
3237 c = buff8[ii]; /* Last byte */
3238 }
3239 #endif
3240 break;
3241
3242 case 'x':
3243
3244 /* Handle \x{..} specially - new Perl thing for utf8 */
3245
3246 #if !defined NOUTF8
3247 if (*p == '{')
3248 {
3249 pcre_uint8 *pt = p;
3250 c = 0;
3251
3252 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3253 when isxdigit() is a macro that refers to its argument more than
3254 once. This is banned by the C Standard, but apparently happens in at
3255 least one MacOS environment. */
3256
3257 for (pt++; isxdigit(*pt); pt++)
3258 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3259 if (*pt == '}')
3260 {
3261 pcre_uint8 buff8[8];
3262 int ii, utn;
3263 if (use_utf)
3264 {
3265 utn = ord2utf8(c, buff8);
3266 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
3267 c = buff8[ii]; /* Last byte */
3268 }
3269 else
3270 {
3271 if (c > 255)
3272 {
3273 if (use_pcre16)
3274 fprintf(outfile, "** Character \\x{%x} is greater than 255.\n"
3275 "** Because its input is first processed as 8-bit, pcretest "
3276 "does not\n** support such characters in 16-bit mode when "
3277 "UTF-16 is not set.\n", c);
3278 else
3279 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3280 "and UTF-8 mode is not enabled.\n", c);
3281
3282 fprintf(outfile, "** Truncation will probably give the wrong "
3283 "result.\n");
3284 }
3285 }
3286 p = pt + 1;
3287 break;
3288 }
3289 /* Not correct form; fall through */
3290 }
3291 #endif
3292
3293 /* Ordinary \x */
3294
3295 c = 0;
3296 while (i++ < 2 && isxdigit(*p))
3297 {
3298 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3299 p++;
3300 }
3301 break;
3302
3303 case 0: /* \ followed by EOF allows for an empty line */
3304 p--;
3305 continue;
3306
3307 case '>':
3308 if (*p == '-')
3309 {
3310 start_offset_sign = -1;
3311 p++;
3312 }
3313 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3314 start_offset *= start_offset_sign;
3315 continue;
3316
3317 case 'A': /* Option setting */
3318 options |= PCRE_ANCHORED;
3319 continue;
3320
3321 case 'B':
3322 options |= PCRE_NOTBOL;
3323 continue;
3324
3325 case 'C':
3326 if (isdigit(*p)) /* Set copy string */
3327 {
3328 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3329 copystrings |= 1 << n;
3330 }
3331 else if (isalnum(*p))
3332 {
3333 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3334 }
3335 else if (*p == '+')
3336 {
3337 callout_extra = 1;
3338 p++;
3339 }
3340 else if (*p == '-')
3341 {
3342 SET_PCRE_CALLOUT(NULL);
3343 p++;
3344 }
3345 else if (*p == '!')
3346 {
3347 callout_fail_id = 0;
3348 p++;
3349 while(isdigit(*p))
3350 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3351 callout_fail_count = 0;
3352 if (*p == '!')
3353 {
3354 p++;
3355 while(isdigit(*p))
3356 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3357 }
3358 }
3359 else if (*p == '*')
3360 {
3361 int sign = 1;
3362 callout_data = 0;
3363 if (*(++p) == '-') { sign = -1; p++; }
3364 while(isdigit(*p))
3365 callout_data = callout_data * 10 + *p++ - '0';
3366 callout_data *= sign;
3367 callout_data_set = 1;
3368 }
3369 continue;
3370
3371 #if !defined NODFA
3372 case 'D':
3373 #if !defined NOPOSIX
3374 if (posix || do_posix)
3375 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3376 else
3377 #endif
3378 use_dfa = 1;
3379 continue;
3380 #endif
3381
3382 #if !defined NODFA
3383 case 'F':
3384 options |= PCRE_DFA_SHORTEST;
3385 continue;
3386 #endif
3387
3388 case 'G':
3389 if (isdigit(*p))
3390 {
3391 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3392 getstrings |= 1 << n;
3393 }
3394 else if (isalnum(*p))
3395 {
3396 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3397 }
3398 continue;
3399
3400 case 'J':
3401 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3402 if (extra != NULL
3403 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3404 && extra->executable_jit != NULL)
3405 {
3406 if (jit_stack != NULL) PCRE_JIT_STACK_FREE(jit_stack);
3407 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3408 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3409 }
3410 continue;
3411
3412 case 'L':
3413 getlist = 1;
3414 continue;
3415
3416 case 'M':
3417 find_match_limit = 1;
3418 continue;
3419
3420 case 'N':
3421 if ((options & PCRE_NOTEMPTY) != 0)
3422 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3423 else
3424 options |= PCRE_NOTEMPTY;
3425 continue;
3426
3427 case 'O':
3428 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3429 if (n > size_offsets_max)
3430 {
3431 size_offsets_max = n;
3432 free(offsets);
3433 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3434 if (offsets == NULL)
3435 {
3436 printf("** Failed to get %d bytes of memory for offsets vector\n",
3437 (int)(size_offsets_max * sizeof(int)));
3438 yield = 1;
3439 goto EXIT;
3440 }
3441 }
3442 use_size_offsets = n;
3443 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3444 continue;
3445
3446 case 'P':
3447 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3448 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3449 continue;
3450
3451 case 'Q':
3452 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3453 if (extra == NULL)
3454 {
3455 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3456 extra->flags = 0;
3457 }
3458 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3459 extra->match_limit_recursion = n;
3460 continue;
3461
3462 case 'q':
3463 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3464 if (extra == NULL)
3465 {
3466 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3467 extra->flags = 0;
3468 }
3469 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3470 extra->match_limit = n;
3471 continue;
3472
3473 #if !defined NODFA
3474 case 'R':
3475 options |= PCRE_DFA_RESTART;
3476 continue;
3477 #endif
3478
3479 case 'S':
3480 show_malloc = 1;
3481 continue;
3482
3483 case 'Y':
3484 options |= PCRE_NO_START_OPTIMIZE;
3485 continue;
3486
3487 case 'Z':
3488 options |= PCRE_NOTEOL;
3489 continue;
3490
3491 case '?':
3492 options |= PCRE_NO_UTF8_CHECK;
3493 continue;
3494
3495 case '<':
3496 {
3497 int x = check_newline(p, outfile);
3498 if (x == 0) goto NEXT_DATA;
3499 options |= x;
3500 while (*p++ != '>');
3501 }
3502 continue;
3503 }
3504 *q++ = c;
3505 }
3506 *q = 0;
3507 len = (int)(q - dbuffer);
3508
3509 /* Move the data to the end of the buffer so that a read over the end of
3510 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3511 we are using the POSIX interface, we must include the terminating zero. */
3512
3513 #if !defined NOPOSIX
3514 if (posix || do_posix)
3515 {
3516 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3517 bptr += buffer_size - len - 1;
3518 }
3519 else
3520 #endif
3521 {
3522 memmove(bptr + buffer_size - len, bptr, len);
3523 bptr += buffer_size - len;
3524 }
3525
3526 if ((all_use_dfa || use_dfa) && find_match_limit)
3527 {
3528 printf("**Match limit not relevant for DFA matching: ignored\n");
3529 find_match_limit = 0;
3530 }
3531
3532 /* Handle matching via the POSIX interface, which does not
3533 support timing or playing with the match limit or callout data. */
3534
3535 #if !defined NOPOSIX
3536 if (posix || do_posix)
3537 {
3538 int rc;
3539 int eflags = 0;
3540 regmatch_t *pmatch = NULL;
3541 if (use_size_offsets > 0)
3542 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3543 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3544 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3545 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3546
3547 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3548
3549 if (rc != 0)
3550 {
3551 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3552 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3553 }
3554 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3555 != 0)
3556 {
3557 fprintf(outfile, "Matched with REG_NOSUB\n");
3558 }
3559 else
3560 {
3561 size_t i;
3562 for (i = 0; i < (size_t)use_size_offsets; i++)
3563 {
3564 if (pmatch[i].rm_so >= 0)
3565 {
3566 fprintf(outfile, "%2d: ", (int)i);
3567 PCHARSV(dbuffer, pmatch[i].rm_so,
3568 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3569 fprintf(outfile, "\n");
3570 if (do_showcaprest || (i == 0 && do_showrest))
3571 {
3572 fprintf(outfile, "%2d+ ", (int)i);
3573 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3574 outfile);
3575 fprintf(outfile, "\n");
3576 }
3577 }
3578 }
3579 }
3580 free(pmatch);
3581 goto NEXT_DATA;
3582 }
3583
3584 #endif /* !defined NOPOSIX */
3585
3586 /* Handle matching via the native interface - repeats for /g and /G */
3587
3588 #ifdef SUPPORT_PCRE16
3589 if (use_pcre16)
3590 {
3591 len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3592 if (len < 0)
3593 {
3594 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3595 "converted to UTF-16\n");
3596 goto NEXT_DATA;
3597 }
3598 bptr = (pcre_uint8 *)buffer16;
3599 }
3600 #endif
3601
3602 for (;; gmatched++) /* Loop for /g or /G */
3603 {
3604 markptr = NULL;
3605
3606 if (timeitm > 0)
3607 {
3608 register int i;
3609 clock_t time_taken;
3610 clock_t start_time = clock();
3611
3612 #if !defined NODFA
3613 if (all_use_dfa || use_dfa)
3614 {
3615 int workspace[1000];
3616 for (i = 0; i < timeitm; i++)
3617 {
3618 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3619 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3620 (sizeof(workspace)/sizeof(int)));
3621 }
3622 }
3623 else
3624 #endif
3625
3626 for (i = 0; i < timeitm; i++)
3627 {
3628 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3629 (options | g_notempty), use_offsets, use_size_offsets);
3630 }
3631 time_taken = clock() - start_time;
3632 fprintf(outfile, "Execute time %.4f milliseconds\n",
3633 (((double)time_taken * 1000.0) / (double)timeitm) /
3634 (double)CLOCKS_PER_SEC);
3635 }
3636
3637 /* If find_match_limit is set, we want to do repeated matches with
3638 varying limits in order to find the minimum value for the match limit and
3639 for the recursion limit. The match limits are relevant only to the normal
3640 running of pcre_exec(), so disable the JIT optimization. This makes it
3641 possible to run the same set of tests with and without JIT externally
3642 requested. */
3643
3644 if (find_match_limit)
3645 {
3646 if (extra == NULL)
3647 {
3648 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3649 extra->flags = 0;
3650 }
3651 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3652
3653 (void)check_match_limit(re, extra, bptr, len, start_offset,
3654 options|g_notempty, use_offsets, use_size_offsets,
3655 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3656 PCRE_ERROR_MATCHLIMIT, "match()");
3657
3658 count = check_match_limit(re, extra, bptr, len, start_offset,
3659 options|g_notempty, use_offsets, use_size_offsets,
3660 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3661 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3662 }
3663
3664 /* If callout_data is set, use the interface with additional data */
3665
3666 else if (callout_data_set)
3667 {
3668 if (extra == NULL)
3669 {
3670 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3671 extra->flags = 0;
3672 }
3673 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3674 extra->callout_data = &callout_data;
3675 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3676 options | g_notempty, use_offsets, use_size_offsets);
3677 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3678 }
3679
3680 /* The normal case is just to do the match once, with the default
3681 value of match_limit. */
3682
3683 #if !defined NODFA
3684 else if (all_use_dfa || use_dfa)
3685 {
3686 int workspace[1000];
3687 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3688 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3689 (sizeof(workspace)/sizeof(int)));
3690 if (count == 0)
3691 {
3692 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3693 count = use_size_offsets/2;
3694 }
3695 }
3696 #endif
3697
3698 else
3699 {
3700 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3701 options | g_notempty, use_offsets, use_size_offsets);
3702 if (count == 0)
3703 {
3704 fprintf(outfile, "Matched, but too many substrings\n");
3705 count = use_size_offsets/3;
3706 }
3707 }
3708
3709 /* Matched */
3710
3711 if (count >= 0)
3712 {
3713 int i, maxcount;
3714 void *cnptr, *gnptr;
3715
3716 #if !defined NODFA
3717 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3718 #endif
3719 maxcount = use_size_offsets/3;
3720
3721 /* This is a check against a lunatic return value. */
3722
3723 if (count > maxcount)
3724 {
3725 fprintf(outfile,
3726 "** PCRE error: returned count %d is too big for offset size %d\n",
3727 count, use_size_offsets);
3728 count = use_size_offsets/3;
3729 if (do_g || do_G)
3730 {
3731 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3732 do_g = do_G = FALSE; /* Break g/G loop */
3733 }
3734 }
3735
3736 /* do_allcaps requests showing of all captures in the pattern, to check
3737 unset ones at the end. */
3738
3739 if (do_allcaps)
3740 {
3741 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3742 count++; /* Allow for full match */
3743 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3744 }
3745
3746 /* Output the captured substrings */
3747
3748 for (i = 0; i < count * 2; i += 2)
3749 {
3750 if (use_offsets[i] < 0)
3751 {
3752 if (use_offsets[i] != -1)
3753 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3754 use_offsets[i], i);
3755 if (use_offsets[i+1] != -1)
3756 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3757 use_offsets[i+1], i+1);
3758 fprintf(outfile, "%2d: <unset>\n", i/2);
3759 }
3760 else
3761 {
3762 fprintf(outfile, "%2d: ", i/2);
3763 PCHARSV(bptr, use_offsets[i],
3764 use_offsets[i+1] - use_offsets[i], outfile);
3765 fprintf(outfile, "\n");
3766 if (do_showcaprest || (i == 0 && do_showrest))
3767 {
3768 fprintf(outfile, "%2d+ ", i/2);
3769 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
3770 outfile);
3771 fprintf(outfile, "\n");
3772 }
3773 }
3774 }
3775
3776 if (markptr != NULL)
3777 {
3778 fprintf(outfile, "MK: ");
3779 PCHARSV(markptr, 0, -1, outfile);
3780 fprintf(outfile, "\n");
3781 }
3782
3783 for (i = 0; i < 32; i++)
3784 {
3785 if ((copystrings & (1 << i)) != 0)
3786 {
3787 int rc;
3788 char copybuffer[256];
3789 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
3790 copybuffer, sizeof(copybuffer));
3791 if (rc < 0)
3792 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3793 else
3794 {
3795 fprintf(outfile, "%2dC ", i);
3796 PCHARSV(copybuffer, 0, rc, outfile);
3797 fprintf(outfile, " (%d)\n", rc);
3798 }
3799 }
3800 }
3801
3802 cnptr = copynames;
3803 for (;;)
3804 {
3805 int rc;
3806 char copybuffer[256];
3807
3808 if (use_pcre16)
3809 {
3810 if (*(pcre_uint16 *)cnptr == 0) break;
3811 }
3812 else
3813 {
3814 if (*(pcre_uint8 *)cnptr == 0) break;
3815 }
3816
3817 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
3818 cnptr, copybuffer, sizeof(copybuffer));
3819
3820 if (rc < 0)
3821 {
3822 fprintf(outfile, "copy substring ");
3823 PCHARSV(cnptr, 0, -1, outfile);
3824 fprintf(outfile, " failed %d\n", rc);
3825 }
3826 else
3827 {
3828 fprintf(outfile, " C ");
3829 PCHARSV(copybuffer, 0, rc, outfile);
3830 fprintf(outfile, " (%d) ", rc);
3831 PCHARSV(cnptr, 0, -1, outfile);
3832 putc('\n', outfile);
3833 }
3834
3835 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
3836 }
3837
3838 for (i = 0; i < 32; i++)
3839 {
3840 if ((getstrings & (1 << i)) != 0)
3841 {
3842 int rc;
3843 const char *substring;
3844 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
3845 if (rc < 0)
3846 fprintf(outfile, "get substring %d failed %d\n", i, rc);
3847 else
3848 {
3849 fprintf(outfile, "%2dG ", i);
3850 PCHARSV(substring, 0, rc, outfile);
3851 fprintf(outfile, " (%d)\n", rc);
3852 PCRE_FREE_SUBSTRING(substring);
3853 }
3854 }
3855 }
3856
3857 gnptr = getnames;
3858 for (;;)
3859 {
3860 int rc;
3861 const char *substring;
3862
3863 if (use_pcre16)
3864 {
3865 if (*(pcre_uint16 *)gnptr == 0) break;
3866 }
3867 else
3868 {
3869 if (*(pcre_uint8 *)gnptr == 0) break;
3870 }
3871
3872 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
3873 gnptr, &substring);
3874 if (rc < 0)
3875 {
3876 fprintf(outfile, "get substring ");
3877 PCHARSV(gnptr, 0, -1, outfile);
3878 fprintf(outfile, " failed %d\n", rc);
3879 }
3880 else
3881 {
3882 fprintf(outfile, " G ");
3883 PCHARSV(substring, 0, rc, outfile);
3884 fprintf(outfile, " (%d) ", rc);
3885 PCHARSV(gnptr, 0, -1, outfile);
3886 PCRE_FREE_SUBSTRING(substring);
3887 putc('\n', outfile);
3888 }
3889
3890 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
3891 }
3892
3893 if (getlist)
3894 {
3895 int rc;
3896 const char **stringlist;
3897 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
3898 if (rc < 0)
3899 fprintf(outfile, "get substring list failed %d\n", rc);
3900 else
3901 {
3902 for (i = 0; i < count; i++)
3903 {
3904 fprintf(outfile, "%2dL ", i);
3905 PCHARSV(stringlist[i], 0, -1, outfile);
3906 putc('\n', outfile);
3907 }
3908 if (stringlist[i] != NULL)
3909 fprintf(outfile, "string list not terminated by NULL\n");
3910 PCRE_FREE_SUBSTRING_LIST(stringlist);
3911 }
3912 }
3913 }
3914
3915 /* There was a partial match */
3916
3917 else if (count == PCRE_ERROR_PARTIAL)
3918 {
3919 if (markptr == NULL) fprintf(outfile, "Partial match");
3920 else
3921 {
3922 fprintf(outfile, "Partial match, mark=");
3923 PCHARSV(markptr, 0, -1, outfile);
3924 }
3925 if (use_size_offsets > 1)
3926 {
3927 fprintf(outfile, ": ");
3928 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
3929 outfile);
3930 }
3931 fprintf(outfile, "\n");
3932 break; /* Out of the /g loop */
3933 }
3934
3935 /* Failed to match. If this is a /g or /G loop and we previously set
3936 g_notempty after a null match, this is not necessarily the end. We want
3937 to advance the start offset, and continue. We won't be at the end of the
3938 string - that was checked before setting g_notempty.
3939
3940 Complication arises in the case when the newline convention is "any",
3941 "crlf", or "anycrlf". If the previous match was at the end of a line
3942 terminated by CRLF, an advance of one character just passes the \r,
3943 whereas we should prefer the longer newline sequence, as does the code in
3944 pcre_exec(). Fudge the offset value to achieve this. We check for a
3945 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
3946 find the default.
3947
3948 Otherwise, in the case of UTF-8 matching, the advance must be one
3949 character, not one byte. */
3950
3951 else
3952 {
3953 if (g_notempty != 0)
3954 {
3955 int onechar = 1;
3956 unsigned int obits = ((real_pcre *)re)->options;
3957 use_offsets[0] = start_offset;
3958 if ((obits & PCRE_NEWLINE_BITS) == 0)
3959 {
3960 int d;
3961 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
3962 /* Note that these values are always the ASCII ones, even in
3963 EBCDIC environments. CR = 13, NL = 10. */
3964 obits = (d == 13)? PCRE_NEWLINE_CR :
3965 (d == 10)? PCRE_NEWLINE_LF :
3966 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3967 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3968 (d == -1)? PCRE_NEWLINE_ANY : 0;
3969 }
3970 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3971 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3972 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3973 &&
3974 start_offset < len - 1 &&
3975 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3976 (use_pcre16?
3977 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
3978 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
3979 :
3980 bptr[start_offset] == '\r'
3981 && bptr[start_offset + 1] == '\n')
3982 #else
3983 bptr[start_offset] == '\r' &&
3984 bptr[start_offset + 1] == '\n'
3985 #endif
3986 )
3987 onechar++;
3988 else if (use_utf)
3989 {
3990 while (start_offset + onechar < len)
3991 {
3992 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3993 onechar++;
3994 }
3995 }
3996 use_offsets[1] = start_offset + onechar;
3997 }
3998 else
3999 {
4000 switch(count)
4001 {
4002 case PCRE_ERROR_NOMATCH:
4003 if (gmatched == 0)
4004 {
4005 if (markptr == NULL)
4006 {
4007 fprintf(outfile, "No match\n");
4008 }
4009 else
4010 {
4011 fprintf(outfile, "No match, mark = ");
4012 PCHARSV(markptr, 0, -1, outfile);
4013 putc('\n', outfile);
4014 }
4015 }
4016 break;
4017
4018 case PCRE_ERROR_BADUTF8:
4019 case PCRE_ERROR_SHORTUTF8:
4020 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
4021 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
4022 if (use_size_offsets >= 2)
4023 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4024 use_offsets[1]);
4025 fprintf(outfile, "\n");
4026 break;
4027
4028 default:
4029 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4030 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4031 else
4032 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4033 break;
4034 }
4035
4036 break; /* Out of the /g loop */
4037 }
4038 }
4039
4040 /* If not /g or /G we are done */
4041
4042 if (!do_g && !do_G) break;
4043
4044 /* If we have matched an empty string, first check to see if we are at
4045 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4046 Perl's /g option does. This turns out to be rather cunning. First we set
4047 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4048 same point. If this fails (picked up above) we advance to the next
4049 character. */
4050
4051 g_notempty = 0;
4052
4053 if (use_offsets[0] == use_offsets[1])
4054 {
4055 if (use_offsets[0] == len) break;
4056 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4057 }
4058
4059 /* For /g, update the start offset, leaving the rest alone */
4060
4061 if (do_g) start_offset = use_offsets[1];
4062
4063 /* For /G, update the pointer and length */
4064
4065 else
4066 {
4067 bptr += use_offsets[1] * CHAR_SIZE;
4068 len -= use_offsets[1];
4069 }
4070 } /* End of loop for /g and /G */
4071
4072 NEXT_DATA: continue;
4073 } /* End of loop for data lines */
4074
4075 CONTINUE:
4076
4077 #if !defined NOPOSIX
4078 if (posix || do_posix) regfree(&preg);
4079 #endif
4080
4081 if (re != NULL) new_free(re);
4082 if (extra != NULL)
4083 {
4084 PCRE_FREE_STUDY(extra);
4085 }
4086 if (locale_set)
4087 {
4088 new_free((void *)tables);
4089 setlocale(LC_CTYPE, "C");
4090 locale_set = 0;
4091 }
4092 if (jit_stack != NULL)
4093 {
4094 PCRE_JIT_STACK_FREE(jit_stack);
4095 jit_stack = NULL;
4096 }
4097 }
4098
4099 if (infile == stdin) fprintf(outfile, "\n");
4100
4101 EXIT:
4102
4103 if (infile != NULL && infile != stdin) fclose(infile);
4104 if (outfile != NULL && outfile != stdout) fclose(outfile);
4105
4106 free(buffer);
4107 free(dbuffer);
4108 free(pbuffer);
4109 free(offsets);
4110
4111 #ifdef SUPPORT_PCRE16
4112 if (buffer16 != NULL) free(buffer16);
4113 #endif
4114
4115 return yield;
4116 }
4117
4118 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5