/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC revision 1022 by ph10, Tue Aug 28 12:28:15 2012 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    #ifdef HAVE_CONFIG_H
50    #include "config.h"
51    #endif
52    
53  #include <ctype.h>  #include <ctype.h>
54  #include <stdio.h>  #include <stdio.h>
55  #include <string.h>  #include <string.h>
56  #include <stdlib.h>  #include <stdlib.h>
57  #include <time.h>  #include <time.h>
58  #include <locale.h>  #include <locale.h>
59    #include <errno.h>
60    
61    /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67    #ifdef HAVE_UNISTD_H
68    #include <unistd.h>
69    #endif
70    #if defined(SUPPORT_LIBREADLINE)
71    #include <readline/readline.h>
72    #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80    #endif
81    
82    /* A number of things vary for Windows builds. Originally, pcretest opened its
83    input and output without "b"; then I was told that "b" was needed in some
84    environments, so it was added for release 5.0 to both the input and output. (It
85    makes no difference on Unix-like systems.) Later I was told that it is wrong
86    for the input on Windows. I've now abstracted the modes into two macros that
87    are set here, to make it easier to fiddle with them, and removed "b" from the
88    input mode under Windows. */
89    
90    #if defined(_WIN32) || defined(WIN32)
91    #include <io.h>                /* For _setmode() */
92    #include <fcntl.h>             /* For _O_BINARY */
93    #define INPUT_MODE   "r"
94    #define OUTPUT_MODE  "wb"
95    
96    #ifndef isatty
97    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
98    #endif                         /* though in some environments they seem to   */
99                                   /* be already defined, hence the #ifndefs.    */
100    #ifndef fileno
101    #define fileno _fileno
102    #endif
103    
104    /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106    #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
112    #else
113    #include <sys/time.h>          /* These two includes are needed */
114    #include <sys/resource.h>      /* for setrlimit(). */
115    #define INPUT_MODE   "rb"
116    #define OUTPUT_MODE  "wb"
117    #endif
118    
119    #define PRIV(name) name
120    
121    /* We have to include pcre_internal.h because we need the internal info for
122    displaying the results of pcre_study() and we also need to know about the
123    internal macros, structures, and other internal data values; pcretest has
124    "inside information" compared to a program that strictly follows the PCRE API.
125    
126    Although pcre_internal.h does itself include pcre.h, we explicitly include it
127    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
128    appropriately for an application, not for building PCRE. */
129    
130    #include "pcre.h"
131    
132    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
133    /* Configure internal macros to 16 bit mode. */
134    #define COMPILE_PCRE16
135    #endif
136    
137    #include "pcre_internal.h"
138    
139    /* The pcre_printint() function, which prints the internal form of a compiled
140    regex, is held in a separate file so that (a) it can be compiled in either
141    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
142    when that is compiled in debug mode. */
143    
144  /* Use the internal info for displaying the results of pcre_study(). */  #ifdef SUPPORT_PCRE8
145    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
146    #endif
147    #ifdef SUPPORT_PCRE16
148    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
149    #endif
150    
151    /* We need access to some of the data tables that PCRE uses. So as not to have
152    to keep two copies, we include the source file here, changing the names of the
153    external symbols to prevent clashes. */
154    
155    #define PCRE_INCLUDED
156    
157    #include "pcre_tables.c"
158    
159    /* The definition of the macro PRINTABLE, which determines whether to print an
160    output character as-is or as a hex value when showing compiled patterns, is
161    the same as in the printint.src file. We use it here in cases when the locale
162    has not been explicitly changed, so as to get consistent output from systems
163    that differ in their output from isprint() even in the "C" locale. */
164    
165    #ifdef EBCDIC
166    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
167    #else
168    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
169    #endif
170    
171  #include "internal.h"  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
172    
173    /* Posix support is disabled in 16 bit only mode. */
174    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
175    #define NOPOSIX
176    #endif
177    
178  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
179  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 183  Makefile. */
183  #include "pcreposix.h"  #include "pcreposix.h"
184  #endif  #endif
185    
186    /* It is also possible, originally for the benefit of a version that was
187    imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
188    NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
189    automatically cut out the UTF support if PCRE is built without it. */
190    
191    #ifndef SUPPORT_UTF
192    #ifndef NOUTF
193    #define NOUTF
194    #endif
195    #endif
196    
197    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
198    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
199    only from one place and is handled differently). I couldn't dream up any way of
200    using a single macro to do this in a generic way, because of the many different
201    argument requirements. We know that at least one of SUPPORT_PCRE8 and
202    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
203    use these in the definitions of generic macros.
204    
205    **** Special note about the PCHARSxxx macros: the address of the string to be
206    printed is always given as two arguments: a base address followed by an offset.
207    The base address is cast to the correct data size for 8 or 16 bit data; the
208    offset is in units of this size. If the string were given as base+offset in one
209    argument, the casting might be incorrectly applied. */
210    
211    #ifdef SUPPORT_PCRE8
212    
213    #define PCHARS8(lv, p, offset, len, f) \
214      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
215    
216    #define PCHARSV8(p, offset, len, f) \
217      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
218    
219    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
220      p = read_capture_name8(p, cn8, re)
221    
222    #define STRLEN8(p) ((int)strlen((char *)p))
223    
224    #define SET_PCRE_CALLOUT8(callout) \
225      pcre_callout = callout
226    
227    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
228       pcre_assign_jit_stack(extra, callback, userdata)
229    
230    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
231      re = pcre_compile((char *)pat, options, error, erroffset, tables)
232    
233    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
234        namesptr, cbuffer, size) \
235      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
236        (char *)namesptr, cbuffer, size)
237    
238    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
239      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
240    
241    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
242        offsets, size_offsets, workspace, size_workspace) \
243      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
244        offsets, size_offsets, workspace, size_workspace)
245    
246    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
247        offsets, size_offsets) \
248      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
249        offsets, size_offsets)
250    
251    #define PCRE_FREE_STUDY8(extra) \
252      pcre_free_study(extra)
253    
254    #define PCRE_FREE_SUBSTRING8(substring) \
255      pcre_free_substring(substring)
256    
257    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
258      pcre_free_substring_list(listptr)
259    
260    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
261        getnamesptr, subsptr) \
262      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
263        (char *)getnamesptr, subsptr)
264    
265    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
266      n = pcre_get_stringnumber(re, (char *)ptr)
267    
268    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
269      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
270    
271    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
272      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
273    
274    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
275      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
276    
277    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
278      pcre_printint(re, outfile, debug_lengths)
279    
280    #define PCRE_STUDY8(extra, re, options, error) \
281      extra = pcre_study(re, options, error)
282    
283    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
284      pcre_jit_stack_alloc(startsize, maxsize)
285    
286    #define PCRE_JIT_STACK_FREE8(stack) \
287      pcre_jit_stack_free(stack)
288    
289    #endif /* SUPPORT_PCRE8 */
290    
291    /* -----------------------------------------------------------*/
292    
293    #ifdef SUPPORT_PCRE16
294    
295    #define PCHARS16(lv, p, offset, len, f) \
296      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
297    
298    #define PCHARSV16(p, offset, len, f) \
299      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
300    
301    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
302      p = read_capture_name16(p, cn16, re)
303    
304    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
305    
306    #define SET_PCRE_CALLOUT16(callout) \
307      pcre16_callout = (int (*)(pcre16_callout_block *))callout
308    
309    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
310      pcre16_assign_jit_stack((pcre16_extra *)extra, \
311        (pcre16_jit_callback)callback, userdata)
312    
313    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
314      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
315        tables)
316    
317    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
318        namesptr, cbuffer, size) \
319      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
320        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
321    
322    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
323      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
324        (PCRE_UCHAR16 *)cbuffer, size/2)
325    
326    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
327        offsets, size_offsets, workspace, size_workspace) \
328      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
329        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
330        workspace, size_workspace)
331    
332    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
333        offsets, size_offsets) \
334      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
335        len, start_offset, options, offsets, size_offsets)
336    
337    #define PCRE_FREE_STUDY16(extra) \
338      pcre16_free_study((pcre16_extra *)extra)
339    
340    #define PCRE_FREE_SUBSTRING16(substring) \
341      pcre16_free_substring((PCRE_SPTR16)substring)
342    
343    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
344      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
345    
346    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
347        getnamesptr, subsptr) \
348      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
349        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
350    
351    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
352      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
353    
354    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
355      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
356        (PCRE_SPTR16 *)(void*)subsptr)
357    
358    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
359      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
360        (PCRE_SPTR16 **)(void*)listptr)
361    
362    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
363      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
364        tables)
365    
366    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
367      pcre16_printint(re, outfile, debug_lengths)
368    
369    #define PCRE_STUDY16(extra, re, options, error) \
370      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
371    
372    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
373      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
374    
375    #define PCRE_JIT_STACK_FREE16(stack) \
376      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
377    
378    #endif /* SUPPORT_PCRE16 */
379    
380    
381    /* ----- Both modes are supported; a runtime test on use_pcre16 is needed,
382    except for pcre_config(), where it doesn't matter which version is
383    called. ----- */
384    
385    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
386    
387    #define CHAR_SIZE (use_pcre16? 2:1)
388    
389    #define PCHARS(lv, p, offset, len, f) \
390      if (use_pcre16) \
391        PCHARS16(lv, p, offset, len, f); \
392      else \
393        PCHARS8(lv, p, offset, len, f)
394    
395    #define PCHARSV(p, offset, len, f) \
396      if (use_pcre16) \
397        PCHARSV16(p, offset, len, f); \
398      else \
399        PCHARSV8(p, offset, len, f)
400    
401    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
402      if (use_pcre16) \
403        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
404      else \
405        READ_CAPTURE_NAME8(p, cn8, cn16, re)
406    
407    #define SET_PCRE_CALLOUT(callout) \
408      if (use_pcre16) \
409        SET_PCRE_CALLOUT16(callout); \
410      else \
411        SET_PCRE_CALLOUT8(callout)
412    
413    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
414    
415    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
416      if (use_pcre16) \
417        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
418      else \
419        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
420    
421    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
422      if (use_pcre16) \
423        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
424      else \
425        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
426    
427    #define PCRE_CONFIG pcre_config
428    
429    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
430        namesptr, cbuffer, size) \
431      if (use_pcre16) \
432        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
433          namesptr, cbuffer, size); \
434      else \
435        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
436          namesptr, cbuffer, size)
437    
438    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
439      if (use_pcre16) \
440        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
441      else \
442        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
443    
444    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
445        offsets, size_offsets, workspace, size_workspace) \
446      if (use_pcre16) \
447        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
448          offsets, size_offsets, workspace, size_workspace); \
449      else \
450        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
451          offsets, size_offsets, workspace, size_workspace)
452    
453    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
454        offsets, size_offsets) \
455      if (use_pcre16) \
456        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
457          offsets, size_offsets); \
458      else \
459        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
460          offsets, size_offsets)
461    
462    #define PCRE_FREE_STUDY(extra) \
463      if (use_pcre16) \
464        PCRE_FREE_STUDY16(extra); \
465      else \
466        PCRE_FREE_STUDY8(extra)
467    
468    #define PCRE_FREE_SUBSTRING(substring) \
469      if (use_pcre16) \
470        PCRE_FREE_SUBSTRING16(substring); \
471      else \
472        PCRE_FREE_SUBSTRING8(substring)
473    
474    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
475      if (use_pcre16) \
476        PCRE_FREE_SUBSTRING_LIST16(listptr); \
477      else \
478        PCRE_FREE_SUBSTRING_LIST8(listptr)
479    
480    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
481        getnamesptr, subsptr) \
482      if (use_pcre16) \
483        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
484          getnamesptr, subsptr); \
485      else \
486        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
487          getnamesptr, subsptr)
488    
489    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
490      if (use_pcre16) \
491        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
492      else \
493        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
494    
495    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
496      if (use_pcre16) \
497        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
498      else \
499        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
500    
501    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
502      if (use_pcre16) \
503        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
504      else \
505        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
506    
507    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
508      (use_pcre16 ? \
509         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
510        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
511    
512    #define PCRE_JIT_STACK_FREE(stack) \
513      if (use_pcre16) \
514        PCRE_JIT_STACK_FREE16(stack); \
515      else \
516        PCRE_JIT_STACK_FREE8(stack)
517    
518    #define PCRE_MAKETABLES \
519      (use_pcre16? pcre16_maketables() : pcre_maketables())
520    
521    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
522      if (use_pcre16) \
523        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
524      else \
525        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
526    
527    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
528      if (use_pcre16) \
529        PCRE_PRINTINT16(re, outfile, debug_lengths); \
530      else \
531        PCRE_PRINTINT8(re, outfile, debug_lengths)
532    
533    #define PCRE_STUDY(extra, re, options, error) \
534      if (use_pcre16) \
535        PCRE_STUDY16(extra, re, options, error); \
536      else \
537        PCRE_STUDY8(extra, re, options, error)
538    
539    /* ----- Only 8-bit mode is supported ----- */
540    
541    #elif defined SUPPORT_PCRE8
542    #define CHAR_SIZE                 1
543    #define PCHARS                    PCHARS8
544    #define PCHARSV                   PCHARSV8
545    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
546    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
547    #define STRLEN                    STRLEN8
548    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
549    #define PCRE_COMPILE              PCRE_COMPILE8
550    #define PCRE_CONFIG               pcre_config
551    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
552    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
553    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
554    #define PCRE_EXEC                 PCRE_EXEC8
555    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
556    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
557    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
558    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
559    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
560    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
561    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
562    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
563    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
564    #define PCRE_MAKETABLES           pcre_maketables()
565    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
566    #define PCRE_PRINTINT             PCRE_PRINTINT8
567    #define PCRE_STUDY                PCRE_STUDY8
568    
569    /* ----- Only 16-bit mode is supported ----- */
570    
571    #else
572    #define CHAR_SIZE                 2
573    #define PCHARS                    PCHARS16
574    #define PCHARSV                   PCHARSV16
575    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
576    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
577    #define STRLEN                    STRLEN16
578    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
579    #define PCRE_COMPILE              PCRE_COMPILE16
580    #define PCRE_CONFIG               pcre16_config
581    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
582    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
583    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
584    #define PCRE_EXEC                 PCRE_EXEC16
585    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
586    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
587    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
588    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
589    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
590    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
591    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
592    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
593    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
594    #define PCRE_MAKETABLES           pcre16_maketables()
595    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
596    #define PCRE_PRINTINT             PCRE_PRINTINT16
597    #define PCRE_STUDY                PCRE_STUDY16
598    #endif
599    
600    /* ----- End of mode-specific function call macros ----- */
601    
602    
603    /* Other parameters */
604    
605  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
606  #ifdef CLK_TCK  #ifdef CLK_TCK
607  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 610  Makefile. */
610  #endif  #endif
611  #endif  #endif
612    
613  #define LOOPREPEAT 20000  #if !defined NODFA
614    #define DFA_WS_DIMENSION 1000
615    #endif
616    
617    /* This is the default loop count for timing. */
618    
619    #define LOOPREPEAT 500000
620    
621    /* Static variables */
622    
623  static FILE *outfile;  static FILE *outfile;
624  static int log_store = 0;  static int log_store = 0;
625    static int callout_count;
626    static int callout_extra;
627    static int callout_fail_count;
628    static int callout_fail_id;
629    static int debug_lengths;
630    static int first_callout;
631    static int jit_was_used;
632    static int locale_set = 0;
633    static int show_malloc;
634    static int use_utf;
635  static size_t gotten_store;  static size_t gotten_store;
636    static size_t first_gotten_store = 0;
637    static const unsigned char *last_callout_mark = NULL;
638    
639    /* The buffers grow automatically if very long input lines are encountered. */
640    
641    static int buffer_size = 50000;
642    static pcre_uint8 *buffer = NULL;
643    static pcre_uint8 *dbuffer = NULL;
644    static pcre_uint8 *pbuffer = NULL;
645    
646    /* Another buffer is needed for translation to 16-bit character strings. It
647    will be obtained and extended as required. */
648    
649    #ifdef SUPPORT_PCRE16
650    static int buffer16_size = 0;
651    static pcre_uint16 *buffer16 = NULL;
652    
653  /* Debugging function to print the internal form of the regex. This is the same  #ifdef SUPPORT_PCRE8
 code as contained in pcre.c under the DEBUG macro. */  
654    
655  static const char *OP_names[] = {  /* We need the table of operator lengths that is used for 16-bit compiling, in
656    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  order to swap bytes in a pattern for saving/reloading testing. Luckily, the
657    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
658    "Opt", "^", "$", "Any", "chars", "not",  appropriately for the 16-bit world. Just as a safety check, make sure that
659    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  COMPILE_PCRE16 is *not* set. */
660    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
661    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  #ifdef COMPILE_PCRE16
662    "*", "*?", "+", "+?", "?", "??", "{", "{",  #error COMPILE_PCRE16 must not be set when compiling pcretest.c
663    "class", "Ref", "Recurse",  #endif
664    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
665    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  #if LINK_SIZE == 2
666    "Brazero", "Braminzero", "Bra"  #undef LINK_SIZE
667    #define LINK_SIZE 1
668    #elif LINK_SIZE == 3 || LINK_SIZE == 4
669    #undef LINK_SIZE
670    #define LINK_SIZE 2
671    #else
672    #error LINK_SIZE must be either 2, 3, or 4
673    #endif
674    
675    #undef IMM2_SIZE
676    #define IMM2_SIZE 1
677    
678    #endif /* SUPPORT_PCRE8 */
679    
680    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
681    #endif  /* SUPPORT_PCRE16 */
682    
683    /* If we have 8-bit support, default use_pcre16 to 0; if there is also
684    16-bit support, it can be changed by an option. If there is no 8-bit support,
685    there must be 16-bit support, so default it to 1. */
686    
687    #ifdef SUPPORT_PCRE8
688    static int use_pcre16 = 0;
689    #else
690    static int use_pcre16 = 1;
691    #endif
692    
693    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
694    
695    static int jit_study_bits[] =
696      {
697      PCRE_STUDY_JIT_COMPILE,
698      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
699      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
700      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
701      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
702      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
703      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
704        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
705    };
706    
707    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
708      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
709    
710    /* Textual explanations for runtime error codes */
711    
/* Message table with 31 entries (positions 0 to 30, shown on each line below).
A NULL entry marks a position that has no message here; the existing comments
say why. NOTE(review): the positions appear to be negated PCRE_ERROR_xxx return
codes (position 1 is NOMATCH according to the comment on that entry) - confirm
against pcre.h and the code that indexes this table, neither of which is in
this part of the file. */
712    static const char *errtexts[] = {
713      NULL,  /* 0 is no error */
714      NULL,  /* NOMATCH is handled specially */                    /* 1 */
715      "NULL argument passed",                                      /* 2 */
716      "bad option value",                                          /* 3 */
717      "magic number missing",                                      /* 4 */
718      "unknown opcode - pattern overwritten?",                     /* 5 */
719      "no more memory",                                            /* 6 */
720      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */  /* 7 */
721      "match limit exceeded",                                      /* 8 */
722      "callout error code",                                        /* 9 */
723      NULL,  /* BADUTF8/16 is handled specially */                 /* 10 */
724      NULL,  /* BADUTF8/16 offset is handled specially */          /* 11 */
725      NULL,  /* PARTIAL is handled specially */                    /* 12 */
726      "not used - internal error",                                 /* 13 */
727      "internal error - pattern overwritten?",                     /* 14 */
728      "bad count value",                                           /* 15 */
729      "item unsupported for DFA matching",                         /* 16 */
730      "backreference condition or recursion test not supported for DFA matching",  /* 17 */
731      "match limit not supported for DFA matching",                /* 18 */
732      "workspace size exceeded in DFA matching",                   /* 19 */
733      "too much recursion for DFA matching",                       /* 20 */
734      "recursion limit exceeded",                                  /* 21 */
735      "not used - internal error",                                 /* 22 */
736      "invalid combination of newline options",                    /* 23 */
737      "bad offset value",                                          /* 24 */
738      NULL,  /* SHORTUTF8/16 is handled specially */               /* 25 */
739      "nested recursion at the same subject position",             /* 26 */
740      "JIT stack limit reached",                                   /* 27 */
741      "pattern compiled in wrong mode: 8-bit/16-bit error",        /* 28 */
742      "pattern compiled with other endianness",                    /* 29 */
743      "invalid data in workspace for DFA restart"                  /* 30 */
744    };
745    
746    
747    /*************************************************
748    *         Alternate character tables             *
749    *************************************************/
750    
751    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
752    using the default tables of the library. However, the T option can be used to
753    select alternate sets of tables, for different kinds of testing. Note also that
754    the L (locale) option also adjusts the tables. */
755    
756    /* This is the set of tables distributed as default with PCRE. It recognizes
757    only ASCII characters. */
758    
/* Layout of this array, in order: a 256-byte lower casing table, a 256-byte
case flipping table, ten 32-byte class bit maps (320 bytes) and a 256-byte
character type table, 1088 bytes in all. */
759    static const pcre_uint8 tables0[] = {
760    
761    /* This table is a lower casing table. */
762    
763        0,  1,  2,  3,  4,  5,  6,  7,
764        8,  9, 10, 11, 12, 13, 14, 15,
765       16, 17, 18, 19, 20, 21, 22, 23,
766       24, 25, 26, 27, 28, 29, 30, 31,
767       32, 33, 34, 35, 36, 37, 38, 39,
768       40, 41, 42, 43, 44, 45, 46, 47,
769       48, 49, 50, 51, 52, 53, 54, 55,
770       56, 57, 58, 59, 60, 61, 62, 63,
771       64, 97, 98, 99,100,101,102,103,
772      104,105,106,107,108,109,110,111,
773      112,113,114,115,116,117,118,119,
774      120,121,122, 91, 92, 93, 94, 95,
775       96, 97, 98, 99,100,101,102,103,
776      104,105,106,107,108,109,110,111,
777      112,113,114,115,116,117,118,119,
778      120,121,122,123,124,125,126,127,
779      128,129,130,131,132,133,134,135,
780      136,137,138,139,140,141,142,143,
781      144,145,146,147,148,149,150,151,
782      152,153,154,155,156,157,158,159,
783      160,161,162,163,164,165,166,167,
784      168,169,170,171,172,173,174,175,
785      176,177,178,179,180,181,182,183,
786      184,185,186,187,188,189,190,191,
787      192,193,194,195,196,197,198,199,
788      200,201,202,203,204,205,206,207,
789      208,209,210,211,212,213,214,215,
790      216,217,218,219,220,221,222,223,
791      224,225,226,227,228,229,230,231,
792      232,233,234,235,236,237,238,239,
793      240,241,242,243,244,245,246,247,
794      248,249,250,251,252,253,254,255,
795    
796    /* This table is a case flipping table. */
797    
798        0,  1,  2,  3,  4,  5,  6,  7,
799        8,  9, 10, 11, 12, 13, 14, 15,
800       16, 17, 18, 19, 20, 21, 22, 23,
801       24, 25, 26, 27, 28, 29, 30, 31,
802       32, 33, 34, 35, 36, 37, 38, 39,
803       40, 41, 42, 43, 44, 45, 46, 47,
804       48, 49, 50, 51, 52, 53, 54, 55,
805       56, 57, 58, 59, 60, 61, 62, 63,
806       64, 97, 98, 99,100,101,102,103,
807      104,105,106,107,108,109,110,111,
808      112,113,114,115,116,117,118,119,
809      120,121,122, 91, 92, 93, 94, 95,
810       96, 65, 66, 67, 68, 69, 70, 71,
811       72, 73, 74, 75, 76, 77, 78, 79,
812       80, 81, 82, 83, 84, 85, 86, 87,
813       88, 89, 90,123,124,125,126,127,
814      128,129,130,131,132,133,134,135,
815      136,137,138,139,140,141,142,143,
816      144,145,146,147,148,149,150,151,
817      152,153,154,155,156,157,158,159,
818      160,161,162,163,164,165,166,167,
819      168,169,170,171,172,173,174,175,
820      176,177,178,179,180,181,182,183,
821      184,185,186,187,188,189,190,191,
822      192,193,194,195,196,197,198,199,
823      200,201,202,203,204,205,206,207,
824      208,209,210,211,212,213,214,215,
825      216,217,218,219,220,221,222,223,
826      224,225,226,227,228,229,230,231,
827      232,233,234,235,236,237,238,239,
828      240,241,242,243,244,245,246,247,
829      248,249,250,251,252,253,254,255,
830    
831    /* This table contains bit maps for various character classes. Each map is 32
832    bytes long and the bits run from the least significant end of each byte. The
833    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
834    graph, print, punct, and cntrl. Other classes are built from combinations. */
835    
836      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,  /* space: 9-13 and 32 */
837      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
838      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
839      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840    
841      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* xdigit: 0-9, A-F, a-f */
842      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
843      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845    
846      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* digit: 0-9 */
847      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850    
851      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* upper: A-Z */
852      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
853      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
855    
856      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* lower: a-z */
857      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
858      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
859      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
860    
861      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* word: 0-9, A-Z, '_', a-z */
862      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
863      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
864      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
865    
866      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,  /* graph: 33-126 */
867      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
868      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
869      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
870    
871      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,  /* print: 32-126 */
872      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
873      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
874      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
875    
876      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,  /* punct */
877      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
878      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
879      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
880    
881      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,  /* cntrl: 0-31 and 127 */
882      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
883      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
884      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
885    
886    /* This table identifies various classes of character by individual bits:
887      0x01   white space character
888      0x02   letter
889      0x04   decimal digit
890      0x08   hexadecimal digit
891      0x10   alphanumeric or '_'
892      0x80   regular expression metacharacter or binary zero
893    */
894    
895      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
896      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
897      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
898      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
899      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
900      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
901      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
902      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
903      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
904      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
905      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
906      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
907      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
908      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
909      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
910      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
911      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
912      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
913      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
914      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
915      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
916      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
917      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
918      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
919      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
920      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
921      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
922      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
923      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
924      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
925      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
926      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
927    
928    /* This is a set of tables that came originally from a Windows user. It seems to
929    be at least an approximation of ISO 8859. In particular, there are characters
930    greater than 128 that are marked as spaces, letters, etc. */
931    
932    static const pcre_uint8 tables1[] = {
933    0,1,2,3,4,5,6,7,
934    8,9,10,11,12,13,14,15,
935    16,17,18,19,20,21,22,23,
936    24,25,26,27,28,29,30,31,
937    32,33,34,35,36,37,38,39,
938    40,41,42,43,44,45,46,47,
939    48,49,50,51,52,53,54,55,
940    56,57,58,59,60,61,62,63,
941    64,97,98,99,100,101,102,103,
942    104,105,106,107,108,109,110,111,
943    112,113,114,115,116,117,118,119,
944    120,121,122,91,92,93,94,95,
945    96,97,98,99,100,101,102,103,
946    104,105,106,107,108,109,110,111,
947    112,113,114,115,116,117,118,119,
948    120,121,122,123,124,125,126,127,
949    128,129,130,131,132,133,134,135,
950    136,137,138,139,140,141,142,143,
951    144,145,146,147,148,149,150,151,
952    152,153,154,155,156,157,158,159,
953    160,161,162,163,164,165,166,167,
954    168,169,170,171,172,173,174,175,
955    176,177,178,179,180,181,182,183,
956    184,185,186,187,188,189,190,191,
957    224,225,226,227,228,229,230,231,
958    232,233,234,235,236,237,238,239,
959    240,241,242,243,244,245,246,215,
960    248,249,250,251,252,253,254,223,
961    224,225,226,227,228,229,230,231,
962    232,233,234,235,236,237,238,239,
963    240,241,242,243,244,245,246,247,
964    248,249,250,251,252,253,254,255,
965    0,1,2,3,4,5,6,7,
966    8,9,10,11,12,13,14,15,
967    16,17,18,19,20,21,22,23,
968    24,25,26,27,28,29,30,31,
969    32,33,34,35,36,37,38,39,
970    40,41,42,43,44,45,46,47,
971    48,49,50,51,52,53,54,55,
972    56,57,58,59,60,61,62,63,
973    64,97,98,99,100,101,102,103,
974    104,105,106,107,108,109,110,111,
975    112,113,114,115,116,117,118,119,
976    120,121,122,91,92,93,94,95,
977    96,65,66,67,68,69,70,71,
978    72,73,74,75,76,77,78,79,
979    80,81,82,83,84,85,86,87,
980    88,89,90,123,124,125,126,127,
981    128,129,130,131,132,133,134,135,
982    136,137,138,139,140,141,142,143,
983    144,145,146,147,148,149,150,151,
984    152,153,154,155,156,157,158,159,
985    160,161,162,163,164,165,166,167,
986    168,169,170,171,172,173,174,175,
987    176,177,178,179,180,181,182,183,
988    184,185,186,187,188,189,190,191,
989    224,225,226,227,228,229,230,231,
990    232,233,234,235,236,237,238,239,
991    240,241,242,243,244,245,246,215,
992    248,249,250,251,252,253,254,223,
993    192,193,194,195,196,197,198,199,
994    200,201,202,203,204,205,206,207,
995    208,209,210,211,212,213,214,247,
996    216,217,218,219,220,221,222,255,
997    0,62,0,0,1,0,0,0,
998    0,0,0,0,0,0,0,0,
999    32,0,0,0,1,0,0,0,
1000    0,0,0,0,0,0,0,0,
1001    0,0,0,0,0,0,255,3,
1002    126,0,0,0,126,0,0,0,
1003    0,0,0,0,0,0,0,0,
1004    0,0,0,0,0,0,0,0,
1005    0,0,0,0,0,0,255,3,
1006    0,0,0,0,0,0,0,0,
1007    0,0,0,0,0,0,12,2,
1008    0,0,0,0,0,0,0,0,
1009    0,0,0,0,0,0,0,0,
1010    254,255,255,7,0,0,0,0,
1011    0,0,0,0,0,0,0,0,
1012    255,255,127,127,0,0,0,0,
1013    0,0,0,0,0,0,0,0,
1014    0,0,0,0,254,255,255,7,
1015    0,0,0,0,0,4,32,4,
1016    0,0,0,128,255,255,127,255,
1017    0,0,0,0,0,0,255,3,
1018    254,255,255,135,254,255,255,7,
1019    0,0,0,0,0,4,44,6,
1020    255,255,127,255,255,255,127,255,
1021    0,0,0,0,254,255,255,255,
1022    255,255,255,255,255,255,255,127,
1023    0,0,0,0,254,255,255,255,
1024    255,255,255,255,255,255,255,255,
1025    0,2,0,0,255,255,255,255,
1026    255,255,255,255,255,255,255,127,
1027    0,0,0,0,255,255,255,255,
1028    255,255,255,255,255,255,255,255,
1029    0,0,0,0,254,255,0,252,
1030    1,0,0,248,1,0,0,120,
1031    0,0,0,0,254,255,255,255,
1032    0,0,128,0,0,0,128,0,
1033    255,255,255,255,0,0,0,0,
1034    0,0,0,0,0,0,0,128,
1035    255,255,255,255,0,0,0,0,
1036    0,0,0,0,0,0,0,0,
1037    128,0,0,0,0,0,0,0,
1038    0,1,1,0,1,1,0,0,
1039    0,0,0,0,0,0,0,0,
1040    0,0,0,0,0,0,0,0,
1041    1,0,0,0,128,0,0,0,
1042    128,128,128,128,0,0,128,0,
1043    28,28,28,28,28,28,28,28,
1044    28,28,0,0,0,0,0,128,
1045    0,26,26,26,26,26,26,18,
1046    18,18,18,18,18,18,18,18,
1047    18,18,18,18,18,18,18,18,
1048    18,18,18,128,128,0,128,16,
1049    0,26,26,26,26,26,26,18,
1050    18,18,18,18,18,18,18,18,
1051    18,18,18,18,18,18,18,18,
1052    18,18,18,128,128,0,0,0,
1053    0,0,0,0,0,1,0,0,
1054    0,0,0,0,0,0,0,0,
1055    0,0,0,0,0,0,0,0,
1056    0,0,0,0,0,0,0,0,
1057    1,0,0,0,0,0,0,0,
1058    0,0,18,0,0,0,0,0,
1059    0,0,20,20,0,18,0,0,
1060    0,20,18,0,0,0,0,0,
1061    18,18,18,18,18,18,18,18,
1062    18,18,18,18,18,18,18,18,
1063    18,18,18,18,18,18,18,0,
1064    18,18,18,18,18,18,18,18,
1065    18,18,18,18,18,18,18,18,
1066    18,18,18,18,18,18,18,18,
1067    18,18,18,18,18,18,18,0,
1068    18,18,18,18,18,18,18,18
1069  };  };
1070    
1071    
1072  static void print_internals(pcre *re)  
1073    
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for C libraries that have no strerror() (SunOS4 is one example).
The message is looked up in the library's sys_errlist vector, whose length is
given by sys_nerr; any number outside that vector gets a fixed text. */

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1093    
1094    
1095    /*************************************************
1096    *         JIT memory callback                    *
1097    *************************************************/
1098    
1099    static pcre_jit_stack* jit_callback(void *arg)
1100    {
1101    jit_was_used = TRUE;
1102    return (pcre_jit_stack *)arg;
1103    }
1104    
1105    
1106    #if !defined NOUTF || defined SUPPORT_PCRE16
1107    /*************************************************
1108    *            Convert UTF-8 string to value       *
1109    *************************************************/
1110    
1111    /* This function takes one or more bytes that represents a UTF-8 character,
1112    and returns the value of the character.
1113    
1114    Argument:
1115      utf8bytes   a pointer to the byte vector
1116      vptr        a pointer to an int to receive the value
1117    
1118    Returns:      >  0 => the number of bytes consumed
1119                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1120    */
1121    
1122    static int
1123    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1124    {
1125    int c = *utf8bytes++;
1126    int d = c;
1127    int i, j, s;
1128    
1129    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1130      {
1131      if ((d & 0x80) == 0) break;
1132      d <<= 1;
1133      }
1134    
1135    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1136    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1137    
1138    /* i now has a value in the range 1-5 */
1139    
1140    s = 6*i;
1141    d = (c & utf8_table3[i]) << s;
1142    
1143    for (j = 0; j < i; j++)
1144      {
1145      c = *utf8bytes++;
1146      if ((c & 0xc0) != 0x80) return -(j+1);
1147      s -= 6;
1148      d |= (c & 0x3f) << s;
1149      }
1150    
1151    /* Check that encoding was the correct unique one */
1152    
1153    for (j = 0; j < utf8_table1_size; j++)
1154      if (d <= utf8_table1[j]) break;
1155    if (j != i) return -(i+1);
1156    
1157    /* Valid value */
1158    
1159    *vptr = d;
1160    return i+1;
1161    }
1162    #endif /* NOUTF || SUPPORT_PCRE16 */
1163    
1164    
1165    
1166    #if !defined NOUTF || defined SUPPORT_PCRE16
1167    /*************************************************
1168    *       Convert character value to UTF-8         *
1169    *************************************************/
1170    
1171    /* This function takes an integer value in the range 0 - 0x7fffffff
1172    and encodes it as a UTF-8 character in 0 to 6 bytes.
1173    
1174    Arguments:
1175      cvalue     the character value
1176      utf8bytes  pointer to buffer for result - at least 6 bytes long
1177    
1178    Returns:     number of characters placed in the buffer
1179    */
1180    
1181    static int
1182    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1183    {
1184    register int i, j;
1185    for (i = 0; i < utf8_table1_size; i++)
1186      if (cvalue <= utf8_table1[i]) break;
1187    utf8bytes += i;
1188    for (j = i; j > 0; j--)
1189     {
1190     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1191     cvalue >>= 6;
1192     }
1193    *utf8bytes = utf8_table2[i] | cvalue;
1194    return i + 1;
1195    }
1196    #endif
1197    
1198    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always built in buffer16, which is enlarged when it holds
fewer than 2*len + 2 bytes. That is enough in every mode: without UTF each
input byte yields one 16-bit item, and for UTF-8 input a character of up to 3
bytes yields one item while a 4-byte character yields two.

Surrogate values are deliberately not rejected, so that invalid UTF-16 strings
can be constructed for testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;

if (buffer16_size < needed)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time */
  int c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;
    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which is only possible in UTF mode */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: widen each byte */
  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1277    
1278    
1279    /*************************************************
1280    *        Read or extend an input line            *
1281    *************************************************/
1282    
1283    /* Input lines are read into buffer, but both patterns and data lines can be
1284    continued over multiple input lines. In addition, if the buffer fills up, we
1285    want to automatically expand it so as to be able to handle extremely large
1286    lines that are needed for certain stress tests. When the input buffer is
1287    expanded, the other two buffers must also be expanded likewise, and the
1288    contents of pbuffer, which are a copy of the input for callouts, must be
1289    preserved (for when expansion happens for a data line). This is not the most
1290    optimal way of handling this, but hey, this is just a test program!
1291    
1292    Arguments:
1293      f            the file to read
1294      start        where in buffer to start (this *must* be within buffer)
1295      prompt       for stdin or readline()
1296    
1297    Returns:       pointer to the start of new data
1298                   could be a copy of start, or could be moved
1299                   NULL if no data read and EOF reached
1300    */
1301    
1302    static pcre_uint8 *
1303    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1304    {
1305    pcre_uint8 *here = start;
1306    
1307    for (;;)
1308      {
1309      size_t rlen = (size_t)(buffer_size - (here - buffer));
1310    
1311      if (rlen > 1000)
1312        {
1313        int dlen;
1314    
1315        /* If libreadline or libedit support is required, use readline() to read a
1316        line if the input is a terminal. Note that readline() removes the trailing
1317        newline, so we must put it back again, to be compatible with fgets(). */
1318    
1319    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1320        if (isatty(fileno(f)))
1321          {
1322          size_t len;
1323          char *s = readline(prompt);
1324          if (s == NULL) return (here == start)? NULL : start;
1325          len = strlen(s);
1326          if (len > 0) add_history(s);
1327          if (len > rlen - 1) len = rlen - 1;
1328          memcpy(here, s, len);
1329          here[len] = '\n';
1330          here[len+1] = 0;
1331          free(s);
1332          }
1333        else
1334    #endif
1335    
1336        /* Read the next line by normal means, prompting if the file is stdin. */
1337    
1338          {
1339          if (f == stdin) printf("%s", prompt);
1340          if (fgets((char *)here, rlen,  f) == NULL)
1341            return (here == start)? NULL : start;
1342          }
1343    
1344        dlen = (int)strlen((char *)here);
1345        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1346        here += dlen;
1347        }
1348    
1349      else
1350        {
1351        int new_buffer_size = 2*buffer_size;
1352        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1353        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1354        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1355    
1356        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1357          {
1358          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1359          exit(1);
1360          }
1361    
1362        memcpy(new_buffer, buffer, buffer_size);
1363        memcpy(new_pbuffer, pbuffer, buffer_size);
1364    
1365        buffer_size = new_buffer_size;
1366    
1367        start = new_buffer + (start - buffer);
1368        here = new_buffer + (here - buffer);
1369    
1370        free(buffer);
1371        free(dbuffer);
1372        free(pbuffer);
1373    
1374        buffer = new_buffer;
1375        dbuffer = new_dbuffer;
1376        pbuffer = new_pbuffer;
1377        }
1378      }
1379    
1380    return NULL;  /* Control never gets here */
1381    }
1382    
1383    
1384    
1385    /*************************************************
1386    *          Read number from string               *
1387    *************************************************/
1388    
1389    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1390    around with conditional compilation, just do the job by hand. It is only used
1391    for unpicking arguments, so just keep it simple.
1392    
1393    Arguments:
1394      str           string to be converted
1395      endptr        where to put the end pointer
1396    
1397    Returns:        the unsigned long
1398    */
1399    
1400    static int
1401    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1402    {
1403    int result = 0;
1404    while(*str != 0 && isspace(*str)) str++;
1405    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1406    *endptr = str;
1407    return(result);
1408    }
1409    
1410    
1411    
1412    /*************************************************
1413    *             Print one character                *
1414    *************************************************/
1415    
1416    /* Print a single character either literally, or as a hex escape. */
1417    
1418    static int pchar(int c, FILE *f)
1419    {
1420    if (PRINTOK(c))
1421      {
1422      if (f != NULL) fprintf(f, "%c", c);
1423      return 1;
1424      }
1425    
1426    if (c < 0x100)
1427      {
1428      if (use_utf)
1429        {
1430        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1431        return 6;
1432        }
1433      else
1434        {
1435        if (f != NULL) fprintf(f, "\\x%02x", c);
1436        return 4;
1437        }
1438      }
1439    
1440    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1441    return (c <= 0x000000ff)? 6 :
1442           (c <= 0x00000fff)? 7 :
1443           (c <= 0x0000ffff)? 8 :
1444           (c <= 0x000fffff)? 9 : 10;
1445    }
1446    
1447    
1448    
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Print a string of 8-bit code units via pchar(). In UTF mode each valid
UTF-8 sequence that lies wholly within the given length is printed as one
character; any other byte is printed on its own. A negative length means the
string is zero-terminated. Yields the total of the widths reported by pchar();
if handed a NULL file, just counts without printing. */

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int ch = 0;
int total = 0;
int remaining = (length < 0)? (int)strlen((char *)p) : length;

while (remaining > 0)
  {
  int used = 0;                  /* bytes taken by this character */

#if !defined NOUTF
  if (use_utf)
    {
    int rc = utf82ord(p, &ch);
    if (rc > 0 && rc <= remaining) used = rc;   /* Mustn't run over the end */
    }
#endif

  if (used == 0)                 /* not UTF, or not a usable sequence */
    {
    ch = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  total += pchar(ch, f);
  }

return total;
}
#endif
1487    
1488    
1489    
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Yields the number of 16-bit items that precede the terminating zero. */

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1502    
1503    
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Print a string of 16-bit code units via pchar(). In UTF mode a high
surrogate (0xD800-0xDBFF) that is followed, within the given length, by a low
surrogate (0xDC00-0xDFFF inclusive) is combined into one character; any other
unit, including an unpaired surrogate, is printed on its own. A negative length
means the string is zero-terminated. Yields the total of the widths reported by
pchar(); if handed a NULL file, just counts without printing. */

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;
    if (d >= 0xDC00 && d <= 0xDFFF)    /* the top value 0xDFFF is included */
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1540    
1541    
1542    
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy the run of alphanumeric characters at p to the place *pp points at and
follow it with two zero bytes. A message is written to outfile if the compiled
pattern has no group of that name. On return *pp points at the second of the
two zeros.

Arguments:
  p           points to the name in the input text
  pp          points to the pointer to where the name is to be stored
  re          the compiled pattern

Returns:      pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1566    
1567    
1568    
#ifdef SUPPORT_PCRE16
/*************************************************
*     Read a capture name (16-bit) and check it  *
*************************************************/

/* The text being read is 8-bit; each alphanumeric character at p is widened
to a 16-bit item stored where *pp points, and the name is followed by two zero
items. A message is written to outfile if the compiled pattern has no group of
that name. On return *pp points at the second of the two zeros.

Arguments:
  p           points to the name in the (8-bit) input text
  pp          points to the pointer to where the name is to be stored
  re          the compiled pattern

Returns:      pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1593    
1594    
1595    
1596    /*************************************************
1597    *              Callout function                  *
1598    *************************************************/
1599    
1600    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1601    the match. Yield zero unless more callouts than the fail count, or the callout
1602    data is not zero. */
1603    
1604    static int callout(pcre_callout_block *cb)
1605    {
1606    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1607    int i, pre_start, post_start, subject_length;
1608    
1609    if (callout_extra)
1610      {
1611      fprintf(f, "Callout %d: last capture = %d\n",
1612        cb->callout_number, cb->capture_last);
1613    
1614      for (i = 0; i < cb->capture_top * 2; i += 2)
1615        {
1616        if (cb->offset_vector[i] < 0)
1617          fprintf(f, "%2d: <unset>\n", i/2);
1618        else
1619          {
1620          fprintf(f, "%2d: ", i/2);
1621          PCHARSV(cb->subject, cb->offset_vector[i],
1622            cb->offset_vector[i+1] - cb->offset_vector[i], f);
1623          fprintf(f, "\n");
1624          }
1625        }
1626      }
1627    
1628    /* Re-print the subject in canonical form, the first time or if giving full
1629    datails. On subsequent calls in the same match, we use pchars just to find the
1630    printed lengths of the substrings. */
1631    
1632    if (f != NULL) fprintf(f, "--->");
1633    
1634    PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1635    PCHARS(post_start, cb->subject, cb->start_match,
1636      cb->current_position - cb->start_match, f);
1637    
1638    PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1639    
1640    PCHARSV(cb->subject, cb->current_position,
1641      cb->subject_length - cb->current_position, f);
1642    
1643    if (f != NULL) fprintf(f, "\n");
1644    
1645    /* Always print appropriate indicators, with callout number if not already
1646    shown. For automatic callouts, show the pattern offset. */
1647    
1648    if (cb->callout_number == 255)
1649      {
1650      fprintf(outfile, "%+3d ", cb->pattern_position);
1651      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1652      }
1653    else
1654      {
1655      if (callout_extra) fprintf(outfile, "    ");
1656        else fprintf(outfile, "%3d ", cb->callout_number);
1657      }
1658    
1659    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1660    fprintf(outfile, "^");
1661    
1662    if (post_start > 0)
1663      {
1664      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1665      fprintf(outfile, "^");
1666      }
1667    
1668    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1669      fprintf(outfile, " ");
1670    
1671    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1672      pbuffer + cb->pattern_position);
1673    
1674    fprintf(outfile, "\n");
1675    first_callout = 0;
1676    
1677    if (cb->mark != last_callout_mark)
1678      {
1679      if (cb->mark == NULL)
1680        fprintf(outfile, "Latest Mark: <unset>\n");
1681      else
1682        {
1683        fprintf(outfile, "Latest Mark: ");
1684        PCHARSV(cb->mark, 0, -1, outfile);
1685        putc('\n', outfile);
1686        }
1687      last_callout_mark = cb->mark;
1688      }
1689    
1690    if (cb->callout_data != NULL)
1691      {
1692      int callout_data = *((int *)(cb->callout_data));
1693      if (callout_data != 0)
1694        {
1695        fprintf(outfile, "Callout data = %d\n", callout_data);
1696        return callout_data;
1697        }
1698      }
1699    
1700    return (cb->callout_number != callout_fail_id)? 0 :
1701           (++callout_count >= callout_fail_count)? 1 : 0;
1702    }
1703    
1704    
1705    /*************************************************
1706    *            Local malloc functions              *
1707    *************************************************/
1708    
1709    /* Alternative malloc function, to test functionality and save the size of a
1710    compiled re, which is the first store request that pcre_compile() makes. The
1711    show_malloc variable is set only during matching. */
1712    
1713    static void *new_malloc(size_t size)
1714    {
1715    void *block = malloc(size);
1716    gotten_store = size;
1717    if (first_gotten_store == 0) first_gotten_store = size;
1718    if (show_malloc)
1719      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1720    return block;
1721    }
1722    
1723    static void new_free(void *block)
1724    {
1725    if (show_malloc)
1726      fprintf(outfile, "free             %p\n", block);
1727    free(block);
1728    }
1729    
1730    /* For recursion malloc/free, to test stacking calls */
1731    
1732    static void *stack_malloc(size_t size)
1733    {
1734    void *block = malloc(size);
1735    if (show_malloc)
1736      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1737    return block;
1738    }
1739    
1740    static void stack_free(void *block)
1741    {
1742    if (show_malloc)
1743      fprintf(outfile, "stack_free       %p\n", block);
1744    free(block);
1745    }
1746    
1747    
1748    /*************************************************
1749    *          Call pcre_fullinfo()                  *
1750    *************************************************/
1751    
1752    /* Get one piece of information from the pcre_fullinfo() function. When only
1753    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1754    value, but the code is defensive.
1755    
1756    Arguments:
1757      re        compiled regex
1758      study     study data
1759      option    PCRE_INFO_xxx option
1760      ptr       where to put the data
1761    
1762    Returns:    0 when OK, < 0 on error
1763    */
1764    
1765    static int
1766    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1767    {
1768    int rc;
1769    
1770    if (use_pcre16)
1771    #ifdef SUPPORT_PCRE16
1772      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1773    #else
1774      rc = PCRE_ERROR_BADMODE;
1775    #endif
1776    else
1777    #ifdef SUPPORT_PCRE8
1778      rc = pcre_fullinfo(re, study, option, ptr);
1779    #else
1780      rc = PCRE_ERROR_BADMODE;
1781    #endif
1782    
1783    if (rc < 0)
1784      {
1785      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1786        use_pcre16? "16" : "", option);
1787      if (rc == PCRE_ERROR_BADMODE)
1788        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1789          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1790      }
1791    
1792    return rc;
1793    }
1794    
1795    
1796    
1797    /*************************************************
1798    *             Swap byte functions                *
1799    *************************************************/
1800    
1801    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1802    value, respectively.
1803    
1804    Arguments:
1805      value        any number
1806    
1807    Returns:       the byte swapped value
1808    */
1809    
1810    static pcre_uint32
1811    swap_uint32(pcre_uint32 value)
1812    {
1813    return ((value & 0x000000ff) << 24) |
1814           ((value & 0x0000ff00) <<  8) |
1815           ((value & 0x00ff0000) >>  8) |
1816           (value >> 24);
1817    }
1818    
1819    static pcre_uint16
1820    swap_uint16(pcre_uint16 value)
1821    {
1822    return (value >> 8) | (value << 8);
1823    }
1824    
1825    
1826    
1827    /*************************************************
1828    *        Flip bytes in a compiled pattern        *
1829    *************************************************/
1830    
1831    /* This function is called if the 'F' option was present on a pattern that is
1832    to be written to a file. We flip the bytes of all the integer fields in the
1833    regex data block and the study block. In 16-bit mode this also flips relevant
1834    bytes in the pattern itself. This is to make it possible to test PCRE's
1835    ability to reload byte-flipped patterns, e.g. those compiled on a different
1836    architecture. */
1837    
1838    static void
1839    regexflip(pcre *ere, pcre_extra *extra)
1840  {  {
1841  unsigned char *code = ((real_pcre *)re)->code;  REAL_PCRE *re = (REAL_PCRE *)ere;
1842    #ifdef SUPPORT_PCRE16
1843  fprintf(outfile, "------------------------------------------------------------------\n");  int op;
1844    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1845    int length = re->name_count * re->name_entry_size;
1846    #ifdef SUPPORT_UTF
1847    BOOL utf = (re->options & PCRE_UTF16) != 0;
1848    BOOL utf16_char = FALSE;
1849    #endif /* SUPPORT_UTF */
1850    #endif /* SUPPORT_PCRE16 */
1851    
1852    /* Always flip the bytes in the main data block and study blocks. */
1853    
1854    re->magic_number = REVERSED_MAGIC_NUMBER;
1855    re->size = swap_uint32(re->size);
1856    re->options = swap_uint32(re->options);
1857    re->flags = swap_uint16(re->flags);
1858    re->top_bracket = swap_uint16(re->top_bracket);
1859    re->top_backref = swap_uint16(re->top_backref);
1860    re->first_char = swap_uint16(re->first_char);
1861    re->req_char = swap_uint16(re->req_char);
1862    re->name_table_offset = swap_uint16(re->name_table_offset);
1863    re->name_entry_size = swap_uint16(re->name_entry_size);
1864    re->name_count = swap_uint16(re->name_count);
1865    
1866  for(;;)  if (extra != NULL)
1867    {    {
1868    int c;    pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1869    int charlength;    rsd->size = swap_uint32(rsd->size);
1870      rsd->flags = swap_uint32(rsd->flags);
1871      rsd->minlength = swap_uint32(rsd->minlength);
1872      }
1873    
1874    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1875    in the name table, if present, and then in the pattern itself. */
1876    
1877    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  #ifdef SUPPORT_PCRE16
1878    if (!use_pcre16) return;
1879    
1880    if (*code >= OP_BRA)  while(TRUE)
1881      {
1882      /* Swap previous characters. */
1883      while (length-- > 0)
1884      {      {
1885      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      *ptr = swap_uint16(*ptr);
1886      code += 2;      ptr++;
1887      }      }
1888    #ifdef SUPPORT_UTF
1889    else switch(*code)    if (utf16_char)
1890      {      {
1891      case OP_END:      if ((ptr[-1] & 0xfc00) == 0xd800)
1892      fprintf(outfile, "    %s\n", OP_names[*code]);        {
1893      fprintf(outfile, "------------------------------------------------------------------\n");        /* We know that there is only one extra character in UTF-16. */
1894      return;        *ptr = swap_uint16(*ptr);
1895          ptr++;
1896      case OP_OPT:        }
1897      fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);      }
1898      code++;    utf16_char = FALSE;
1899      break;  #endif /* SUPPORT_UTF */
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
1900    
1901      case OP_CHARS:    /* Get next opcode. */
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
1902    
1903      case OP_KETRMAX:    length = 0;
1904      case OP_KETRMIN:    op = *ptr;
1905      case OP_ALT:    *ptr++ = swap_uint16(op);
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
1906    
1907      case OP_REVERSE:    switch (op)
1908      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      {
1909      code += 2;      case OP_END:
1910      break;      return;
1911    
1912    #ifdef SUPPORT_UTF
1913        case OP_CHAR:
1914        case OP_CHARI:
1915        case OP_NOT:
1916        case OP_NOTI:
1917      case OP_STAR:      case OP_STAR:
1918      case OP_MINSTAR:      case OP_MINSTAR:
1919      case OP_PLUS:      case OP_PLUS:
1920      case OP_MINPLUS:      case OP_MINPLUS:
1921      case OP_QUERY:      case OP_QUERY:
1922      case OP_MINQUERY:      case OP_MINQUERY:
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
1923      case OP_UPTO:      case OP_UPTO:
1924      case OP_MINUPTO:      case OP_MINUPTO:
1925      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);      case OP_EXACT:
1926        else fprintf(outfile, "    \\x%02x{", c);      case OP_POSSTAR:
1927      if (*code != OP_EXACT) fprintf(outfile, ",");      case OP_POSPLUS:
1928      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_POSQUERY:
1929      if (*code == OP_MINUPTO) fprintf(outfile, "?");      case OP_POSUPTO:
1930      code += 3;      case OP_STARI:
1931      break;      case OP_MINSTARI:
1932        case OP_PLUSI:
1933      case OP_TYPEEXACT:      case OP_MINPLUSI:
1934      case OP_TYPEUPTO:      case OP_QUERYI:
1935      case OP_TYPEMINUPTO:      case OP_MINQUERYI:
1936      fprintf(outfile, "    %s{", OP_names[code[3]]);      case OP_UPTOI:
1937      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");      case OP_MINUPTOI:
1938      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_EXACTI:
1939      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");      case OP_POSSTARI:
1940      code += 3;      case OP_POSPLUSI:
1941      break;      case OP_POSQUERYI:
1942        case OP_POSUPTOI:
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
1943      case OP_NOTSTAR:      case OP_NOTSTAR:
1944      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
1945      case OP_NOTPLUS:      case OP_NOTPLUS:
1946      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
1947      case OP_NOTQUERY:      case OP_NOTQUERY:
1948      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
1949      case OP_NOTUPTO:      case OP_NOTUPTO:
1950      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
1951      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);      case OP_NOTEXACT:
1952        else fprintf(outfile, "    [^\\x%02x]{", c);      case OP_NOTPOSSTAR:
1953      if (*code != OP_NOTEXACT) fprintf(outfile, ",");      case OP_NOTPOSPLUS:
1954      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_NOTPOSQUERY:
1955      if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");      case OP_NOTPOSUPTO:
1956      code += 3;      case OP_NOTSTARI:
1957      break;      case OP_NOTMINSTARI:
1958        case OP_NOTPLUSI:
1959        case OP_NOTMINPLUSI:
1960        case OP_NOTQUERYI:
1961        case OP_NOTMINQUERYI:
1962        case OP_NOTUPTOI:
1963        case OP_NOTMINUPTOI:
1964        case OP_NOTEXACTI:
1965        case OP_NOTPOSSTARI:
1966        case OP_NOTPOSPLUSI:
1967        case OP_NOTPOSQUERYI:
1968        case OP_NOTPOSUPTOI:
1969        if (utf) utf16_char = TRUE;
1970    #endif
1971        /* Fall through. */
1972    
1973      case OP_REF:      default:
1974      fprintf(outfile, "    \\%d", *(++code));      length = OP_lengths16[op] - 1;
1975      code++;      break;
     goto CLASS_REF_REPEAT;  
1976    
1977      case OP_CLASS:      case OP_CLASS:
1978        case OP_NCLASS:
1979        /* Skip the character bit map. */
1980        ptr += 32/sizeof(pcre_uint16);
1981        length = 0;
1982        break;
1983    
1984        case OP_XCLASS:
1985        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1986        if (LINK_SIZE > 1)
1987          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1988            - (1 + LINK_SIZE + 1));
1989        else
1990          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1991    
1992        /* Reverse the size of the XCLASS instance. */
1993        *ptr = swap_uint16(*ptr);
1994        ptr++;
1995        if (LINK_SIZE > 1)
1996        {        {
1997        int i, min, max;        *ptr = swap_uint16(*ptr);
1998        code++;        ptr++;
1999        fprintf(outfile, "    [");        }
2000    
2001        for (i = 0; i < 256; i++)      op = *ptr;
2002          {      *ptr = swap_uint16(op);
2003          if ((code[i/8] & (1 << (i&7))) != 0)      ptr++;
2004            {      if ((op & XCL_MAP) != 0)
2005            int j;        {
2006            for (j = i+1; j < 256; j++)        /* Skip the character bit map. */
2007              if ((code[j/8] & (1 << (j&7))) == 0) break;        ptr += 32/sizeof(pcre_uint16);
2008            if (i == '-' || i == ']') fprintf(outfile, "\\");        length -= 32/sizeof(pcre_uint16);
2009            if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);        }
2010            if (--j > i)      break;
2011              {      }
2012              fprintf(outfile, "-");    }
2013              if (j == '-' || j == ']') fprintf(outfile, "\\");  /* Control should never reach here in 16 bit mode. */
2014              if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  #endif /* SUPPORT_PCRE16 */
2015              }  }
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
2016    
       CLASS_REF_REPEAT:  
2017    
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
2018    
2019          case OP_CRRANGE:  /*************************************************
2020          case OP_CRMINRANGE:  *        Check match or recursion limit          *
2021          min = (code[1] << 8) + code[2];  *************************************************/
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
2022    
2023          default:  static int
2024          code--;  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2025          }    int start_offset, int options, int *use_offsets, int use_size_offsets,
2026        }    int flag, unsigned long int *limit, int errnumber, const char *msg)
2027      break;  {
2028    int count;
2029    int min = 0;
2030    int mid = 64;
2031    int max = -1;
2032    
2033      /* Anything else is just a one-node item */  extra->flags |= flag;
2034    
2035      default:  for (;;)
2036      fprintf(outfile, "    %s", OP_names[*code]);    {
2037      break;    *limit = mid;
2038    
2039      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2040        use_offsets, use_size_offsets);
2041    
2042      if (count == errnumber)
2043        {
2044        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2045        min = mid;
2046        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2047      }      }
2048    
2049    code++;    else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2050    fprintf(outfile, "\n");                           count == PCRE_ERROR_PARTIAL)
2051        {
2052        if (mid == min + 1)
2053          {
2054          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2055          break;
2056          }
2057        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2058        max = mid;
2059        mid = (min + mid)/2;
2060        }
2061      else break;    /* Some other error */
2062    }    }
2063    
2064    extra->flags &= ~flag;
2065    return count;
2066  }  }
2067    
2068    
2069    
2070  /* Character string printing function. */  /*************************************************
2071    *         Case-independent strncmp() function    *
2072    *************************************************/
2073    
2074    /*
2075    Arguments:
2076      s         first string
2077      t         second string
2078      n         number of characters to compare
2079    
2080    Returns:    < 0, = 0, or > 0, according to the comparison
2081    */
2082    
2083  static void pchars(unsigned char *p, int length)  static int
2084    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2085  {  {
2086  int c;  while (n--)
2087  while (length-- > 0)    {
2088    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    int c = tolower(*s++) - tolower(*t++);
2089      else fprintf(outfile, "\\x%02x", c);    if (c) return c;
2090      }
2091    return 0;
2092  }  }
2093    
2094    
2095    
2096  /* Alternative malloc function, to test functionality and show the size of the  /*************************************************
2097  compiled re. */  *         Check newline indicator                *
2098    *************************************************/
2099    
2100  static void *new_malloc(size_t size)  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2101    a message and return 0 if there is no match.
2102    
2103    Arguments:
2104      p           points after the leading '<'
2105      f           file for error message
2106    
2107    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2108    */
2109    
2110    static int
2111    check_newline(pcre_uint8 *p, FILE *f)
2112  {  {
2113  gotten_store = size;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2114  if (log_store)  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2115    fprintf(outfile, "Memory allocation (code space): %d\n",  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2116      (int)((int)size - offsetof(real_pcre, code[0])));  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2117  return malloc(size);  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2118    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2119    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2120    fprintf(f, "Unknown newline type at: <%s\n", p);
2121    return 0;
2122  }  }
2123    
2124    
2125    
2126    /*************************************************
2127    *             Usage function                     *
2128    *************************************************/
2129    
/* Write the command line summary to the standard output. The lines for the
16-bit library, DFA matching, and the POSIX interface are included only when
the corresponding support is compiled in, and the readline line depends on
whether a line-editing library was configured. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs("  -16      use the 16-bit library\n", stdout);
#endif

fputs("  -b       show compiled code\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -C arg   show a specific compile-time option\n"
      "           and exit with its value. The arg can be:\n"
      "     linksize     internal link size [2, 3, 4]\n"
      "     pcre8        8 bit library support enabled [0, 1]\n"
      "     pcre16       16 bit library support enabled [0, 1]\n"
      "     utf          Unicode Transformation Format supported [0, 1]\n"
      "     ucp          Unicode Properties supported [0, 1]\n"
      "     jit          Just-in-time compiler supported [0, 1]\n"
      "     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       force each pattern to be studied at basic level\n"
      "  -s+      force each pattern to be studied, using JIT if available\n"
      "  -s++     ditto, verifying when JIT was actually used\n"
      "  -s+n     force each pattern to be studied, using JIT if available,\n"
      "             where 1 <= n <= 7 selects JIT options\n"
      "  -s++n    ditto, verifying when JIT was actually used\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
2180    
2181    
2182    
2183    /*************************************************
2184    *                Main Program                    *
2185    *************************************************/
2186    
2187  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
2188  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 311  options, followed by a set of test data, Line 2191  options, followed by a set of test data,
2191  int main(int argc, char **argv)  int main(int argc, char **argv)
2192  {  {
2193  FILE *infile = stdin;  FILE *infile = stdin;
2194    const char *version;
2195  int options = 0;  int options = 0;
2196  int study_options = 0;  int study_options = 0;
2197    int default_find_match_limit = FALSE;
2198  int op = 1;  int op = 1;
2199  int timeit = 0;  int timeit = 0;
2200    int timeitm = 0;
2201  int showinfo = 0;  int showinfo = 0;
2202  int showstore = 0;  int showstore = 0;
2203  int posix = 0;  int force_study = -1;
2204    int force_study_options = 0;
2205    int quiet = 0;
2206    int size_offsets = 45;
2207    int size_offsets_max;
2208    int *offsets = NULL;
2209  int debug = 0;  int debug = 0;
2210  int done = 0;  int done = 0;
2211  unsigned char buffer[30000];  int all_use_dfa = 0;
2212  unsigned char dbuffer[1024];  int verify_jit = 0;
2213    int yield = 0;
2214    int stack_size;
2215    
2216    #if !defined NOPOSIX
2217    int posix = 0;
2218    #endif
2219    #if !defined NODFA
2220    int *dfa_workspace = NULL;
2221    #endif
2222    
2223    pcre_jit_stack *jit_stack = NULL;
2224    
2225  /* Static so that new_malloc can use it. */  /* These vectors store, end-to-end, a list of zero-terminated captured
2226    substring names, each list itself being terminated by an empty name. Assume
2227    that 1024 is plenty long enough for the few names we'll be testing. It is
2228    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2229    for the actual memory, to ensure alignment. */
2230    
2231    pcre_uint16 copynames[1024];
2232    pcre_uint16 getnames[1024];
2233    
2234    #ifdef SUPPORT_PCRE16
2235    pcre_uint16 *cn16ptr;
2236    pcre_uint16 *gn16ptr;
2237    #endif
2238    
2239    #ifdef SUPPORT_PCRE8
2240    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2241    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2242    pcre_uint8 *cn8ptr;
2243    pcre_uint8 *gn8ptr;
2244    #endif
2245    
2246    /* Get buffers from malloc() so that valgrind will check their misuse when
2247    debugging. They grow automatically when very long lines are read. The 16-bit
2248    buffer (buffer16) is obtained only if needed. */
2249    
2250    buffer = (pcre_uint8 *)malloc(buffer_size);
2251    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2252    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2253    
2254    /* The outfile variable is static so that new_malloc can use it. */
2255    
2256  outfile = stdout;  outfile = stdout;
2257    
2258    /* The following  _setmode() stuff is some Windows magic that tells its runtime
2259    library to translate CRLF into a single LF character. At least, that's what
2260    I've been told: never having used Windows I take this all on trust. Originally
2261    it set 0x8000, but then I was advised that _O_BINARY was better. */
2262    
2263    #if defined(_WIN32) || defined(WIN32)
2264    _setmode( _fileno( stdout ), _O_BINARY );
2265    #endif
2266    
2267    /* Get the version number: both pcre_version() and pcre16_version() give the
2268    same answer. We just need to ensure that we call one that is available. */
2269    
2270    #ifdef SUPPORT_PCRE8
2271    version = pcre_version();
2272    #else
2273    version = pcre16_version();
2274    #endif
2275    
2276  /* Scan options */  /* Scan options */
2277    
2278  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2279    {    {
2280    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    pcre_uint8 *endptr;
2281      showstore = 1;    char *arg = argv[op];
2282    else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
2283    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    if (strcmp(arg, "-m") == 0) showstore = 1;
2284    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2285    else if (strcmp(argv[op], "-p") == 0) posix = 1;  
2286      else if (strncmp(arg, "-s+", 3) == 0)
2287        {
2288        arg += 3;
2289        if (*arg == '+') { arg++; verify_jit = TRUE; }
2290        force_study = 1;
2291        if (*arg == 0)
2292          force_study_options = jit_study_bits[6];
2293        else if (*arg >= '1' && *arg <= '7')
2294          force_study_options = jit_study_bits[*arg - '1'];
2295        else goto BAD_ARG;
2296        }
2297      else if (strcmp(arg, "-16") == 0)
2298        {
2299    #ifdef SUPPORT_PCRE16
2300        use_pcre16 = 1;
2301    #else
2302        printf("** This version of PCRE was built without 16-bit support\n");
2303        exit(1);
2304    #endif
2305        }
2306      else if (strcmp(arg, "-q") == 0) quiet = 1;
2307      else if (strcmp(arg, "-b") == 0) debug = 1;
2308      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2309      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2310      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2311    #if !defined NODFA
2312      else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2313    #endif
2314      else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2315          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2316            *endptr == 0))
2317        {
2318        op++;
2319        argc--;
2320        }
2321      else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2322        {
2323        int both = arg[2] == 0;
2324        int temp;
2325        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2326                         *endptr == 0))
2327          {
2328          timeitm = temp;
2329          op++;
2330          argc--;
2331          }
2332        else timeitm = LOOPREPEAT;
2333        if (both) timeit = timeitm;
2334        }
2335      else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2336          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2337            *endptr == 0))
2338        {
2339    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2340        printf("PCRE: -S not supported on this OS\n");
2341        exit(1);
2342    #else
2343        int rc;
2344        struct rlimit rlim;
2345        getrlimit(RLIMIT_STACK, &rlim);
2346        rlim.rlim_cur = stack_size * 1024 * 1024;
2347        rc = setrlimit(RLIMIT_STACK, &rlim);
2348        if (rc != 0)
2349          {
2350        printf("PCRE: setrlimit() failed with error %d\n", rc);
2351        exit(1);
2352          }
2353        op++;
2354        argc--;
2355    #endif
2356        }
2357    #if !defined NOPOSIX
2358      else if (strcmp(arg, "-p") == 0) posix = 1;
2359    #endif
2360      else if (strcmp(arg, "-C") == 0)
2361        {
2362        int rc;
2363        unsigned long int lrc;
2364    
2365        if (argc > 2)
2366          {
2367          if (strcmp(argv[op + 1], "linksize") == 0)
2368            {
2369            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2370            printf("%d\n", rc);
2371            yield = rc;
2372            goto EXIT;
2373            }
2374          if (strcmp(argv[op + 1], "pcre8") == 0)
2375            {
2376    #ifdef SUPPORT_PCRE8
2377            printf("1\n");
2378            yield = 1;
2379    #else
2380            printf("0\n");
2381            yield = 0;
2382    #endif
2383            goto EXIT;
2384            }
2385          if (strcmp(argv[op + 1], "pcre16") == 0)
2386            {
2387    #ifdef SUPPORT_PCRE16
2388            printf("1\n");
2389            yield = 1;
2390    #else
2391            printf("0\n");
2392            yield = 0;
2393    #endif
2394            goto EXIT;
2395            }
2396          if (strcmp(argv[op + 1], "utf") == 0)
2397            {
2398    #ifdef SUPPORT_PCRE8
2399            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2400            printf("%d\n", rc);
2401            yield = rc;
2402    #else
2403            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2404            printf("%d\n", rc);
2405            yield = rc;
2406    #endif
2407            goto EXIT;
2408            }
2409          if (strcmp(argv[op + 1], "ucp") == 0)
2410            {
2411            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2412            printf("%d\n", rc);
2413            yield = rc;
2414            goto EXIT;
2415            }
2416          if (strcmp(argv[op + 1], "jit") == 0)
2417            {
2418            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2419            printf("%d\n", rc);
2420            yield = rc;
2421            goto EXIT;
2422            }
2423          if (strcmp(argv[op + 1], "newline") == 0)
2424            {
2425            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2426            /* Note that these values are always the ASCII values, even
2427            in EBCDIC environments. CR is 13 and NL is 10. */
2428            printf("%s\n", (rc == 13)? "CR" :
2429              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2430              (rc == -2)? "ANYCRLF" :
2431              (rc == -1)? "ANY" : "???");
2432            goto EXIT;
2433            }
2434          printf("Unknown -C option: %s\n", argv[op + 1]);
2435          goto EXIT;
2436          }
2437    
2438        printf("PCRE version %s\n", version);
2439        printf("Compiled with\n");
2440    
2441    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2442    are set, either both UTFs are supported or both are not supported. */
2443    
2444    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2445        printf("  8-bit and 16-bit support\n");
2446        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2447        if (rc)
2448          printf("  UTF-8 and UTF-16 support\n");
2449        else
2450          printf("  No UTF-8 or UTF-16 support\n");
2451    #elif defined SUPPORT_PCRE8
2452        printf("  8-bit support only\n");
2453        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2454        printf("  %sUTF-8 support\n", rc? "" : "No ");
2455    #else
2456        printf("  16-bit support only\n");
2457        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2458        printf("  %sUTF-16 support\n", rc? "" : "No ");
2459    #endif
2460    
2461        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2462        printf("  %sUnicode properties support\n", rc? "" : "No ");
2463        (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2464        if (rc)
2465          {
2466          const char *arch;
2467          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2468          printf("  Just-in-time compiler support: %s\n", arch);
2469          }
2470        else
2471          printf("  No just-in-time compiler support\n");
2472        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2473        /* Note that these values are always the ASCII values, even
2474        in EBCDIC environments. CR is 13 and NL is 10. */
2475        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2476          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2477          (rc == -2)? "ANYCRLF" :
2478          (rc == -1)? "ANY" : "???");
2479        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2480        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2481                                         "all Unicode newlines");
2482        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2483        printf("  Internal link size = %d\n", rc);
2484        (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2485        printf("  POSIX malloc threshold = %d\n", rc);
2486        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2487        printf("  Default match limit = %ld\n", lrc);
2488        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2489        printf("  Default recursion depth limit = %ld\n", lrc);
2490        (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2491        printf("  Match recursion uses %s", rc? "stack" : "heap");
2492        if (showstore)
2493          {
2494          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2495          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2496          }
2497        printf("\n");
2498        goto EXIT;
2499        }
2500      else if (strcmp(arg, "-help") == 0 ||
2501               strcmp(arg, "--help") == 0)
2502        {
2503        usage();
2504        goto EXIT;
2505        }
2506    else    else
2507      {      {
2508      printf("*** Unknown option %s\n", argv[op]);      BAD_ARG:
2509      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("** Unknown or malformed option %s\n", arg);
2510      printf("  -d   debug: show compiled code; implies -i\n"      usage();
2511             "  -i   show information about compiled pattern\n"      yield = 1;
2512             "  -p   use POSIX interface\n"      goto EXIT;
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
2513      }      }
2514    op++;    op++;
2515    argc--;    argc--;
2516    }    }
2517    
2518    /* Get the store for the offsets vector, and remember what it was */
2519    
2520    size_offsets_max = size_offsets;
2521    offsets = (int *)malloc(size_offsets_max * sizeof(int));
2522    if (offsets == NULL)
2523      {
2524      printf("** Failed to get %d bytes of memory for offsets vector\n",
2525        (int)(size_offsets_max * sizeof(int)));
2526      yield = 1;
2527      goto EXIT;
2528      }
2529    
2530  /* Sort out the input and output files */  /* Sort out the input and output files */
2531    
2532  if (argc > 1)  if (argc > 1)
2533    {    {
2534    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
2535    if (infile == NULL)    if (infile == NULL)
2536      {      {
2537      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
2538      return 1;      yield = 1;
2539        goto EXIT;
2540      }      }
2541    }    }
2542    
2543  if (argc > 2)  if (argc > 2)
2544    {    {
2545    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
2546    if (outfile == NULL)    if (outfile == NULL)
2547      {      {
2548      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
2549      return 1;      yield = 1;
2550        goto EXIT;
2551      }      }
2552    }    }
2553    
2554  /* Set alternative malloc function */  /* Set alternative malloc function */
2555    
2556  pcre_malloc = new_malloc;  #ifdef SUPPORT_PCRE8
2557    pcre_malloc = new_malloc;
2558    pcre_free = new_free;
2559    pcre_stack_malloc = stack_malloc;
2560    pcre_stack_free = stack_free;
2561    #endif
2562    
2563    #ifdef SUPPORT_PCRE16
2564    pcre16_malloc = new_malloc;
2565    pcre16_free = new_free;
2566    pcre16_stack_malloc = stack_malloc;
2567    pcre16_stack_free = stack_free;
2568    #endif
2569    
2570    /* Heading line unless quiet, then prompt for first regex if stdin */
2571    
2572    if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2573    
2574    /* Main loop */
2575    
2576    while (!done)
2577      {
2578      pcre *re = NULL;
2579      pcre_extra *extra = NULL;
2580    
2581    #if !defined NOPOSIX  /* There are still compilers that require no indent */
2582      regex_t preg;
2583      int do_posix = 0;
2584    #endif
2585    
2586      const char *error;
2587      pcre_uint8 *markptr;
2588      pcre_uint8 *p, *pp, *ppp;
2589      pcre_uint8 *to_file = NULL;
2590      const pcre_uint8 *tables = NULL;
2591      unsigned long int get_options;
2592      unsigned long int true_size, true_study_size = 0;
2593      size_t size, regex_gotten_store;
2594      int do_allcaps = 0;
2595      int do_mark = 0;
2596      int do_study = 0;
2597      int no_force_study = 0;
2598      int do_debug = debug;
2599      int do_G = 0;
2600      int do_g = 0;
2601      int do_showinfo = showinfo;
2602      int do_showrest = 0;
2603      int do_showcaprest = 0;
2604      int do_flip = 0;
2605      int erroroffset, len, delimiter, poffset;
2606    
2607    #if !defined NODFA
2608      int dfa_matched = 0;
2609    #endif
2610    
2611      use_utf = 0;
2612      debug_lengths = 1;
2613    
2614      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
2615      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2616      fflush(outfile);
2617    
2618      p = buffer;
2619      while (isspace(*p)) p++;
2620      if (*p == 0) continue;
2621    
2622      /* See if the pattern is to be loaded pre-compiled from a file. */
2623    
2624      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2625        {
2626        pcre_uint32 magic;
2627        pcre_uint8 sbuf[8];
2628        FILE *f;
2629    
2630        p++;
2631        if (*p == '!')
2632          {
2633          do_debug = TRUE;
2634          do_showinfo = TRUE;
2635          p++;
2636          }
2637    
2638        pp = p + (int)strlen((char *)p);
2639        while (isspace(pp[-1])) pp--;
2640        *pp = 0;
2641    
2642        f = fopen((char *)p, "rb");
2643        if (f == NULL)
2644          {
2645          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2646          continue;
2647          }
2648    
2649        first_gotten_store = 0;
2650        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2651    
2652        true_size =
2653          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2654        true_study_size =
2655          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2656    
2657        re = (pcre *)new_malloc(true_size);
2658        if (re == NULL)
2659          {
2660          printf("** Failed to get %d bytes of memory for pcre object\n",
2661            (int)true_size);
2662          yield = 1;
2663          goto EXIT;
2664          }
2665        regex_gotten_store = first_gotten_store;
2666    
2667        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2668    
2669        magic = ((REAL_PCRE *)re)->magic_number;
2670        if (magic != MAGIC_NUMBER)
2671          {
2672          if (swap_uint32(magic) == MAGIC_NUMBER)
2673            {
2674            do_flip = 1;
2675            }
2676          else
2677            {
2678            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2679            new_free(re);
2680            fclose(f);
2681            continue;
2682            }
2683          }
2684    
2685        /* We hide the byte-invert info for little and big endian tests. */
2686        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2687          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2688    
2689        /* Now see if there is any following study data. */
2690    
2691  /* Heading line, then prompt for first regex if stdin */      if (true_study_size != 0)
2692          {
2693          pcre_study_data *psd;
2694    
2695  fprintf(outfile, "PCRE version %s\n\n", pcre_version());        extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2696          extra->flags = PCRE_EXTRA_STUDY_DATA;
2697    
2698  /* Main loop */        psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2699          extra->study_data = psd;
2700    
2701  while (!done)        if (fread(psd, 1, true_study_size, f) != true_study_size)
2702    {          {
2703    pcre *re = NULL;          FAIL_READ:
2704    pcre_extra *extra = NULL;          fprintf(outfile, "Failed to read data from %s\n", p);
2705            if (extra != NULL)
2706              {
2707              PCRE_FREE_STUDY(extra);
2708              }
2709            new_free(re);
2710            fclose(f);
2711            continue;
2712            }
2713          fprintf(outfile, "Study data loaded from %s\n", p);
2714          do_study = 1;     /* To get the data output if requested */
2715          }
2716        else fprintf(outfile, "No study data\n");
2717    
2718  #if !defined NOPOSIX  /* There are still compilers that require no indent */      /* Flip the necessary bytes. */
2719    regex_t preg;      if (do_flip)
2720  #endif        {
2721          int rc;
2722          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2723          if (rc == PCRE_ERROR_BADMODE)
2724            {
2725            /* Simulate the result of the function call below. */
2726            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2727              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2728            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2729              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2730            new_free(re);
2731            fclose(f);
2732            continue;
2733            }
2734          }
2735    
2736    const char *error;      /* Need to know if UTF-8 for printing data strings. */
   unsigned char *p, *pp, *ppp;  
   unsigned const char *tables = NULL;  
   int do_study = 0;  
   int do_debug = debug;  
   int do_G = 0;  
   int do_g = 0;  
   int do_showinfo = showinfo;  
   int do_showrest = 0;  
   int do_posix = 0;  
   int erroroffset, len, delimiter;  
2737    
2738    if (infile == stdin) printf("  re> ");      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2739    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;        {
2740    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);        new_free(re);
2741          fclose(f);
2742          continue;
2743          }
2744        use_utf = (get_options & PCRE_UTF8) != 0;
2745    
2746    p = buffer;      fclose(f);
2747    while (isspace(*p)) p++;      goto SHOW_INFO;
2748    if (*p == 0) continue;      }
2749    
2750    /* Get the delimiter and seek the end of the pattern; if is isn't    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2751    complete, read more. */    the pattern; if it isn't complete, read more. */
2752    
2753    delimiter = *p++;    delimiter = *p++;
2754    
2755    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2756      {      {
2757      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2758      goto SKIP_DATA;      goto SKIP_DATA;
2759      }      }
2760    
2761    pp = p;    pp = p;
2762      poffset = (int)(p - buffer);
2763    
2764    for(;;)    for(;;)
2765      {      {
# Line 435  while (!done) Line 2770  while (!done)
2770        pp++;        pp++;
2771        }        }
2772      if (*pp != 0) break;      if (*pp != 0) break;
2773        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
2774        {        {
2775        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2776        done = 1;        done = 1;
# Line 453  while (!done) Line 2779  while (!done)
2779      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2780      }      }
2781    
2782      /* The buffer may have moved while being extended; reset the start of data
2783      pointer to the correct relative point in the buffer. */
2784    
2785      p = buffer + poffset;
2786    
2787    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
2788    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
2789    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
2790    
2791    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
2792    
2793    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
2794      for callouts. */
2795    
2796    *pp++ = 0;    *pp++ = 0;
2797      strcpy((char *)pbuffer, (char *)p);
2798    
2799    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2800    
2801    options = 0;    options = 0;
2802    study_options = 0;    study_options = force_study_options;
2803    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
2804    
2805    while (*pp != 0)    while (*pp != 0)
2806      {      {
2807      switch (*pp++)      switch (*pp++)
2808        {        {
2809          case 'f': options |= PCRE_FIRSTLINE; break;
2810        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
2811        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
2812        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
2813        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2814        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2815    
2816        case '+': do_showrest = 1; break;        case '+':
2817          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2818          break;
2819    
2820          case '=': do_allcaps = 1; break;
2821        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2822          case 'B': do_debug = 1; break;
2823          case 'C': options |= PCRE_AUTO_CALLOUT; break;
2824        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
2825        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2826          case 'F': do_flip = 1; break;
2827        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2828        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2829          case 'J': options |= PCRE_DUPNAMES; break;
2830          case 'K': do_mark = 1; break;
2831        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2832          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2833    
2834  #if !defined NOPOSIX  #if !defined NOPOSIX
2835        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2836  #endif  #endif
2837    
2838        case 'S': do_study = 1; break;        case 'S':
2839          do_study = 1;
2840          for (;;)
2841            {
2842            switch (*pp++)
2843              {
2844              case 'S':
2845              do_study = 0;
2846              no_force_study = 1;
2847              break;
2848    
2849              case '!':
2850              study_options |= PCRE_STUDY_EXTRA_NEEDED;
2851              break;
2852    
2853              case '+':
2854              if (*pp == '+')
2855                {
2856                verify_jit = TRUE;
2857                pp++;
2858                }
2859              if (*pp >= '1' && *pp <= '7')
2860                study_options |= jit_study_bits[*pp++ - '1'];
2861              else
2862                study_options |= jit_study_bits[6];
2863              break;
2864    
2865              case '-':
2866              study_options &= ~PCRE_STUDY_ALLJIT;
2867              break;
2868    
2869              default:
2870              pp--;
2871              goto ENDLOOP;
2872              }
2873            }
2874          ENDLOOP:
2875          break;
2876    
2877        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2878          case 'W': options |= PCRE_UCP; break;
2879        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2880          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2881          case 'Z': debug_lengths = 0; break;
2882          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2883          case '?': options |= PCRE_NO_UTF8_CHECK; break;
2884    
2885          case 'T':
2886          switch (*pp++)
2887            {
2888            case '0': tables = tables0; break;
2889            case '1': tables = tables1; break;
2890    
2891            case '\r':
2892            case '\n':
2893            case ' ':
2894            case 0:
2895            fprintf(outfile, "** Missing table number after /T\n");
2896            goto SKIP_DATA;
2897    
2898            default:
2899            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2900            goto SKIP_DATA;
2901            }
2902          break;
2903    
2904        case 'L':        case 'L':
2905        ppp = pp;        ppp = pp;
2906        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
2907          /* The NUL (0) test is just in case this is an unterminated line. */
2908          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2909        *ppp = 0;        *ppp = 0;
2910        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2911          {          {
2912          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2913          goto SKIP_DATA;          goto SKIP_DATA;
2914          }          }
2915        tables = pcre_maketables();        locale_set = 1;
2916          tables = PCRE_MAKETABLES;
2917        pp = ppp;        pp = ppp;
2918        break;        break;
2919    
2920        case '\n': case ' ': break;        case '>':
2921          to_file = pp;
2922          while (*pp != 0) pp++;
2923          while (isspace(pp[-1])) pp--;
2924          *pp = 0;
2925          break;
2926    
2927          case '<':
2928            {
2929            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2930              {
2931              options |= PCRE_JAVASCRIPT_COMPAT;
2932              pp += 3;
2933              }
2934            else
2935              {
2936              int x = check_newline(pp, outfile);
2937              if (x == 0) goto SKIP_DATA;
2938              options |= x;
2939              while (*pp++ != '>');
2940              }
2941            }
2942          break;
2943    
2944          case '\r':                      /* So that it works in Windows */
2945          case '\n':
2946          case ' ':
2947          break;
2948    
2949        default:        default:
2950        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2951        goto SKIP_DATA;        goto SKIP_DATA;
# Line 517  while (!done) Line 2954  while (!done)
2954    
2955    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2956    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2957    local character tables. */    local character tables. Neither does it have 16-bit support. */
2958    
2959  #if !defined NOPOSIX  #if !defined NOPOSIX
2960    if (posix || do_posix)    if (posix || do_posix)
2961      {      {
2962      int rc;      int rc;
2963      int cflags = 0;      int cflags = 0;
2964    
2965      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2966      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2967        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2968        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2969        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2970        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2971        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2972    
2973        first_gotten_store = 0;
2974      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2975    
2976      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 533  while (!done) Line 2978  while (!done)
2978    
2979      if (rc != 0)      if (rc != 0)
2980        {        {
2981        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2982        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2983        goto SKIP_DATA;        goto SKIP_DATA;
2984        }        }
# Line 545  while (!done) Line 2990  while (!done)
2990  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2991    
2992      {      {
2993      if (timeit)      /* In 16-bit mode, convert the input. */
2994    
2995    #ifdef SUPPORT_PCRE16
2996        if (use_pcre16)
2997          {
2998          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2999            {
3000            case -1:
3001            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3002              "converted to UTF-16\n");
3003            goto SKIP_DATA;
3004    
3005            case -2:
3006            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3007              "cannot be converted to UTF-16\n");
3008            goto SKIP_DATA;
3009    
3010          case -3: /* "Impossible error" when to16 is called with arg1 FALSE */
3011            fprintf(outfile, "**Failed: character value greater than 0xffff "
3012              "cannot be converted to 16-bit in non-UTF mode\n");
3013            goto SKIP_DATA;
3014    
3015            default:
3016            break;
3017            }
3018          p = (pcre_uint8 *)buffer16;
3019          }
3020    #endif
3021    
3022        /* Compile many times when timing */
3023    
3024        if (timeit > 0)
3025        {        {
3026        register int i;        register int i;
3027        clock_t time_taken;        clock_t time_taken;
3028        clock_t start_time = clock();        clock_t start_time = clock();
3029        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
3030          {          {
3031          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3032          if (re != NULL) free(re);          if (re != NULL) free(re);
3033          }          }
3034        time_taken = clock() - start_time;        time_taken = clock() - start_time;
3035        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
3036          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
3037          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
3038        }        }
3039    
3040      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
3041        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3042    
3043      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3044      if non-interactive. */      if non-interactive. */
# Line 574  while (!done) Line 3051  while (!done)
3051          {          {
3052          for (;;)          for (;;)
3053            {            {
3054            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
3055              {              {
3056              done = 1;              done = 1;
3057              goto CONTINUE;              goto CONTINUE;
# Line 588  while (!done) Line 3065  while (!done)
3065        goto CONTINUE;        goto CONTINUE;
3066        }        }
3067    
3068      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
3069      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
3070      returns only limited data. Check that it agrees with the newer one. */      lines. */
3071    
3072      if (do_showinfo)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3073          goto SKIP_DATA;
3074        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3075    
3076        /* Extract the size for possible writing before possibly flipping it,
3077        and remember the store that was got. */
3078    
3079        true_size = ((REAL_PCRE *)re)->size;
3080        regex_gotten_store = first_gotten_store;
3081    
3082        /* Output code size information if requested */
3083    
3084        if (log_store)
3085          fprintf(outfile, "Memory allocation (code space): %d\n",
3086            (int)(first_gotten_store -
3087                  sizeof(REAL_PCRE) -
3088                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3089    
3090        /* If -s or /S was present, study the regex to generate additional info to
3091        help with the matching, unless the pattern has the SS option, which
3092        suppresses the effect of /S (used for a few test patterns where studying is
3093        never sensible). */
3094    
3095        if (do_study || (force_study >= 0 && !no_force_study))
3096        {        {
3097        int old_first_char, old_options, old_count;        if (timeit > 0)
3098        int count, backrefmax, first_char, need_char;          {
3099        size_t size;          register int i;
3100            clock_t time_taken;
3101        if (do_debug) print_internals(re);          clock_t start_time = clock();
3102            for (i = 0; i < timeit; i++)
3103        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);            {
3104        new_info(re, NULL, PCRE_INFO_SIZE, &size);            PCRE_STUDY(extra, re, study_options, &error);
3105        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            }
3106        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);          time_taken = clock() - start_time;
3107        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);          if (extra != NULL)
3108        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            {
3109              PCRE_FREE_STUDY(extra);
3110        old_count = pcre_info(re, &old_options, &old_first_char);            }
3111        if (count < 0) fprintf(outfile,          fprintf(outfile, "  Study time %.4f milliseconds\n",
3112          "Error %d from pcre_info()\n", count);            (((double)time_taken * 1000.0) / (double)timeit) /
3113        else              (double)CLOCKS_PER_SEC);
3114            }
3115          PCRE_STUDY(extra, re, study_options, &error);
3116          if (error != NULL)
3117            fprintf(outfile, "Failed to study: %s\n", error);
3118          else if (extra != NULL)
3119          {          {
3120          if (old_count != count) fprintf(outfile,          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3121            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,          if (log_store)
3122              old_count);            {
3123              size_t jitsize;
3124              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3125                  jitsize != 0)
3126                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3127              }
3128            }
3129          }
3130    
3131          if (old_first_char != first_char) fprintf(outfile,      /* If /K was present, we set up for handling MARK data. */
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
3132    
3133          if (old_options != options) fprintf(outfile,      if (do_mark)
3134            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,        {
3135              old_options);        if (extra == NULL)
3136            {
3137            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3138            extra->flags = 0;
3139          }          }
3140          extra->mark = &markptr;
3141          extra->flags |= PCRE_EXTRA_MARK;
3142          }
3143    
3144        /* Extract and display information from the compiled data if required. */
3145    
3146        SHOW_INFO:
3147    
3148        if (do_debug)
3149          {
3150          fprintf(outfile, "------------------------------------------------------------------\n");
3151          PCRE_PRINTINT(re, outfile, debug_lengths);
3152          }
3153    
3154        /* We already have the options in get_options (see above) */
3155    
3156        if (do_showinfo)
3157          {
3158          unsigned long int all_options;
3159          int count, backrefmax, first_char, need_char, okpartial, jchanged,
3160            hascrorlf, maxlookbehind;
3161          int nameentrysize, namecount;
3162          const pcre_uint8 *nametable;
3163    
3164          if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3165              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3166              new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3167              new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3168              new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3169              new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3170              new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3171              new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3172              new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3173              new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3174              new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3175              new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3176              != 0)
3177            goto SKIP_DATA;
3178    
3179        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3180          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3181          size, gotten_store);          (int)size, (int)regex_gotten_store);
3182    
3183        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
3184        if (backrefmax > 0)        if (backrefmax > 0)
3185          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
       if (options == 0) fprintf(outfile, "No options\n");  
         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
           ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
           ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
           ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
           ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
           ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
           ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
           ((options & PCRE_EXTRA) != 0)? " extra" : "",  
           ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
3186    
3187        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (namecount > 0)
3188          fprintf(outfile, "Case state changes\n");          {
3189            fprintf(outfile, "Named capturing subpatterns:\n");
3190            while (namecount-- > 0)
3191              {
3192    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3193              int imm2_size = use_pcre16 ? 1 : 2;
3194    #else
3195              int imm2_size = IMM2_SIZE;
3196    #endif
3197              int length = (int)STRLEN(nametable + imm2_size);
3198              fprintf(outfile, "  ");
3199              PCHARSV(nametable, imm2_size, length, outfile);
3200              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3201    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3202              fprintf(outfile, "%3d\n", use_pcre16?
3203                 (int)(((PCRE_SPTR16)nametable)[0])
3204                :((int)nametable[0] << 8) | (int)nametable[1]);
3205              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3206    #else
3207              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3208    #ifdef SUPPORT_PCRE8
3209              nametable += nameentrysize;
3210    #else
3211              nametable += nameentrysize * 2;
3212    #endif
3213    #endif
3214              }
3215            }
3216    
3217          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3218          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3219    
3220          all_options = ((REAL_PCRE *)re)->options;
3221          if (do_flip) all_options = swap_uint32(all_options);
3222    
3223          if (get_options == 0) fprintf(outfile, "No options\n");
3224            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3225              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3226              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3227              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3228              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3229              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3230              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3231              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3232              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3233              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3234              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3235              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3236              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3237              ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3238              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3239              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3240              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3241              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3242    
3243          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3244    
3245          switch (get_options & PCRE_NEWLINE_BITS)
3246            {
3247            case PCRE_NEWLINE_CR:
3248            fprintf(outfile, "Forced newline sequence: CR\n");
3249            break;
3250    
3251            case PCRE_NEWLINE_LF:
3252            fprintf(outfile, "Forced newline sequence: LF\n");
3253            break;
3254    
3255            case PCRE_NEWLINE_CRLF:
3256            fprintf(outfile, "Forced newline sequence: CRLF\n");
3257            break;
3258    
3259            case PCRE_NEWLINE_ANYCRLF:
3260            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3261            break;
3262    
3263            case PCRE_NEWLINE_ANY:
3264            fprintf(outfile, "Forced newline sequence: ANY\n");
3265            break;
3266    
3267            default:
3268            break;
3269            }
3270    
3271        if (first_char == -1)        if (first_char == -1)
3272          {          {
3273          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
3274          }          }
3275        else if (first_char < 0)        else if (first_char < 0)
3276          {          {
# Line 656  while (!done) Line 3278  while (!done)
3278          }          }
3279        else        else
3280          {          {
3281          if (isprint(first_char))          const char *caseless =
3282            fprintf(outfile, "First char = \'%c\'\n", first_char);            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3283              "" : " (caseless)";
3284    
3285            if (PRINTOK(first_char))
3286              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3287          else          else
3288            fprintf(outfile, "First char = %d\n", first_char);            {
3289              fprintf(outfile, "First char = ");
3290              pchar(first_char, outfile);
3291              fprintf(outfile, "%s\n", caseless);
3292              }
3293          }          }
3294    
3295        if (need_char < 0)        if (need_char < 0)
# Line 668  while (!done) Line 3298  while (!done)
3298          }          }
3299        else        else
3300          {          {
3301          if (isprint(need_char))          const char *caseless =
3302            fprintf(outfile, "Need char = \'%c\'\n", need_char);            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3303              "" : " (caseless)";
3304    
3305            if (PRINTOK(need_char))
3306              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3307            else
3308              {
3309              fprintf(outfile, "Need char = ");
3310              pchar(need_char, outfile);
3311              fprintf(outfile, "%s\n", caseless);
3312              }
3313            }
3314    
3315          if (maxlookbehind > 0)
3316            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3317    
3318          /* Don't output study size; at present it is in any case a fixed
3319          value, but it varies, depending on the computer architecture, and
3320          so messes up the test suite. (And with the /F option, it might be
3321          flipped.) If study was forced by an external -s, don't show this
3322          information unless -i or -d was also present. This means that, except
3323          when auto-callouts are involved, the output from runs with and without
3324          -s should be identical. */
3325    
3326          if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3327            {
3328            if (extra == NULL)
3329              fprintf(outfile, "Study returned NULL\n");
3330          else          else
3331            fprintf(outfile, "Need char = %d\n", need_char);            {
3332              pcre_uint8 *start_bits = NULL;
3333              int minlength;
3334    
3335              if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3336                fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3337    
3338              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3339                {
3340                if (start_bits == NULL)
3341                  fprintf(outfile, "No set of starting bytes\n");
3342                else
3343                  {
3344                  int i;
3345                  int c = 24;
3346                  fprintf(outfile, "Starting byte set: ");
3347                  for (i = 0; i < 256; i++)
3348                    {
3349                    if ((start_bits[i/8] & (1<<(i&7))) != 0)
3350                      {
3351                      if (c > 75)
3352                        {
3353                        fprintf(outfile, "\n  ");
3354                        c = 2;
3355                        }
3356                      if (PRINTOK(i) && i != ' ')
3357                        {
3358                        fprintf(outfile, "%c ", i);
3359                        c += 2;
3360                        }
3361                      else
3362                        {
3363                        fprintf(outfile, "\\x%02x ", i);
3364                        c += 5;
3365                        }
3366                      }
3367                    }
3368                  fprintf(outfile, "\n");
3369                  }
3370                }
3371              }
3372    
3373            /* Show this only if the JIT was set by /S, not by -s. */
3374    
3375            if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3376                (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3377              {
3378              int jit;
3379              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3380                {
3381                if (jit)
3382                  fprintf(outfile, "JIT study was successful\n");
3383                else
3384    #ifdef SUPPORT_JIT
3385                  fprintf(outfile, "JIT study was not successful\n");
3386    #else
3387                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3388    #endif
3389                }
3390              }
3391          }          }
3392        }        }
3393    
3394      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
3395      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
3396        the study length, in big-endian order. */
3397    
3398      if (do_study)      if (to_file != NULL)
3399        {        {
3400        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
3401          if (f == NULL)
3402          {          {
3403          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
3404          }          }
3405          else
3406            {
3407            pcre_uint8 sbuf[8];
3408    
3409        extra = pcre_study(re, study_options, &error);          if (do_flip) regexflip(re, extra);
3410        if (error != NULL)          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3411          fprintf(outfile, "Failed to study: %s\n", error);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3412        else if (extra == NULL)          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3413          fprintf(outfile, "Study returned NULL\n");          sbuf[3] = (pcre_uint8)((true_size) & 255);
3414            sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3415            sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3416            sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3417            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3418    
3419        else if (do_showinfo)          if (fwrite(sbuf, 1, 8, f) < 8 ||
3420          {              fwrite(re, 1, true_size, f) < true_size)
3421          uschar *start_bits = NULL;            {
3422          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3423          if (start_bits == NULL)            }
           fprintf(outfile, "No starting character set\n");  
3424          else          else
3425            {            {
3426            int i;            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3427            int c = 24;  
3428            fprintf(outfile, "Starting character set: ");            /* If there is study data, write it. */
3429            for (i = 0; i < 256; i++)  
3430              if (extra != NULL)
3431              {              {
3432              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
3433                    true_study_size)
3434                {                {
3435                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
3436                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
3437                }                }
3438                else fprintf(outfile, "Study data written to %s\n", to_file);
3439              }              }
           fprintf(outfile, "\n");  
3440            }            }
3441            fclose(f);
3442            }
3443    
3444          new_free(re);
3445          if (extra != NULL)
3446            {
3447            PCRE_FREE_STUDY(extra);
3448            }
3449          if (locale_set)
3450            {