/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC revision 1046 by ph10, Tue Sep 25 16:27:58 2012 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    #ifdef HAVE_CONFIG_H
50    #include "config.h"
51    #endif
52    
53  #include <ctype.h>  #include <ctype.h>
54  #include <stdio.h>  #include <stdio.h>
55  #include <string.h>  #include <string.h>
56  #include <stdlib.h>  #include <stdlib.h>
57  #include <time.h>  #include <time.h>
58  #include <locale.h>  #include <locale.h>
59    #include <errno.h>
60    
61  /* Use the internal info for displaying the results of pcre_study(). */  /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67    #ifdef HAVE_UNISTD_H
68    #include <unistd.h>
69    #endif
70    #if defined(SUPPORT_LIBREADLINE)
71    #include <readline/readline.h>
72    #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80    #endif
81    
82    /* A number of things vary for Windows builds. Originally, pcretest opened its
83    input and output without "b"; then I was told that "b" was needed in some
84    environments, so it was added for release 5.0 to both the input and output. (It
85    makes no difference on Unix-like systems.) Later I was told that it is wrong
86    for the input on Windows. I've now abstracted the modes into two macros that
87    are set here, to make it easier to fiddle with them, and removed "b" from the
88    input mode under Windows. */
89    
90    #if defined(_WIN32) || defined(WIN32)
91    #include <io.h>                /* For _setmode() */
92    #include <fcntl.h>             /* For _O_BINARY */
93    #define INPUT_MODE   "r"
94    #define OUTPUT_MODE  "wb"
95    
96    #ifndef isatty
97    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
98    #endif                         /* though in some environments they seem to   */
99                                   /* be already defined, hence the #ifndefs.    */
100    #ifndef fileno
101    #define fileno _fileno
102    #endif
103    
104    /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106  #include "internal.h"  #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
112    #else
113    #include <sys/time.h>          /* These two includes are needed */
114    #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119    #define INPUT_MODE   "rb"
120    #define OUTPUT_MODE  "wb"
121    #endif
122    #endif
123    
124    #define PRIV(name) name
125    
126    /* We have to include pcre_internal.h because we need the internal info for
127    displaying the results of pcre_study() and we also need to know about the
128    internal macros, structures, and other internal data values; pcretest has
129    "inside information" compared to a program that strictly follows the PCRE API.
130    
131    Although pcre_internal.h does itself include pcre.h, we explicitly include it
132    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133    appropriately for an application, not for building PCRE. */
134    
135    #include "pcre.h"
136    
137    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
138    /* Configure internal macros to 16 bit mode. */
139    #define COMPILE_PCRE16
140    #endif
141    
142    #include "pcre_internal.h"
143    
144    /* The pcre_printint() function, which prints the internal form of a compiled
145    regex, is held in a separate file so that (a) it can be compiled in either
146    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
147    when that is compiled in debug mode. */
148    
149    #ifdef SUPPORT_PCRE8
150    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151    #endif
152    #ifdef SUPPORT_PCRE16
153    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154    #endif
155    
156    /* We need access to some of the data tables that PCRE uses. So as not to have
157    to keep two copies, we include the source files here, changing the names of the
158    external symbols to prevent clashes. */
159    
160    #define PCRE_INCLUDED
161    
162    #include "pcre_tables.c"
163    #include "pcre_ucd.c"
164    
165    /* The definition of the macro PRINTABLE, which determines whether to print an
166    output character as-is or as a hex value when showing compiled patterns, is
167    the same as in the printint.src file. We use it here in cases when the locale
168    has not been explicitly changed, so as to get consistent output from systems
169    that differ in their output from isprint() even in the "C" locale. */
170    
171    #ifdef EBCDIC
172    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
173    #else
174    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
175    #endif
176    
177    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
178    
179    /* Posix support is disabled in 16 bit only mode. */
180    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
181    #define NOPOSIX
182    #endif
183    
184  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
185  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 189  Makefile. */
189  #include "pcreposix.h"  #include "pcreposix.h"
190  #endif  #endif
191    
192    /* It is also possible, originally for the benefit of a version that was
193    imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
194    NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
195    automatically cut out the UTF support if PCRE is built without it. */
196    
197    #ifndef SUPPORT_UTF
198    #ifndef NOUTF
199    #define NOUTF
200    #endif
201    #endif
202    
203    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
204    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
205    only from one place and is handled differently). I couldn't dream up any way of
206    using a single macro to do this in a generic way, because of the many different
207    argument requirements. We know that at least one of SUPPORT_PCRE8 and
208    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
209    use these in the definitions of generic macros.
210    
211    **** Special note about the PCHARSxxx macros: the address of the string to be
212    printed is always given as two arguments: a base address followed by an offset.
213    The base address is cast to the correct data size for 8 or 16 bit data; the
214    offset is in units of this size. If the string were given as base+offset in one
215    argument, the casting might be incorrectly applied. */
216    
217    #ifdef SUPPORT_PCRE8
218    
219    #define PCHARS8(lv, p, offset, len, f) \
220      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
221    
222    #define PCHARSV8(p, offset, len, f) \
223      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
224    
225    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
226      p = read_capture_name8(p, cn8, re)
227    
228    #define STRLEN8(p) ((int)strlen((char *)p))
229    
230    #define SET_PCRE_CALLOUT8(callout) \
231      pcre_callout = callout
232    
233    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
234       pcre_assign_jit_stack(extra, callback, userdata)
235    
236    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
237      re = pcre_compile((char *)pat, options, error, erroffset, tables)
238    
239    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
240        namesptr, cbuffer, size) \
241      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
242        (char *)namesptr, cbuffer, size)
243    
244    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
245      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
246    
247    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
248        offsets, size_offsets, workspace, size_workspace) \
249      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
250        offsets, size_offsets, workspace, size_workspace)
251    
252    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
253        offsets, size_offsets) \
254      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
255        offsets, size_offsets)
256    
257    #define PCRE_FREE_STUDY8(extra) \
258      pcre_free_study(extra)
259    
260    #define PCRE_FREE_SUBSTRING8(substring) \
261      pcre_free_substring(substring)
262    
263    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
264      pcre_free_substring_list(listptr)
265    
266    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
267        getnamesptr, subsptr) \
268      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
269        (char *)getnamesptr, subsptr)
270    
271    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
272      n = pcre_get_stringnumber(re, (char *)ptr)
273    
274    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
275      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
276    
277    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
278      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
279    
280    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
281      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
282    
283    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
284      pcre_printint(re, outfile, debug_lengths)
285    
286    #define PCRE_STUDY8(extra, re, options, error) \
287      extra = pcre_study(re, options, error)
288    
289    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
290      pcre_jit_stack_alloc(startsize, maxsize)
291    
292    #define PCRE_JIT_STACK_FREE8(stack) \
293      pcre_jit_stack_free(stack)
294    
295    #endif /* SUPPORT_PCRE8 */
296    
297    /* -----------------------------------------------------------*/
298    
299    #ifdef SUPPORT_PCRE16
300    
301    #define PCHARS16(lv, p, offset, len, f) \
302      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
303    
304    #define PCHARSV16(p, offset, len, f) \
305      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
306    
307    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
308      p = read_capture_name16(p, cn16, re)
309    
310    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
311    
312    #define SET_PCRE_CALLOUT16(callout) \
313      pcre16_callout = (int (*)(pcre16_callout_block *))callout
314    
315    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
316      pcre16_assign_jit_stack((pcre16_extra *)extra, \
317        (pcre16_jit_callback)callback, userdata)
318    
319    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
320      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
321        tables)
322    
323    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
324        namesptr, cbuffer, size) \
325      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
326        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
327    
328    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
329      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
330        (PCRE_UCHAR16 *)cbuffer, size/2)
331    
332    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
333        offsets, size_offsets, workspace, size_workspace) \
334      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
335        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
336        workspace, size_workspace)
337    
338    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
339        offsets, size_offsets) \
340      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
341        len, start_offset, options, offsets, size_offsets)
342    
343    #define PCRE_FREE_STUDY16(extra) \
344      pcre16_free_study((pcre16_extra *)extra)
345    
346    #define PCRE_FREE_SUBSTRING16(substring) \
347      pcre16_free_substring((PCRE_SPTR16)substring)
348    
349    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
350      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
351    
352    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
353        getnamesptr, subsptr) \
354      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
355        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
356    
357    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
358      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
359    
360    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
361      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
362        (PCRE_SPTR16 *)(void*)subsptr)
363    
364    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
365      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
366        (PCRE_SPTR16 **)(void*)listptr)
367    
368    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
369      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
370        tables)
371    
372    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
373      pcre16_printint(re, outfile, debug_lengths)
374    
375    #define PCRE_STUDY16(extra, re, options, error) \
376      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
377    
378    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
379      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
380    
381    #define PCRE_JIT_STACK_FREE16(stack) \
382      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
383    
384    #endif /* SUPPORT_PCRE16 */
385    
386    
387    /* ----- Both modes are supported; a runtime test is needed, except for
388    pcre_config(), where it doesn't matter which version is called. (The JIT
389    stack macros below nevertheless test use_pcre16 at run time.) ----- */
390    
391    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
392    
393    #define CHAR_SIZE (use_pcre16? 2:1)
394    
395    #define PCHARS(lv, p, offset, len, f) \
396      if (use_pcre16) \
397        PCHARS16(lv, p, offset, len, f); \
398      else \
399        PCHARS8(lv, p, offset, len, f)
400    
401    #define PCHARSV(p, offset, len, f) \
402      if (use_pcre16) \
403        PCHARSV16(p, offset, len, f); \
404      else \
405        PCHARSV8(p, offset, len, f)
406    
407    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
408      if (use_pcre16) \
409        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
410      else \
411        READ_CAPTURE_NAME8(p, cn8, cn16, re)
412    
413    #define SET_PCRE_CALLOUT(callout) \
414      if (use_pcre16) \
415        SET_PCRE_CALLOUT16(callout); \
416      else \
417        SET_PCRE_CALLOUT8(callout)
418    
419    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
420    
421    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
422      if (use_pcre16) \
423        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
424      else \
425        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
426    
427    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
428      if (use_pcre16) \
429        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
430      else \
431        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
432    
433    #define PCRE_CONFIG pcre_config
434    
435    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
436        namesptr, cbuffer, size) \
437      if (use_pcre16) \
438        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
439          namesptr, cbuffer, size); \
440      else \
441        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
442          namesptr, cbuffer, size)
443    
444    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
445      if (use_pcre16) \
446        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
447      else \
448        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
449    
450    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
451        offsets, size_offsets, workspace, size_workspace) \
452      if (use_pcre16) \
453        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
454          offsets, size_offsets, workspace, size_workspace); \
455      else \
456        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
457          offsets, size_offsets, workspace, size_workspace)
458    
459    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
460        offsets, size_offsets) \
461      if (use_pcre16) \
462        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
463          offsets, size_offsets); \
464      else \
465        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
466          offsets, size_offsets)
467    
468    #define PCRE_FREE_STUDY(extra) \
469      if (use_pcre16) \
470        PCRE_FREE_STUDY16(extra); \
471      else \
472        PCRE_FREE_STUDY8(extra)
473    
474    #define PCRE_FREE_SUBSTRING(substring) \
475      if (use_pcre16) \
476        PCRE_FREE_SUBSTRING16(substring); \
477      else \
478        PCRE_FREE_SUBSTRING8(substring)
479    
480    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
481      if (use_pcre16) \
482        PCRE_FREE_SUBSTRING_LIST16(listptr); \
483      else \
484        PCRE_FREE_SUBSTRING_LIST8(listptr)
485    
486    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
487        getnamesptr, subsptr) \
488      if (use_pcre16) \
489        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
490          getnamesptr, subsptr); \
491      else \
492        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
493          getnamesptr, subsptr)
494    
495    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
496      if (use_pcre16) \
497        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
498      else \
499        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
500    
501    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
502      if (use_pcre16) \
503        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
504      else \
505        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
506    
507    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
508      if (use_pcre16) \
509        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
510      else \
511        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
512    
513    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
514      (use_pcre16 ? \
515         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
516        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
517    
518    #define PCRE_JIT_STACK_FREE(stack) \
519      if (use_pcre16) \
520        PCRE_JIT_STACK_FREE16(stack); \
521      else \
522        PCRE_JIT_STACK_FREE8(stack)
523    
524    #define PCRE_MAKETABLES \
525      (use_pcre16? pcre16_maketables() : pcre_maketables())
526    
527    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
528      if (use_pcre16) \
529        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
530      else \
531        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
532    
533    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
534      if (use_pcre16) \
535        PCRE_PRINTINT16(re, outfile, debug_lengths); \
536      else \
537        PCRE_PRINTINT8(re, outfile, debug_lengths)
538    
539    #define PCRE_STUDY(extra, re, options, error) \
540      if (use_pcre16) \
541        PCRE_STUDY16(extra, re, options, error); \
542      else \
543        PCRE_STUDY8(extra, re, options, error)
544    
545    /* ----- Only 8-bit mode is supported ----- */
546    
547    #elif defined SUPPORT_PCRE8
548    #define CHAR_SIZE                 1
549    #define PCHARS                    PCHARS8
550    #define PCHARSV                   PCHARSV8
551    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
552    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
553    #define STRLEN                    STRLEN8
554    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
555    #define PCRE_COMPILE              PCRE_COMPILE8
556    #define PCRE_CONFIG               pcre_config
557    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
558    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
559    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
560    #define PCRE_EXEC                 PCRE_EXEC8
561    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
562    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
563    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
564    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
565    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
566    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
567    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
568    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
569    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
570    #define PCRE_MAKETABLES           pcre_maketables()
571    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
572    #define PCRE_PRINTINT             PCRE_PRINTINT8
573    #define PCRE_STUDY                PCRE_STUDY8
574    
575    /* ----- Only 16-bit mode is supported ----- */
576    
577    #else
578    #define CHAR_SIZE                 2
579    #define PCHARS                    PCHARS16
580    #define PCHARSV                   PCHARSV16
581    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
582    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
583    #define STRLEN                    STRLEN16
584    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
585    #define PCRE_COMPILE              PCRE_COMPILE16
586    #define PCRE_CONFIG               pcre16_config
587    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
588    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
589    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
590    #define PCRE_EXEC                 PCRE_EXEC16
591    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
592    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
593    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
594    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
595    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
596    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
597    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
598    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
599    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
600    #define PCRE_MAKETABLES           pcre16_maketables()
601    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
602    #define PCRE_PRINTINT             PCRE_PRINTINT16
603    #define PCRE_STUDY                PCRE_STUDY16
604    #endif
605    
606    /* ----- End of mode-specific function call macros ----- */
607    
608    
609    /* Other parameters */
610    
611  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
612  #ifdef CLK_TCK  #ifdef CLK_TCK
613  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 616  Makefile. */
616  #endif  #endif
617  #endif  #endif
618    
619  #define LOOPREPEAT 20000  #if !defined NODFA
620    #define DFA_WS_DIMENSION 1000
621    #endif
622    
623    /* This is the default loop count for timing. */
624    
625    #define LOOPREPEAT 500000
626    
627    /* Static variables */
628    
629  static FILE *outfile;  static FILE *outfile;
630  static int log_store = 0;  static int log_store = 0;
631    static int callout_count;
632    static int callout_extra;
633    static int callout_fail_count;
634    static int callout_fail_id;
635    static int debug_lengths;
636    static int first_callout;
637    static int jit_was_used;
638    static int locale_set = 0;
639    static int show_malloc;
640    static int use_utf;
641  static size_t gotten_store;  static size_t gotten_store;
642    static size_t first_gotten_store = 0;
643    static const unsigned char *last_callout_mark = NULL;
644    
645    /* The buffers grow automatically if very long input lines are encountered. */
646    
647    static int buffer_size = 50000;
648    static pcre_uint8 *buffer = NULL;
649    static pcre_uint8 *dbuffer = NULL;
650    static pcre_uint8 *pbuffer = NULL;
651    
652    /* Another buffer is needed for translation to 16-bit character strings. It
653    will be obtained and extended as required. */
654    
655  /* Debugging function to print the internal form of the regex. This is the same  #ifdef SUPPORT_PCRE16
656  code as contained in pcre.c under the DEBUG macro. */  static int buffer16_size = 0;
657    static pcre_uint16 *buffer16 = NULL;
658    
659  static const char *OP_names[] = {  #ifdef SUPPORT_PCRE8
660    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  
661    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  /* We need the table of operator lengths that is used for 16-bit compiling, in
662    "Opt", "^", "$", "Any", "chars", "not",  order to swap bytes in a pattern for saving/reloading testing. Luckily, the
663    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
664    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  appropriately for the 16-bit world. Just as a safety check, make sure that
665    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  COMPILE_PCRE16 is *not* set. */
666    "*", "*?", "+", "+?", "?", "??", "{", "{",  
667    "class", "Ref", "Recurse",  #ifdef COMPILE_PCRE16
668    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  #error COMPILE_PCRE16 must not be set when compiling pcretest.c
669    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  #endif
670    "Brazero", "Braminzero", "Bra"  
671    #if LINK_SIZE == 2
672    #undef LINK_SIZE
673    #define LINK_SIZE 1
674    #elif LINK_SIZE == 3 || LINK_SIZE == 4
675    #undef LINK_SIZE
676    #define LINK_SIZE 2
677    #else
678    #error LINK_SIZE must be either 2, 3, or 4
679    #endif
680    
681    #undef IMM2_SIZE
682    #define IMM2_SIZE 1
683    
684    #endif /* SUPPORT_PCRE8 */
685    
686    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
687    #endif  /* SUPPORT_PCRE16 */
688    
689    /* If we have 8-bit support, default use_pcre16 to false; if there is also
690    16-bit support, it can be changed by an option. If there is no 8-bit support,
691    there must be 16-bit support, so default it to 1. */
692    
693    #ifdef SUPPORT_PCRE8
694    static int use_pcre16 = 0;
695    #else
696    static int use_pcre16 = 1;
697    #endif
698    
699    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
700    
701    static int jit_study_bits[] =
702      {
703      PCRE_STUDY_JIT_COMPILE,
704      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
705      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
706      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
707      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
708      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
709      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
710        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
711    };
712    
713    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
714      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
715    
716    /* Textual explanations for runtime error codes */
717    
/* A NULL entry marks a code that has no text here, either because it is
reported by dedicated code ("handled specially") or because it is never
returned by the matching functions.
NOTE(review): presumably indexed by the negated PCRE error code; confirm
against the code that reads this table. */

static const char *errtexts[] = {
  NULL,  /* 0 is no error */
  NULL,  /* NOMATCH is handled specially */
  "NULL argument passed",
  "bad option value",
  "magic number missing",
  "unknown opcode - pattern overwritten?",
  "no more memory",
  NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",
  "callout error code",
  NULL,  /* BADUTF8/16 is handled specially */
  NULL,  /* BADUTF8/16 offset is handled specially */
  NULL,  /* PARTIAL is handled specially */
  "not used - internal error",
  "internal error - pattern overwritten?",
  "bad count value",
  "item unsupported for DFA matching",
  "backreference condition or recursion test not supported for DFA matching",
  "match limit not supported for DFA matching",
  "workspace size exceeded in DFA matching",
  "too much recursion for DFA matching",
  "recursion limit exceeded",
  "not used - internal error",
  "invalid combination of newline options",
  "bad offset value",
  NULL,  /* SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",
  "JIT stack limit reached",
  "pattern compiled in wrong mode: 8-bit/16-bit error",
  "pattern compiled with other endianness",
  "invalid data in workspace for DFA restart"
};
751    
752    
753    /*************************************************
754    *         Alternate character tables             *
755    *************************************************/
756    
757    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
758    using the default tables of the library. However, the T option can be used to
759    select alternate sets of tables, for different kinds of testing. Note also that
760    the L (locale) option also adjusts the tables. */
761    
762    /* This is the set of tables distributed as default with PCRE. It recognizes
763    only ASCII characters. */
764    
765    static const pcre_uint8 tables0[] = {
766    
767    /* This table is a lower casing table. */
768    
769        0,  1,  2,  3,  4,  5,  6,  7,
770        8,  9, 10, 11, 12, 13, 14, 15,
771       16, 17, 18, 19, 20, 21, 22, 23,
772       24, 25, 26, 27, 28, 29, 30, 31,
773       32, 33, 34, 35, 36, 37, 38, 39,
774       40, 41, 42, 43, 44, 45, 46, 47,
775       48, 49, 50, 51, 52, 53, 54, 55,
776       56, 57, 58, 59, 60, 61, 62, 63,
777       64, 97, 98, 99,100,101,102,103,
778      104,105,106,107,108,109,110,111,
779      112,113,114,115,116,117,118,119,
780      120,121,122, 91, 92, 93, 94, 95,
781       96, 97, 98, 99,100,101,102,103,
782      104,105,106,107,108,109,110,111,
783      112,113,114,115,116,117,118,119,
784      120,121,122,123,124,125,126,127,
785      128,129,130,131,132,133,134,135,
786      136,137,138,139,140,141,142,143,
787      144,145,146,147,148,149,150,151,
788      152,153,154,155,156,157,158,159,
789      160,161,162,163,164,165,166,167,
790      168,169,170,171,172,173,174,175,
791      176,177,178,179,180,181,182,183,
792      184,185,186,187,188,189,190,191,
793      192,193,194,195,196,197,198,199,
794      200,201,202,203,204,205,206,207,
795      208,209,210,211,212,213,214,215,
796      216,217,218,219,220,221,222,223,
797      224,225,226,227,228,229,230,231,
798      232,233,234,235,236,237,238,239,
799      240,241,242,243,244,245,246,247,
800      248,249,250,251,252,253,254,255,
801    
802    /* This table is a case flipping table. */
803    
804        0,  1,  2,  3,  4,  5,  6,  7,
805        8,  9, 10, 11, 12, 13, 14, 15,
806       16, 17, 18, 19, 20, 21, 22, 23,
807       24, 25, 26, 27, 28, 29, 30, 31,
808       32, 33, 34, 35, 36, 37, 38, 39,
809       40, 41, 42, 43, 44, 45, 46, 47,
810       48, 49, 50, 51, 52, 53, 54, 55,
811       56, 57, 58, 59, 60, 61, 62, 63,
812       64, 97, 98, 99,100,101,102,103,
813      104,105,106,107,108,109,110,111,
814      112,113,114,115,116,117,118,119,
815      120,121,122, 91, 92, 93, 94, 95,
816       96, 65, 66, 67, 68, 69, 70, 71,
817       72, 73, 74, 75, 76, 77, 78, 79,
818       80, 81, 82, 83, 84, 85, 86, 87,
819       88, 89, 90,123,124,125,126,127,
820      128,129,130,131,132,133,134,135,
821      136,137,138,139,140,141,142,143,
822      144,145,146,147,148,149,150,151,
823      152,153,154,155,156,157,158,159,
824      160,161,162,163,164,165,166,167,
825      168,169,170,171,172,173,174,175,
826      176,177,178,179,180,181,182,183,
827      184,185,186,187,188,189,190,191,
828      192,193,194,195,196,197,198,199,
829      200,201,202,203,204,205,206,207,
830      208,209,210,211,212,213,214,215,
831      216,217,218,219,220,221,222,223,
832      224,225,226,227,228,229,230,231,
833      232,233,234,235,236,237,238,239,
834      240,241,242,243,244,245,246,247,
835      248,249,250,251,252,253,254,255,
836    
837    /* This table contains bit maps for various character classes. Each map is 32
838    bytes long and the bits run from the least significant end of each byte. The
839    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
840    graph, print, punct, and cntrl. Other classes are built from combinations. */
841    
842      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
843      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846    
847      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
848      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
849      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851    
852      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
853      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
855      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
856    
857      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
858      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
859      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
860      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
861    
862      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
863      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
864      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
865      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
866    
867      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
868      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
869      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
870      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
871    
872      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
873      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
874      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
875      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
876    
877      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
878      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
879      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
880      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
881    
882      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
883      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
884      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
885      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
886    
887      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
888      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
889      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
890      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
891    
892    /* This table identifies various classes of character by individual bits:
893      0x01   white space character
894      0x02   letter
895      0x04   decimal digit
896      0x08   hexadecimal digit
897      0x10   alphanumeric or '_'
898      0x80   regular expression metacharacter or binary zero
899    */
900    
901      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
902      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
903      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
904      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
905      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
906      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
907      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
908      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
909      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
910      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
911      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
912      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
913      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
914      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
915      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
916      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
917      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
918      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
919      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
920      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
921      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
922      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
923      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
924      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
925      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
926      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
927      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
928      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
929      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
930      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
931      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
932      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
933    
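/* This is a set of tables that came originally from a Windows user. It seems to
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc. The layout is the
same as for tables0: a 256-byte lower casing table, a 256-byte case flipping
table, 320 bytes of character class bit maps, and a 256-byte character type
table. */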
937    
938    static const pcre_uint8 tables1[] = {
939    0,1,2,3,4,5,6,7,
940    8,9,10,11,12,13,14,15,
941    16,17,18,19,20,21,22,23,
942    24,25,26,27,28,29,30,31,
943    32,33,34,35,36,37,38,39,
944    40,41,42,43,44,45,46,47,
945    48,49,50,51,52,53,54,55,
946    56,57,58,59,60,61,62,63,
947    64,97,98,99,100,101,102,103,
948    104,105,106,107,108,109,110,111,
949    112,113,114,115,116,117,118,119,
950    120,121,122,91,92,93,94,95,
951    96,97,98,99,100,101,102,103,
952    104,105,106,107,108,109,110,111,
953    112,113,114,115,116,117,118,119,
954    120,121,122,123,124,125,126,127,
955    128,129,130,131,132,133,134,135,
956    136,137,138,139,140,141,142,143,
957    144,145,146,147,148,149,150,151,
958    152,153,154,155,156,157,158,159,
959    160,161,162,163,164,165,166,167,
960    168,169,170,171,172,173,174,175,
961    176,177,178,179,180,181,182,183,
962    184,185,186,187,188,189,190,191,
963    224,225,226,227,228,229,230,231,
964    232,233,234,235,236,237,238,239,
965    240,241,242,243,244,245,246,215,
966    248,249,250,251,252,253,254,223,
967    224,225,226,227,228,229,230,231,
968    232,233,234,235,236,237,238,239,
969    240,241,242,243,244,245,246,247,
970    248,249,250,251,252,253,254,255,
971    0,1,2,3,4,5,6,7,
972    8,9,10,11,12,13,14,15,
973    16,17,18,19,20,21,22,23,
974    24,25,26,27,28,29,30,31,
975    32,33,34,35,36,37,38,39,
976    40,41,42,43,44,45,46,47,
977    48,49,50,51,52,53,54,55,
978    56,57,58,59,60,61,62,63,
979    64,97,98,99,100,101,102,103,
980    104,105,106,107,108,109,110,111,
981    112,113,114,115,116,117,118,119,
982    120,121,122,91,92,93,94,95,
983    96,65,66,67,68,69,70,71,
984    72,73,74,75,76,77,78,79,
985    80,81,82,83,84,85,86,87,
986    88,89,90,123,124,125,126,127,
987    128,129,130,131,132,133,134,135,
988    136,137,138,139,140,141,142,143,
989    144,145,146,147,148,149,150,151,
990    152,153,154,155,156,157,158,159,
991    160,161,162,163,164,165,166,167,
992    168,169,170,171,172,173,174,175,
993    176,177,178,179,180,181,182,183,
994    184,185,186,187,188,189,190,191,
995    224,225,226,227,228,229,230,231,
996    232,233,234,235,236,237,238,239,
997    240,241,242,243,244,245,246,215,
998    248,249,250,251,252,253,254,223,
999    192,193,194,195,196,197,198,199,
1000    200,201,202,203,204,205,206,207,
1001    208,209,210,211,212,213,214,247,
1002    216,217,218,219,220,221,222,255,
1003    0,62,0,0,1,0,0,0,
1004    0,0,0,0,0,0,0,0,
1005    32,0,0,0,1,0,0,0,
1006    0,0,0,0,0,0,0,0,
1007    0,0,0,0,0,0,255,3,
1008    126,0,0,0,126,0,0,0,
1009    0,0,0,0,0,0,0,0,
1010    0,0,0,0,0,0,0,0,
1011    0,0,0,0,0,0,255,3,
1012    0,0,0,0,0,0,0,0,
1013    0,0,0,0,0,0,12,2,
1014    0,0,0,0,0,0,0,0,
1015    0,0,0,0,0,0,0,0,
1016    254,255,255,7,0,0,0,0,
1017    0,0,0,0,0,0,0,0,
1018    255,255,127,127,0,0,0,0,
1019    0,0,0,0,0,0,0,0,
1020    0,0,0,0,254,255,255,7,
1021    0,0,0,0,0,4,32,4,
1022    0,0,0,128,255,255,127,255,
1023    0,0,0,0,0,0,255,3,
1024    254,255,255,135,254,255,255,7,
1025    0,0,0,0,0,4,44,6,
1026    255,255,127,255,255,255,127,255,
1027    0,0,0,0,254,255,255,255,
1028    255,255,255,255,255,255,255,127,
1029    0,0,0,0,254,255,255,255,
1030    255,255,255,255,255,255,255,255,
1031    0,2,0,0,255,255,255,255,
1032    255,255,255,255,255,255,255,127,
1033    0,0,0,0,255,255,255,255,
1034    255,255,255,255,255,255,255,255,
1035    0,0,0,0,254,255,0,252,
1036    1,0,0,248,1,0,0,120,
1037    0,0,0,0,254,255,255,255,
1038    0,0,128,0,0,0,128,0,
1039    255,255,255,255,0,0,0,0,
1040    0,0,0,0,0,0,0,128,
1041    255,255,255,255,0,0,0,0,
1042    0,0,0,0,0,0,0,0,
1043    128,0,0,0,0,0,0,0,
1044    0,1,1,0,1,1,0,0,
1045    0,0,0,0,0,0,0,0,
1046    0,0,0,0,0,0,0,0,
1047    1,0,0,0,128,0,0,0,
1048    128,128,128,128,0,0,128,0,
1049    28,28,28,28,28,28,28,28,
1050    28,28,0,0,0,0,0,128,
1051    0,26,26,26,26,26,26,18,
1052    18,18,18,18,18,18,18,18,
1053    18,18,18,18,18,18,18,18,
1054    18,18,18,128,128,0,128,16,
1055    0,26,26,26,26,26,26,18,
1056    18,18,18,18,18,18,18,18,
1057    18,18,18,18,18,18,18,18,
1058    18,18,18,128,128,0,0,0,
1059    0,0,0,0,0,1,0,0,
1060    0,0,0,0,0,0,0,0,
1061    0,0,0,0,0,0,0,0,
1062    0,0,0,0,0,0,0,0,
1063    1,0,0,0,0,0,0,0,
1064    0,0,18,0,0,0,0,0,
1065    0,0,20,20,0,18,0,0,
1066    0,20,18,0,0,0,0,0,
1067    18,18,18,18,18,18,18,18,
1068    18,18,18,18,18,18,18,18,
1069    18,18,18,18,18,18,18,0,
1070    18,18,18,18,18,18,18,18,
1071    18,18,18,18,18,18,18,18,
1072    18,18,18,18,18,18,18,18,
1073    18,18,18,18,18,18,18,0,
1074    18,18,18,18,18,18,18,18
1075  };  };
1076    
1077    
1078  static void print_internals(pcre *re)  
1079    
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* A few elderly systems (SunOS4 is one example) have no strerror() in their
C library. They do supply the sys_errlist[] message table together with its
size, sys_nerr, and this replacement is built from those. Any number outside
the table yields a fixed "unknown" message. */

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n < 0 || n >= sys_nerr)? "unknown error number" : sys_errlist[n];
}
#endif /* HAVE_STRERROR */
1099    
1100    
1101    
1102    /*************************************************
1103    *       Print newline configuration              *
1104    *************************************************/
1105    
1106  fprintf(outfile, "------------------------------------------------------------------\n");  /*
1107    Arguments:
1108      rc         the return code from PCRE_CONFIG_NEWLINE
1109      isc        TRUE if called from "-C newline"
1110    Returns:     nothing
1111    */
1112    
1113  for(;;)  static void
1114    print_newline_config(int rc, BOOL isc)
1115    {
1116    const char *s = NULL;
1117    if (!isc) printf("  Newline sequence is ");
1118    switch(rc)
1119    {    {
1120    int c;    case CHAR_CR: s = "CR"; break;
1121    int charlength;    case CHAR_LF: s = "LF"; break;
1122      case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1123      case -1: s = "ANY"; break;
1124      case -2: s = "ANYCRLF"; break;
1125    
1126      default:
1127      printf("a non-standard value: 0x%04x\n", rc);
1128      return;
1129      }
1130    
1131    printf("%s\n", s);
1132    }
1133    
1134    
1135    
1136    /*************************************************
1137    *         JIT memory callback                    *
1138    *************************************************/
1139    
/* Records in jit_was_used that this callback was invoked, and returns the
opaque argument unchanged, cast to a JIT stack pointer. */
static pcre_jit_stack* jit_callback(void *arg)
{
jit_was_used = TRUE;
return (pcre_jit_stack *)arg;
}
1145    
1146    
1147    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  #if !defined NOUTF || defined SUPPORT_PCRE16
1148    /*************************************************
1149    *            Convert UTF-8 string to value       *
1150    *************************************************/
1151    
1152    if (*code >= OP_BRA)  /* This function takes one or more bytes that represents a UTF-8 character,
1153    and returns the value of the character.
1154    
1155    Argument:
1156      utf8bytes   a pointer to the byte vector
1157      vptr        a pointer to an int to receive the value
1158    
1159    Returns:      >  0 => the number of bytes consumed
1160                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1161    */
1162    
1163    static int
1164    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1165    {
1166    int c = *utf8bytes++;
1167    int d = c;
1168    int i, j, s;
1169    
1170    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1171      {
1172      if ((d & 0x80) == 0) break;
1173      d <<= 1;
1174      }
1175    
1176    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1177    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1178    
1179    /* i now has a value in the range 1-5 */
1180    
1181    s = 6*i;
1182    d = (c & utf8_table3[i]) << s;
1183    
1184    for (j = 0; j < i; j++)
1185      {
1186      c = *utf8bytes++;
1187      if ((c & 0xc0) != 0x80) return -(j+1);
1188      s -= 6;
1189      d |= (c & 0x3f) << s;
1190      }
1191    
1192    /* Check that encoding was the correct unique one */
1193    
1194    for (j = 0; j < utf8_table1_size; j++)
1195      if (d <= utf8_table1[j]) break;
1196    if (j != i) return -(i+1);
1197    
1198    /* Valid value */
1199    
1200    *vptr = d;
1201    return i+1;
1202    }
1203    #endif /* NOUTF || SUPPORT_PCRE16 */
1204    
1205    
1206    
1207    #if !defined NOUTF || defined SUPPORT_PCRE16
1208    /*************************************************
1209    *       Convert character value to UTF-8         *
1210    *************************************************/
1211    
1212    /* This function takes an integer value in the range 0 - 0x7fffffff
1213    and encodes it as a UTF-8 character in 0 to 6 bytes.
1214    
1215    Arguments:
1216      cvalue     the character value
1217      utf8bytes  pointer to buffer for result - at least 6 bytes long
1218    
1219    Returns:     number of characters placed in the buffer
1220    */
1221    
1222    static int
1223    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1224    {
1225    register int i, j;
1226    for (i = 0; i < utf8_table1_size; i++)
1227      if (cvalue <= utf8_table1[i]) break;
1228    utf8bytes += i;
1229    for (j = i; j > 0; j--)
1230     {
1231     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1232     cvalue >>= 6;
1233     }
1234    *utf8bytes = utf8_table2[i] | cvalue;
1235    return i + 1;
1236    }
1237    #endif
1238    
1239    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is enlarged when it holds
fewer than 2*len + 2 bytes. That is always enough: in non-UTF mode every input
byte becomes one 16-bit item, and when decoding UTF-8 a character up to 0xffff
takes 1 to 3 input bytes and yields one 16-bit item, while a higher character
takes 4 input bytes and yields two 16-bit items. The extra 2 bytes are for the
terminating zero.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;

if (buffer16_size < needed)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is decoded as UTF-8, one character at a time. */

  int c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;

    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which is allowed only in UTF mode. */

      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: each byte is simply widened. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1318    
1319    
1320    /*************************************************
1321    *        Read or extend an input line            *
1322    *************************************************/
1323    
1324    /* Input lines are read into buffer, but both patterns and data lines can be
1325    continued over multiple input lines. In addition, if the buffer fills up, we
1326    want to automatically expand it so as to be able to handle extremely large
1327    lines that are needed for certain stress tests. When the input buffer is
1328    expanded, the other two buffers must also be expanded likewise, and the
1329    contents of pbuffer, which are a copy of the input for callouts, must be
1330    preserved (for when expansion happens for a data line). This is not the most
1331    optimal way of handling this, but hey, this is just a test program!
1332    
1333    Arguments:
1334      f            the file to read
1335      start        where in buffer to start (this *must* be within buffer)
1336      prompt       for stdin or readline()
1337    
1338    Returns:       pointer to the start of new data
1339                   could be a copy of start, or could be moved
1340                   NULL if no data read and EOF reached
1341    */
1342    
1343    static pcre_uint8 *
1344    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1345    {
1346    pcre_uint8 *here = start;
1347    
1348    for (;;)
1349      {
1350      size_t rlen = (size_t)(buffer_size - (here - buffer));
1351    
1352      if (rlen > 1000)
1353        {
1354        int dlen;
1355    
1356        /* If libreadline or libedit support is required, use readline() to read a
1357        line if the input is a terminal. Note that readline() removes the trailing
1358        newline, so we must put it back again, to be compatible with fgets(). */
1359    
1360    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1361        if (isatty(fileno(f)))
1362          {
1363          size_t len;
1364          char *s = readline(prompt);
1365          if (s == NULL) return (here == start)? NULL : start;
1366          len = strlen(s);
1367          if (len > 0) add_history(s);
1368          if (len > rlen - 1) len = rlen - 1;
1369          memcpy(here, s, len);
1370          here[len] = '\n';
1371          here[len+1] = 0;
1372          free(s);
1373          }
1374        else
1375    #endif
1376    
1377        /* Read the next line by normal means, prompting if the file is stdin. */
1378    
1379          {
1380          if (f == stdin) printf("%s", prompt);
1381          if (fgets((char *)here, rlen,  f) == NULL)
1382            return (here == start)? NULL : start;
1383          }
1384    
1385        dlen = (int)strlen((char *)here);
1386        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1387        here += dlen;
1388        }
1389    
1390      else
1391        {
1392        int new_buffer_size = 2*buffer_size;
1393        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1394        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1395        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1396    
1397        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1398          {
1399          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1400          exit(1);
1401          }
1402    
1403        memcpy(new_buffer, buffer, buffer_size);
1404        memcpy(new_pbuffer, pbuffer, buffer_size);
1405    
1406        buffer_size = new_buffer_size;
1407    
1408        start = new_buffer + (start - buffer);
1409        here = new_buffer + (here - buffer);
1410    
1411        free(buffer);
1412        free(dbuffer);
1413        free(pbuffer);
1414    
1415        buffer = new_buffer;
1416        dbuffer = new_dbuffer;
1417        pbuffer = new_pbuffer;
1418        }
1419      }
1420    
1421    return NULL;  /* Control never gets here */
1422    }
1423    
1424    
1425    
1426    /*************************************************
1427    *          Read number from string               *
1428    *************************************************/
1429    
1430    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1431    around with conditional compilation, just do the job by hand. It is only used
1432    for unpicking arguments, so just keep it simple.
1433    
1434    Arguments:
1435      str           string to be converted
1436      endptr        where to put the end pointer
1437    
1438    Returns:        the unsigned long
1439    */
1440    
1441    static int
1442    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1443    {
1444    int result = 0;
1445    while(*str != 0 && isspace(*str)) str++;
1446    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1447    *endptr = str;
1448    return(result);
1449    }
1450    
1451    
1452    
1453    /*************************************************
1454    *             Print one character                *
1455    *************************************************/
1456    
1457    /* Print a single character either literally, or as a hex escape. */
1458    
1459    static int pchar(int c, FILE *f)
1460    {
1461    if (PRINTOK(c))
1462      {
1463      if (f != NULL) fprintf(f, "%c", c);
1464      return 1;
1465      }
1466    
1467    if (c < 0x100)
1468      {
1469      if (use_utf)
1470        {
1471        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1472        return 6;
1473        }
1474      else
1475        {
1476        if (f != NULL) fprintf(f, "\\x%02x", c);
1477        return 4;
1478        }
1479      }
1480    
1481    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1482    return (c <= 0x000000ff)? 6 :
1483           (c <= 0x00000fff)? 7 :
1484           (c <= 0x0000ffff)? 8 :
1485           (c <= 0x000fffff)? 9 : 10;
1486    }
1487    
1488    
1489    
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. A negative length
means that the string is zero-terminated. In UTF mode, a byte that does not
start a valid character lying wholly within the string is output on its own,
as a single byte value. */

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int c = 0;
int yield = 0;

if (length < 0)
  length = strlen((char *)p);

while (length > 0)
  {
#if !defined NOUTF
  if (use_utf)
    {
    int used = utf82ord(p, &c);
    if (used > 0 && used <= length)   /* Mustn't run over the end */
      {
      p += used;
      length -= used;
      yield += pchar(c, f);
      continue;
      }
    }
#endif
  c = *p++;
  length--;
  yield += pchar(c, f);
  }

return yield;
}
#endif
1528    
1529    
1530    
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Yields the number of 16-bit items that precede the terminating zero. */

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 q = p;
while (*q != 0) q++;
return (int)(q - p);
}
#endif  /* SUPPORT_PCRE16 */
1543    
1544    
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. A negative length
means that the string is zero-terminated.

In UTF mode, a high surrogate (0xD800 - 0xDBFF) that is immediately followed,
within the string, by a low surrogate (0xDC00 - 0xDFFF inclusive) is combined
with it into a single character. A surrogate that is not part of such a pair
is output as it stands.

Arguments:
  p          points to the 16-bit string
  length     number of 16-bit items, or negative for a zero-terminated string
  f          the file to write to, or NULL just to count

Returns:     the number of characters in the printed form
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The upper bound is inclusive: 0xDFFF is a valid low surrogate. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1581    
1582    
1583    
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy a run of alphanumeric characters into the name buffer and follow it
with two zero bytes. If the compiled pattern has no group of that name, a
message is written to outfile; the name is left in the buffer either way.

Arguments:
  p          points to the first character of the name in the input
  pp         points to the pointer to where the name is to be put; it is
               updated to point at the second of the two zero bytes
  re         the compiled pattern

Returns:     pointer to the first input character that was not copied
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *out = *pp;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(*pp, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1607    
1608    
1609    
1610    #ifdef SUPPORT_PCRE16
1611    /*************************************************
1612    *     Read a capture name (16-bit) and check it  *
1613    *************************************************/
1614    
1615    /* Note that the text being read is 8-bit. */
1616    
1617    static pcre_uint8 *
1618    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1619    {
1620    pcre_uint16 *npp = *pp;
1621    while (isalnum(*p)) *npp++ = *p++;
1622    *npp++ = 0;
1623    *npp = 0;
1624    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1625      {
1626      fprintf(outfile, "no parentheses with name \"");
1627      PCHARSV(*pp, 0, -1, outfile);
1628      fprintf(outfile, "\"\n");
1629      }
1630    *pp = npp;
1631    return p;
1632    }
1633    #endif  /* SUPPORT_PCRE16 */
1634    
1635    
1636    
1637    /*************************************************
1638    *              Callout function                  *
1639    *************************************************/
1640    
1641    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1642    the match. Yield zero unless more callouts than the fail count, or the callout
1643    data is not zero. */
1644    
1645    static int callout(pcre_callout_block *cb)
1646    {
1647    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1648    int i, pre_start, post_start, subject_length;
1649    
1650    if (callout_extra)
1651      {
1652      fprintf(f, "Callout %d: last capture = %d\n",
1653        cb->callout_number, cb->capture_last);
1654    
1655      for (i = 0; i < cb->capture_top * 2; i += 2)
1656        {
1657        if (cb->offset_vector[i] < 0)
1658          fprintf(f, "%2d: <unset>\n", i/2);
1659        else
1660          {
1661          fprintf(f, "%2d: ", i/2);
1662          PCHARSV(cb->subject, cb->offset_vector[i],
1663            cb->offset_vector[i+1] - cb->offset_vector[i], f);
1664          fprintf(f, "\n");
1665          }
1666        }
1667      }
1668    
1669    /* Re-print the subject in canonical form, the first time or if giving full
1670    datails. On subsequent calls in the same match, we use pchars just to find the
1671    printed lengths of the substrings. */
1672    
1673    if (f != NULL) fprintf(f, "--->");
1674    
1675    PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1676    PCHARS(post_start, cb->subject, cb->start_match,
1677      cb->current_position - cb->start_match, f);
1678    
1679    PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1680    
1681    PCHARSV(cb->subject, cb->current_position,
1682      cb->subject_length - cb->current_position, f);
1683    
1684    if (f != NULL) fprintf(f, "\n");
1685    
1686    /* Always print appropriate indicators, with callout number if not already
1687    shown. For automatic callouts, show the pattern offset. */
1688    
1689    if (cb->callout_number == 255)
1690      {
1691      fprintf(outfile, "%+3d ", cb->pattern_position);
1692      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1693      }
1694    else
1695      {
1696      if (callout_extra) fprintf(outfile, "    ");
1697        else fprintf(outfile, "%3d ", cb->callout_number);
1698      }
1699    
1700    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1701    fprintf(outfile, "^");
1702    
1703    if (post_start > 0)
1704      {
1705      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1706      fprintf(outfile, "^");
1707      }
1708    
1709    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1710      fprintf(outfile, " ");
1711    
1712    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1713      pbuffer + cb->pattern_position);
1714    
1715    fprintf(outfile, "\n");
1716    first_callout = 0;
1717    
1718    if (cb->mark != last_callout_mark)
1719      {
1720      if (cb->mark == NULL)
1721        fprintf(outfile, "Latest Mark: <unset>\n");
1722      else
1723        {
1724        fprintf(outfile, "Latest Mark: ");
1725        PCHARSV(cb->mark, 0, -1, outfile);
1726        putc('\n', outfile);
1727        }
1728      last_callout_mark = cb->mark;
1729      }
1730    
1731    if (cb->callout_data != NULL)
1732      {
1733      int callout_data = *((int *)(cb->callout_data));
1734      if (callout_data != 0)
1735        {
1736        fprintf(outfile, "Callout data = %d\n", callout_data);
1737        return callout_data;
1738        }
1739      }
1740    
1741    return (cb->callout_number != callout_fail_id)? 0 :
1742           (++callout_count >= callout_fail_count)? 1 : 0;
1743    }
1744    
1745    
1746    /*************************************************
1747    *            Local malloc functions              *
1748    *************************************************/
1749    
1750    /* Alternative malloc function, to test functionality and save the size of a
1751    compiled re, which is the first store request that pcre_compile() makes. The
1752    show_malloc variable is set only during matching. */
1753    
1754    static void *new_malloc(size_t size)
1755    {
1756    void *block = malloc(size);
1757    gotten_store = size;
1758    if (first_gotten_store == 0) first_gotten_store = size;
1759    if (show_malloc)
1760      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1761    return block;
1762    }
1763    
1764    static void new_free(void *block)
1765    {
1766    if (show_malloc)
1767      fprintf(outfile, "free             %p\n", block);
1768    free(block);
1769    }
1770    
1771    /* For recursion malloc/free, to test stacking calls */
1772    
1773    static void *stack_malloc(size_t size)
1774    {
1775    void *block = malloc(size);
1776    if (show_malloc)
1777      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1778    return block;
1779    }
1780    
1781    static void stack_free(void *block)
1782    {
1783    if (show_malloc)
1784      fprintf(outfile, "stack_free       %p\n", block);
1785    free(block);
1786    }
1787    
1788    
1789    /*************************************************
1790    *          Call pcre_fullinfo()                  *
1791    *************************************************/
1792    
1793    /* Get one piece of information from the pcre_fullinfo() function. When only
1794    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1795    value, but the code is defensive.
1796    
1797    Arguments:
1798      re        compiled regex
1799      study     study data
1800      option    PCRE_INFO_xxx option
1801      ptr       where to put the data
1802    
1803    Returns:    0 when OK, < 0 on error
1804    */
1805    
1806    static int
1807    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1808    {
1809    int rc;
1810    
1811    if (use_pcre16)
1812    #ifdef SUPPORT_PCRE16
1813      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1814    #else
1815      rc = PCRE_ERROR_BADMODE;
1816    #endif
1817    else
1818    #ifdef SUPPORT_PCRE8
1819      rc = pcre_fullinfo(re, study, option, ptr);
1820    #else
1821      rc = PCRE_ERROR_BADMODE;
1822    #endif
1823    
1824    if (rc < 0)
1825      {
1826      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1827        use_pcre16? "16" : "", option);
1828      if (rc == PCRE_ERROR_BADMODE)
1829        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1830          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1831      }
1832    
1833    return rc;
1834    }
1835    
1836    
1837    
1838    /*************************************************
1839    *             Swap byte functions                *
1840    *************************************************/
1841    
1842    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1843    value, respectively.
1844    
1845    Arguments:
1846      value        any number
1847    
1848    Returns:       the byte swapped value
1849    */
1850    
1851    static pcre_uint32
1852    swap_uint32(pcre_uint32 value)
1853    {
1854    return ((value & 0x000000ff) << 24) |
1855           ((value & 0x0000ff00) <<  8) |
1856           ((value & 0x00ff0000) >>  8) |
1857           (value >> 24);
1858    }
1859    
1860    static pcre_uint16
1861    swap_uint16(pcre_uint16 value)
1862    {
1863    return (value >> 8) | (value << 8);
1864    }
1865    
1866    
1867    
1868    /*************************************************
1869    *        Flip bytes in a compiled pattern        *
1870    *************************************************/
1871    
1872    /* This function is called if the 'F' option was present on a pattern that is
1873    to be written to a file. We flip the bytes of all the integer fields in the
1874    regex data block and the study block. In 16-bit mode this also flips relevant
1875    bytes in the pattern itself. This is to make it possible to test PCRE's
1876    ability to reload byte-flipped patterns, e.g. those compiled on a different
1877    architecture. */
1878    
1879    static void
1880    regexflip(pcre *ere, pcre_extra *extra)
1881    {
1882    REAL_PCRE *re = (REAL_PCRE *)ere;
1883    #ifdef SUPPORT_PCRE16
1884    int op;
1885    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1886    int length = re->name_count * re->name_entry_size;
1887    #ifdef SUPPORT_UTF
1888    BOOL utf = (re->options & PCRE_UTF16) != 0;
1889    BOOL utf16_char = FALSE;
1890    #endif /* SUPPORT_UTF */
1891    #endif /* SUPPORT_PCRE16 */
1892    
1893    /* Always flip the bytes in the main data block and study blocks. */
1894    
1895    re->magic_number = REVERSED_MAGIC_NUMBER;
1896    re->size = swap_uint32(re->size);
1897    re->options = swap_uint32(re->options);
1898    re->flags = swap_uint16(re->flags);
1899    re->top_bracket = swap_uint16(re->top_bracket);
1900    re->top_backref = swap_uint16(re->top_backref);
1901    re->first_char = swap_uint16(re->first_char);
1902    re->req_char = swap_uint16(re->req_char);
1903    re->name_table_offset = swap_uint16(re->name_table_offset);
1904    re->name_entry_size = swap_uint16(re->name_entry_size);
1905    re->name_count = swap_uint16(re->name_count);
1906    
1907    if (extra != NULL)
1908      {
1909      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1910      rsd->size = swap_uint32(rsd->size);
1911      rsd->flags = swap_uint32(rsd->flags);
1912      rsd->minlength = swap_uint32(rsd->minlength);
1913      }
1914    
1915      case OP_OPT:  /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1916      fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  in the name table, if present, and then in the pattern itself. */
     code++;  
     break;  
1917    
1918      case OP_COND:  #ifdef SUPPORT_PCRE16
1919      fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  if (!use_pcre16) return;
     code += 2;  
     break;  
1920    
1921      case OP_CREF:  while(TRUE)
1922      fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);    {
1923      code++;    /* Swap previous characters. */
1924      break;    while (length-- > 0)
1925        {
1926        *ptr = swap_uint16(*ptr);
1927        ptr++;
1928        }
1929    #ifdef SUPPORT_UTF
1930      if (utf16_char)
1931        {
1932        if ((ptr[-1] & 0xfc00) == 0xd800)
1933          {
1934          /* We know that there is only one extra character in UTF-16. */
1935          *ptr = swap_uint16(*ptr);
1936          ptr++;
1937          }
1938        }
1939      utf16_char = FALSE;
1940    #endif /* SUPPORT_UTF */
1941    
1942      case OP_CHARS:    /* Get next opcode. */
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
1943    
1944      case OP_KETRMAX:    length = 0;
1945      case OP_KETRMIN:    op = *ptr;
1946      case OP_ALT:    *ptr++ = swap_uint16(op);
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
1947    
1948      case OP_REVERSE:    switch (op)
1949      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      {
1950      code += 2;      case OP_END:
1951      break;      return;
1952    
1953    #ifdef SUPPORT_UTF
1954        case OP_CHAR:
1955        case OP_CHARI:
1956        case OP_NOT:
1957        case OP_NOTI:
1958      case OP_STAR:      case OP_STAR:
1959      case OP_MINSTAR:      case OP_MINSTAR:
1960      case OP_PLUS:      case OP_PLUS:
1961      case OP_MINPLUS:      case OP_MINPLUS:
1962      case OP_QUERY:      case OP_QUERY:
1963      case OP_MINQUERY:      case OP_MINQUERY:
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
1964      case OP_UPTO:      case OP_UPTO:
1965      case OP_MINUPTO:      case OP_MINUPTO:
1966      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);      case OP_EXACT:
1967        else fprintf(outfile, "    \\x%02x{", c);      case OP_POSSTAR:
1968      if (*code != OP_EXACT) fprintf(outfile, ",");      case OP_POSPLUS:
1969      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_POSQUERY:
1970      if (*code == OP_MINUPTO) fprintf(outfile, "?");      case OP_POSUPTO:
1971      code += 3;      case OP_STARI:
1972      break;      case OP_MINSTARI:
1973        case OP_PLUSI:
1974      case OP_TYPEEXACT:      case OP_MINPLUSI:
1975      case OP_TYPEUPTO:      case OP_QUERYI:
1976      case OP_TYPEMINUPTO:      case OP_MINQUERYI:
1977      fprintf(outfile, "    %s{", OP_names[code[3]]);      case OP_UPTOI:
1978      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");      case OP_MINUPTOI:
1979      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_EXACTI:
1980      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");      case OP_POSSTARI:
1981      code += 3;      case OP_POSPLUSI:
1982      break;      case OP_POSQUERYI:
1983        case OP_POSUPTOI:
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
1984      case OP_NOTSTAR:      case OP_NOTSTAR:
1985      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
1986      case OP_NOTPLUS:      case OP_NOTPLUS:
1987      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
1988      case OP_NOTQUERY:      case OP_NOTQUERY:
1989      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
1990      case OP_NOTUPTO:      case OP_NOTUPTO:
1991      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
1992      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);      case OP_NOTEXACT:
1993        else fprintf(outfile, "    [^\\x%02x]{", c);      case OP_NOTPOSSTAR:
1994      if (*code != OP_NOTEXACT) fprintf(outfile, ",");      case OP_NOTPOSPLUS:
1995      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_NOTPOSQUERY:
1996      if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");      case OP_NOTPOSUPTO:
1997      code += 3;      case OP_NOTSTARI:
1998      break;      case OP_NOTMINSTARI:
1999        case OP_NOTPLUSI:
2000        case OP_NOTMINPLUSI:
2001        case OP_NOTQUERYI:
2002        case OP_NOTMINQUERYI:
2003        case OP_NOTUPTOI:
2004        case OP_NOTMINUPTOI:
2005        case OP_NOTEXACTI:
2006        case OP_NOTPOSSTARI:
2007        case OP_NOTPOSPLUSI:
2008        case OP_NOTPOSQUERYI:
2009        case OP_NOTPOSUPTOI:
2010        if (utf) utf16_char = TRUE;
2011    #endif
2012        /* Fall through. */
2013    
2014      case OP_REF:      default:
2015      fprintf(outfile, "    \\%d", *(++code));      length = OP_lengths16[op] - 1;
2016      code++;      break;
     goto CLASS_REF_REPEAT;  
2017    
2018      case OP_CLASS:      case OP_CLASS:
2019        case OP_NCLASS:
2020        /* Skip the character bit map. */
2021        ptr += 32/sizeof(pcre_uint16);
2022        length = 0;
2023        break;
2024    
2025        case OP_XCLASS:
2026        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2027        if (LINK_SIZE > 1)
2028          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2029            - (1 + LINK_SIZE + 1));
2030        else
2031          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2032    
2033        /* Reverse the size of the XCLASS instance. */
2034        *ptr = swap_uint16(*ptr);
2035        ptr++;
2036        if (LINK_SIZE > 1)
2037        {        {
2038        int i, min, max;        *ptr = swap_uint16(*ptr);
2039        code++;        ptr++;
2040        fprintf(outfile, "    [");        }
2041    
2042        for (i = 0; i < 256; i++)      op = *ptr;
2043          {      *ptr = swap_uint16(op);
2044          if ((code[i/8] & (1 << (i&7))) != 0)      ptr++;
2045            {      if ((op & XCL_MAP) != 0)
2046            int j;        {
2047            for (j = i+1; j < 256; j++)        /* Skip the character bit map. */
2048              if ((code[j/8] & (1 << (j&7))) == 0) break;        ptr += 32/sizeof(pcre_uint16);
2049            if (i == '-' || i == ']') fprintf(outfile, "\\");        length -= 32/sizeof(pcre_uint16);
2050            if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);        }
2051            if (--j > i)      break;
2052              {      }
2053              fprintf(outfile, "-");    }
2054              if (j == '-' || j == ']') fprintf(outfile, "\\");  /* Control should never reach here in 16 bit mode. */
2055              if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  #endif /* SUPPORT_PCRE16 */
2056              }  }
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
2057    
       CLASS_REF_REPEAT:  
2058    
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
2059    
2060          case OP_CRRANGE:  /*************************************************
2061          case OP_CRMINRANGE:  *        Check match or recursion limit          *
2062          min = (code[1] << 8) + code[2];  *************************************************/
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
2063    
2064          default:  static int
2065          code--;  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2066          }    int start_offset, int options, int *use_offsets, int use_size_offsets,
2067        }    int flag, unsigned long int *limit, int errnumber, const char *msg)
2068      break;  {
2069    int count;
2070    int min = 0;
2071    int mid = 64;
2072    int max = -1;
2073    
2074      /* Anything else is just a one-node item */  extra->flags |= flag;
2075    
2076      default:  for (;;)
2077      fprintf(outfile, "    %s", OP_names[*code]);    {
2078      break;    *limit = mid;
2079    
2080      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2081        use_offsets, use_size_offsets);
2082    
2083      if (count == errnumber)
2084        {
2085        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2086        min = mid;
2087        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2088      }      }
2089    
2090    code++;    else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2091    fprintf(outfile, "\n");                           count == PCRE_ERROR_PARTIAL)
2092        {
2093        if (mid == min + 1)
2094          {
2095          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2096          break;
2097          }
2098        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2099        max = mid;
2100        mid = (min + mid)/2;
2101        }
2102      else break;    /* Some other error */
2103    }    }
2104    
2105    extra->flags &= ~flag;
2106    return count;
2107  }  }
2108    
2109    
2110    
2111  /* Character string printing function. */  /*************************************************
2112    *         Case-independent strncmp() function    *
2113    *************************************************/
2114    
2115    /*
2116    Arguments:
2117      s         first string
2118      t         second string
2119      n         number of characters to compare
2120    
2121  static void pchars(unsigned char *p, int length)  Returns:    < 0, = 0, or > 0, according to the comparison
2122    */
2123    
2124    static int
2125    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2126  {  {
2127  int c;  while (n--)
2128  while (length-- > 0)    {
2129    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    int c = tolower(*s++) - tolower(*t++);
2130      else fprintf(outfile, "\\x%02x", c);    if (c) return c;
2131      }
2132    return 0;
2133  }  }
2134    
2135    
2136    
2137  /* Alternative malloc function, to test functionality and show the size of the  /*************************************************
2138  compiled re. */  *         Check newline indicator                *
2139    *************************************************/
2140    
2141    /* This is used both at compile and run-time to check for <xxx> escapes. Print
2142    a message and return 0 if there is no match.
2143    
2144  static void *new_malloc(size_t size)  Arguments:
2145      p           points after the leading '<'
2146      f           file for error message
2147    
2148    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2149    */
2150    
2151    static int
2152    check_newline(pcre_uint8 *p, FILE *f)
2153  {  {
2154  gotten_store = size;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2155  if (log_store)  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2156    fprintf(outfile, "Memory allocation (code space): %d\n",  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2157      (int)((int)size - offsetof(real_pcre, code[0])));  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2158  return malloc(size);  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2159    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2160    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2161    fprintf(f, "Unknown newline type at: <%s\n", p);
2162    return 0;
2163  }  }
2164    
2165    
2166    
2167    /*************************************************
2168    *             Usage function                     *
2169    *************************************************/
2170    
2171  /* Get one piece of information from the pcre_fullinfo() function */  static void
2172    usage(void)
 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  
2173  {  {
2174  int rc;  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2175  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  printf("Input and output default to stdin and stdout.\n");
2176    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2177    printf("If input is a terminal, readline() is used to read from it.\n");
2178    #else
2179    printf("This version of pcretest is not linked with readline().\n");
2180    #endif
2181    printf("\nOptions:\n");
2182    #ifdef SUPPORT_PCRE16
2183    printf("  -16      use the 16-bit library\n");
2184    #endif
2185    printf("  -b       show compiled code\n");
2186    printf("  -C       show PCRE compile-time options and exit\n");
2187    printf("  -C arg   show a specific compile-time option\n");
2188    printf("           and exit with its value. The arg can be:\n");
2189    printf("     linksize     internal link size [2, 3, 4]\n");
2190    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2191    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2192    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2193    printf("     ucp          Unicode Properties supported [0, 1]\n");
2194    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2195    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2196    printf("  -d       debug: show compiled code and information (-b and -i)\n");
2197    #if !defined NODFA
2198    printf("  -dfa     force DFA matching for all subjects\n");
2199    #endif
2200    printf("  -help    show usage information\n");
2201    printf("  -i       show information about compiled patterns\n"
2202           "  -M       find MATCH_LIMIT minimum for each subject\n"
2203           "  -m       output memory used information\n"
2204           "  -o <n>   set size of offsets vector to <n>\n");
2205    #if !defined NOPOSIX
2206    printf("  -p       use POSIX interface\n");
2207    #endif
2208    printf("  -q       quiet: do not output PCRE version number at start\n");
2209    printf("  -S <n>   set stack size to <n> megabytes\n");
2210    printf("  -s       force each pattern to be studied at basic level\n"
2211           "  -s+      force each pattern to be studied, using JIT if available\n"
2212           "  -s++     ditto, verifying when JIT was actually used\n"
2213           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2214           "             where 1 <= n <= 7 selects JIT options\n"
2215           "  -s++n    ditto, verifying when JIT was actually used\n"
2216           "  -t       time compilation and execution\n");
2217    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2218    printf("  -tm      time execution (matching) only\n");
2219    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
2220  }  }
2221    
2222    
2223    
2224    /*************************************************
2225    *                Main Program                    *
2226    *************************************************/
2227    
2228  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
2229  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 311  options, followed by a set of test data, Line 2232  options, followed by a set of test data,
2232  int main(int argc, char **argv)  int main(int argc, char **argv)
2233  {  {
2234  FILE *infile = stdin;  FILE *infile = stdin;
2235    const char *version;
2236  int options = 0;  int options = 0;
2237  int study_options = 0;  int study_options = 0;
2238    int default_find_match_limit = FALSE;
2239  int op = 1;  int op = 1;
2240  int timeit = 0;  int timeit = 0;
2241    int timeitm = 0;
2242  int showinfo = 0;  int showinfo = 0;
2243  int showstore = 0;  int showstore = 0;
2244  int posix = 0;  int force_study = -1;
2245    int force_study_options = 0;
2246    int quiet = 0;
2247    int size_offsets = 45;
2248    int size_offsets_max;
2249    int *offsets = NULL;
2250  int debug = 0;  int debug = 0;
2251  int done = 0;  int done = 0;
2252  unsigned char buffer[30000];  int all_use_dfa = 0;
2253  unsigned char dbuffer[1024];  int verify_jit = 0;
2254    int yield = 0;
2255    int stack_size;
2256    
2257    #if !defined NOPOSIX
2258    int posix = 0;
2259    #endif
2260    #if !defined NODFA
2261    int *dfa_workspace = NULL;
2262    #endif
2263    
2264    pcre_jit_stack *jit_stack = NULL;
2265    
2266    /* These vectors store, end-to-end, a list of zero-terminated captured
2267    substring names, each list itself being terminated by an empty name. Assume
2268    that 1024 is plenty long enough for the few names we'll be testing. It is
2269    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2270    for the actual memory, to ensure alignment. */
2271    
2272    pcre_uint16 copynames[1024];
2273    pcre_uint16 getnames[1024];
2274    
2275    #ifdef SUPPORT_PCRE16
2276    pcre_uint16 *cn16ptr;
2277    pcre_uint16 *gn16ptr;
2278    #endif
2279    
2280    #ifdef SUPPORT_PCRE8
2281    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2282    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2283    pcre_uint8 *cn8ptr;
2284    pcre_uint8 *gn8ptr;
2285    #endif
2286    
2287    /* Get buffers from malloc() so that valgrind will check their misuse when
2288    debugging. They grow automatically when very long lines are read. The 16-bit
2289    buffer (buffer16) is obtained only if needed. */
2290    
2291    buffer = (pcre_uint8 *)malloc(buffer_size);
2292    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2293    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2294    
2295  /* Static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2296    
2297  outfile = stdout;  outfile = stdout;
2298    
2299    /* The following  _setmode() stuff is some Windows magic that tells its runtime
2300    library to translate CRLF into a single LF character. At least, that's what
2301    I've been told: never having used Windows I take this all on trust. Originally
2302    it set 0x8000, but then I was advised that _O_BINARY was better. */
2303    
2304    #if defined(_WIN32) || defined(WIN32)
2305    _setmode( _fileno( stdout ), _O_BINARY );
2306    #endif
2307    
2308    /* Get the version number: both pcre_version() and pcre16_version() give the
2309    same answer. We just need to ensure that we call one that is available. */
2310    
2311    #ifdef SUPPORT_PCRE8
2312    version = pcre_version();
2313    #else
2314    version = pcre16_version();
2315    #endif
2316    
2317  /* Scan options */  /* Scan options */
2318    
2319  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2320    {    {
2321    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    pcre_uint8 *endptr;
2322      showstore = 1;    char *arg = argv[op];
2323    else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
2324    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    if (strcmp(arg, "-m") == 0) showstore = 1;
2325    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2326    else if (strcmp(argv[op], "-p") == 0) posix = 1;  
2327      else if (strncmp(arg, "-s+", 3) == 0)
2328        {
2329        arg += 3;
2330        if (*arg == '+') { arg++; verify_jit = TRUE; }
2331        force_study = 1;
2332        if (*arg == 0)
2333          force_study_options = jit_study_bits[6];
2334        else if (*arg >= '1' && *arg <= '7')
2335          force_study_options = jit_study_bits[*arg - '1'];
2336        else goto BAD_ARG;
2337        }
2338      else if (strcmp(arg, "-16") == 0)
2339        {
2340    #ifdef SUPPORT_PCRE16
2341        use_pcre16 = 1;
2342    #else
2343        printf("** This version of PCRE was built without 16-bit support\n");
2344        exit(1);
2345    #endif
2346        }
2347      else if (strcmp(arg, "-q") == 0) quiet = 1;
2348      else if (strcmp(arg, "-b") == 0) debug = 1;
2349      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2350      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2351      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2352    #if !defined NODFA
2353      else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2354    #endif
2355      else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2356          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2357            *endptr == 0))
2358        {
2359        op++;
2360        argc--;
2361        }
2362      else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2363        {
2364        int both = arg[2] == 0;
2365        int temp;
2366        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2367                         *endptr == 0))
2368          {
2369          timeitm = temp;
2370          op++;
2371          argc--;
2372          }
2373        else timeitm = LOOPREPEAT;
2374        if (both) timeit = timeitm;
2375        }
2376      else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2377          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2378            *endptr == 0))
2379        {
2380    #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2381        printf("PCRE: -S not supported on this OS\n");
2382        exit(1);
2383    #else
2384        int rc;
2385        struct rlimit rlim;
2386        getrlimit(RLIMIT_STACK, &rlim);
2387        rlim.rlim_cur = stack_size * 1024 * 1024;
2388        rc = setrlimit(RLIMIT_STACK, &rlim);
2389        if (rc != 0)
2390          {
2391        printf("PCRE: setrlimit() failed with error %d\n", rc);
2392        exit(1);
2393          }
2394        op++;
2395        argc--;
2396    #endif
2397        }
2398    #if !defined NOPOSIX
2399      else if (strcmp(arg, "-p") == 0) posix = 1;
2400    #endif
2401      else if (strcmp(arg, "-C") == 0)
2402        {
2403        int rc;
2404        unsigned long int lrc;
2405    
2406        if (argc > 2)
2407          {
2408          if (strcmp(argv[op + 1], "linksize") == 0)
2409            {
2410            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2411            printf("%d\n", rc);
2412            yield = rc;
2413            }
2414          else if (strcmp(argv[op + 1], "pcre8") == 0)
2415            {
2416    #ifdef SUPPORT_PCRE8
2417            printf("1\n");
2418            yield = 1;
2419    #else
2420            printf("0\n");
2421            yield = 0;
2422    #endif
2423            }
2424          else if (strcmp(argv[op + 1], "pcre16") == 0)
2425            {
2426    #ifdef SUPPORT_PCRE16
2427            printf("1\n");
2428            yield = 1;
2429    #else
2430            printf("0\n");
2431            yield = 0;
2432    #endif
2433            }
2434          else if (strcmp(argv[op + 1], "utf") == 0)
2435            {
2436    #ifdef SUPPORT_PCRE8
2437            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2438            printf("%d\n", rc);
2439            yield = rc;
2440    #else
2441            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2442            printf("%d\n", rc);
2443            yield = rc;
2444    #endif
2445            }
2446          else if (strcmp(argv[op + 1], "ucp") == 0)
2447            {
2448            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2449            printf("%d\n", rc);
2450            yield = rc;
2451            }
2452          else if (strcmp(argv[op + 1], "jit") == 0)
2453            {
2454            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2455            printf("%d\n", rc);
2456            yield = rc;
2457            }
2458          else if (strcmp(argv[op + 1], "newline") == 0)
2459            {
2460            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2461            print_newline_config(rc, TRUE);
2462            }
2463          else if (strcmp(argv[op + 1], "ebcdic") == 0)
2464            {
2465    #ifdef EBCDIC
2466            printf("1\n");
2467            yield = 1;
2468    #else
2469            printf("0\n");
2470    #endif
2471            }
2472          else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2473            {
2474    #ifdef EBCDIC
2475            printf("0x%02x\n", CHAR_LF);
2476    #else
2477            printf("0\n");
2478    #endif
2479            }
2480          else
2481            {
2482            printf("Unknown -C option: %s\n", argv[op + 1]);
2483            }
2484          goto EXIT;
2485          }
2486    
2487        /* No argument for -C: output all configuration information. */
2488    
2489        printf("PCRE version %s\n", version);
2490        printf("Compiled with\n");
2491    
2492    #ifdef EBCDIC
2493        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2494    #endif
2495    
2496    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2497    are set, either both UTFs are supported or both are not supported. */
2498    
2499    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2500        printf("  8-bit and 16-bit support\n");
2501        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2502        if (rc)
2503          printf("  UTF-8 and UTF-16 support\n");
2504        else
2505          printf("  No UTF-8 or UTF-16 support\n");
2506    #elif defined SUPPORT_PCRE8
2507        printf("  8-bit support only\n");
2508        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2509        printf("  %sUTF-8 support\n", rc? "" : "No ");
2510    #else
2511        printf("  16-bit support only\n");
2512        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2513        printf("  %sUTF-16 support\n", rc? "" : "No ");
2514    #endif
2515    
2516        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2517        printf("  %sUnicode properties support\n", rc? "" : "No ");
2518        (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2519        if (rc)
2520          {
2521          const char *arch;
2522          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2523          printf("  Just-in-time compiler support: %s\n", arch);
2524          }
2525        else
2526          printf("  No just-in-time compiler support\n");
2527        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2528        print_newline_config(rc, FALSE);
2529        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2530        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2531                                         "all Unicode newlines");
2532        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2533        printf("  Internal link size = %d\n", rc);
2534        (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2535        printf("  POSIX malloc threshold = %d\n", rc);
2536        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2537        printf("  Default match limit = %ld\n", lrc);
2538        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2539        printf("  Default recursion depth limit = %ld\n", lrc);
2540        (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2541        printf("  Match recursion uses %s", rc? "stack" : "heap");
2542        if (showstore)
2543          {
2544          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2545          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2546          }
2547        printf("\n");
2548        goto EXIT;
2549        }
2550      else if (strcmp(arg, "-help") == 0 ||
2551               strcmp(arg, "--help") == 0)
2552        {
2553        usage();
2554        goto EXIT;
2555        }
2556    else    else
2557      {      {
2558      printf("*** Unknown option %s\n", argv[op]);      BAD_ARG:
2559      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("** Unknown or malformed option %s\n", arg);
2560      printf("  -d   debug: show compiled code; implies -i\n"      usage();
2561             "  -i   show information about compiled pattern\n"      yield = 1;
2562             "  -p   use POSIX interface\n"      goto EXIT;
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
2563      }      }
2564    op++;    op++;
2565    argc--;    argc--;
2566    }    }
2567    
2568    /* Get the store for the offsets vector, and remember what it was */
2569    
2570    size_offsets_max = size_offsets;
2571    offsets = (int *)malloc(size_offsets_max * sizeof(int));
2572    if (offsets == NULL)
2573      {
2574      printf("** Failed to get %d bytes of memory for offsets vector\n",
2575        (int)(size_offsets_max * sizeof(int)));
2576      yield = 1;
2577      goto EXIT;
2578      }
2579    
2580  /* Sort out the input and output files */  /* Sort out the input and output files */
2581    
2582  if (argc > 1)  if (argc > 1)
2583    {    {
2584    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
2585    if (infile == NULL)    if (infile == NULL)
2586      {      {
2587      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
2588      return 1;      yield = 1;
2589        goto EXIT;
2590      }      }
2591    }    }
2592    
2593  if (argc > 2)  if (argc > 2)
2594    {    {
2595    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
2596    if (outfile == NULL)    if (outfile == NULL)
2597      {      {
2598      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
2599      return 1;      yield = 1;
2600        goto EXIT;
2601      }      }
2602    }    }
2603    
2604  /* Set alternative malloc function */  /* Set alternative malloc function */
2605    
2606  pcre_malloc = new_malloc;  #ifdef SUPPORT_PCRE8
2607    pcre_malloc = new_malloc;
2608    pcre_free = new_free;
2609    pcre_stack_malloc = stack_malloc;
2610    pcre_stack_free = stack_free;
2611    #endif
2612    
2613    #ifdef SUPPORT_PCRE16
2614    pcre16_malloc = new_malloc;
2615    pcre16_free = new_free;
2616    pcre16_stack_malloc = stack_malloc;
2617    pcre16_stack_free = stack_free;
2618    #endif
2619    
2620    /* Heading line unless quiet, then prompt for first regex if stdin */
2621    
2622    if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2623    
2624    /* Main loop */
2625    
2626    while (!done)
2627      {
2628      pcre *re = NULL;
2629      pcre_extra *extra = NULL;
2630    
2631    #if !defined NOPOSIX  /* There are still compilers that require no indent */
2632      regex_t preg;
2633      int do_posix = 0;
2634    #endif
2635    
2636      const char *error;
2637      pcre_uint8 *markptr;
2638      pcre_uint8 *p, *pp, *ppp;
2639      pcre_uint8 *to_file = NULL;
2640      const pcre_uint8 *tables = NULL;
2641      unsigned long int get_options;
2642      unsigned long int true_size, true_study_size = 0;
2643      size_t size, regex_gotten_store;
2644      int do_allcaps = 0;
2645      int do_mark = 0;
2646      int do_study = 0;
2647      int no_force_study = 0;
2648      int do_debug = debug;
2649      int do_G = 0;
2650      int do_g = 0;
2651      int do_showinfo = showinfo;
2652      int do_showrest = 0;
2653      int do_showcaprest = 0;
2654      int do_flip = 0;
2655      int erroroffset, len, delimiter, poffset;
2656    
2657    #if !defined NODFA
2658      int dfa_matched = 0;
2659    #endif
2660    
2661      use_utf = 0;
2662      debug_lengths = 1;
2663    
2664      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
2665      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2666      fflush(outfile);
2667    
2668      p = buffer;
2669      while (isspace(*p)) p++;
2670      if (*p == 0) continue;
2671    
2672      /* See if the pattern is to be loaded pre-compiled from a file. */
2673    
2674      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2675        {
2676        pcre_uint32 magic;
2677        pcre_uint8 sbuf[8];
2678        FILE *f;
2679    
2680        p++;
2681        if (*p == '!')
2682          {
2683          do_debug = TRUE;
2684          do_showinfo = TRUE;
2685          p++;
2686          }
2687    
2688        pp = p + (int)strlen((char *)p);
2689        while (isspace(pp[-1])) pp--;
2690        *pp = 0;
2691    
2692        f = fopen((char *)p, "rb");
2693        if (f == NULL)
2694          {
2695          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2696          continue;
2697          }
2698    
2699        first_gotten_store = 0;
2700        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2701    
2702        true_size =
2703          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2704        true_study_size =
2705          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2706    
2707        re = (pcre *)new_malloc(true_size);
2708        if (re == NULL)
2709          {
2710          printf("** Failed to get %d bytes of memory for pcre object\n",
2711            (int)true_size);
2712          yield = 1;
2713          goto EXIT;
2714          }
2715        regex_gotten_store = first_gotten_store;
2716    
2717        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2718    
2719        magic = ((REAL_PCRE *)re)->magic_number;
2720        if (magic != MAGIC_NUMBER)
2721          {
2722          if (swap_uint32(magic) == MAGIC_NUMBER)
2723            {
2724            do_flip = 1;
2725            }
2726          else
2727            {
2728            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2729            new_free(re);
2730            fclose(f);
2731            continue;
2732            }
2733          }
2734    
2735        /* We hide the byte-invert info for little and big endian tests. */
2736        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2737          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2738    
2739        /* Now see if there is any following study data. */
2740    
2741  /* Heading line, then prompt for first regex if stdin */      if (true_study_size != 0)
2742          {
2743          pcre_study_data *psd;
2744    
2745  fprintf(outfile, "PCRE version %s\n\n", pcre_version());        extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2746          extra->flags = PCRE_EXTRA_STUDY_DATA;
2747    
2748  /* Main loop */        psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2749          extra->study_data = psd;
2750    
2751  while (!done)        if (fread(psd, 1, true_study_size, f) != true_study_size)
2752    {          {
2753    pcre *re = NULL;          FAIL_READ:
2754    pcre_extra *extra = NULL;          fprintf(outfile, "Failed to read data from %s\n", p);
2755            if (extra != NULL)
2756              {
2757              PCRE_FREE_STUDY(extra);
2758              }
2759            new_free(re);
2760            fclose(f);
2761            continue;
2762            }
2763          fprintf(outfile, "Study data loaded from %s\n", p);
2764          do_study = 1;     /* To get the data output if requested */
2765          }
2766        else fprintf(outfile, "No study data\n");
2767    
2768  #if !defined NOPOSIX  /* There are still compilers that require no indent */      /* Flip the necessary bytes. */
2769    regex_t preg;      if (do_flip)
2770  #endif        {
2771          int rc;
2772          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2773          if (rc == PCRE_ERROR_BADMODE)
2774            {
2775            /* Simulate the result of the function call below. */
2776            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2777              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2778            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2779              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2780            new_free(re);
2781            fclose(f);
2782            continue;
2783            }
2784          }
2785    
2786    const char *error;      /* Need to know if UTF-8 for printing data strings. */
   unsigned char *p, *pp, *ppp;  
   unsigned const char *tables = NULL;  
   int do_study = 0;  
   int do_debug = debug;  
   int do_G = 0;  
   int do_g = 0;  
   int do_showinfo = showinfo;  
   int do_showrest = 0;  
   int do_posix = 0;  
   int erroroffset, len, delimiter;  
2787    
2788    if (infile == stdin) printf("  re> ");      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2789    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;        {
2790    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);        new_free(re);
2791          fclose(f);
2792          continue;
2793          }
2794        use_utf = (get_options & PCRE_UTF8) != 0;
2795    
2796    p = buffer;      fclose(f);
2797    while (isspace(*p)) p++;      goto SHOW_INFO;
2798    if (*p == 0) continue;      }
2799    
2800    /* Get the delimiter and seek the end of the pattern; if is isn't    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2801    complete, read more. */    the pattern; if it isn't complete, read more. */
2802    
2803    delimiter = *p++;    delimiter = *p++;
2804    
2805    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2806      {      {
2807      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2808      goto SKIP_DATA;      goto SKIP_DATA;
2809      }      }
2810    
2811    pp = p;    pp = p;
2812      poffset = (int)(p - buffer);
2813    
2814    for(;;)    for(;;)
2815      {      {
# Line 435  while (!done) Line 2820  while (!done)
2820        pp++;        pp++;
2821        }        }
2822      if (*pp != 0) break;      if (*pp != 0) break;
2823        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
2824        {        {
2825        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2826        done = 1;        done = 1;
# Line 453  while (!done) Line 2829  while (!done)
2829      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2830      }      }
2831    
2832      /* The buffer may have moved while being extended; reset the start of data
2833      pointer to the correct relative point in the buffer. */
2834    
2835      p = buffer + poffset;
2836    
2837    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
2838    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
2839    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
2840    
2841    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
2842    
2843    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
2844      for callouts. */
2845    
2846    *pp++ = 0;    *pp++ = 0;
2847      strcpy((char *)pbuffer, (char *)p);
2848    
2849    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2850    
2851    options = 0;    options = 0;
2852    study_options = 0;    study_options = force_study_options;
2853    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
2854    
2855    while (*pp != 0)    while (*pp != 0)
2856      {      {
2857      switch (*pp++)      switch (*pp++)
2858        {        {
2859          case 'f': options |= PCRE_FIRSTLINE; break;
2860        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
2861        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
2862        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
2863        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2864        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2865    
2866        case '+': do_showrest = 1; break;        case '+':
2867          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2868          break;
2869    
2870          case '=': do_allcaps = 1; break;
2871        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2872          case 'B': do_debug = 1; break;
2873          case 'C': options |= PCRE_AUTO_CALLOUT; break;
2874        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
2875        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2876          case 'F': do_flip = 1; break;
2877        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2878        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2879          case 'J': options |= PCRE_DUPNAMES; break;
2880          case 'K': do_mark = 1; break;
2881        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2882          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2883    
2884  #if !defined NOPOSIX  #if !defined NOPOSIX
2885        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2886  #endif  #endif
2887    
2888        case 'S': do_study = 1; break;        case 'S':
2889          do_study = 1;
2890          for (;;)
2891            {
2892            switch (*pp++)
2893              {
2894              case 'S':
2895              do_study = 0;
2896              no_force_study = 1;
2897              break;
2898    
2899              case '!':
2900              study_options |= PCRE_STUDY_EXTRA_NEEDED;
2901              break;
2902    
2903              case '+':
2904              if (*pp == '+')
2905                {
2906                verify_jit = TRUE;
2907                pp++;
2908                }
2909              if (*pp >= '1' && *pp <= '7')
2910                study_options |= jit_study_bits[*pp++ - '1'];
2911              else
2912                study_options |= jit_study_bits[6];
2913              break;
2914    
2915              case '-':
2916              study_options &= ~PCRE_STUDY_ALLJIT;
2917              break;
2918    
2919              default:
2920              pp--;
2921              goto ENDLOOP;
2922              }
2923            }
2924          ENDLOOP:
2925          break;
2926    
2927        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2928          case 'W': options |= PCRE_UCP; break;
2929        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2930          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2931          case 'Z': debug_lengths = 0; break;
2932          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2933          case '?': options |= PCRE_NO_UTF8_CHECK; break;
2934    
2935          case 'T':
2936          switch (*pp++)
2937            {
2938            case '0': tables = tables0; break;
2939            case '1': tables = tables1; break;
2940    
2941            case '\r':
2942            case '\n':
2943            case ' ':
2944            case 0:
2945            fprintf(outfile, "** Missing table number after /T\n");
2946            goto SKIP_DATA;
2947    
2948            default:
2949            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2950            goto SKIP_DATA;
2951            }
2952          break;
2953    
2954        case 'L':        case 'L':
2955        ppp = pp;        ppp = pp;
2956        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
2957          /* The 0 (NUL) test is just in case this is an unterminated line. */
2958          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2959        *ppp = 0;        *ppp = 0;
2960        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2961          {          {
2962          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2963          goto SKIP_DATA;          goto SKIP_DATA;
2964          }          }
2965        tables = pcre_maketables();        locale_set = 1;
2966          tables = PCRE_MAKETABLES;
2967        pp = ppp;        pp = ppp;
2968        break;        break;
2969    
2970        case '\n': case ' ': break;        case '>':
2971          to_file = pp;
2972          while (*pp != 0) pp++;
2973          while (isspace(pp[-1])) pp--;
2974          *pp = 0;
2975          break;
2976    
2977          case '<':
2978            {
2979            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2980              {
2981              options |= PCRE_JAVASCRIPT_COMPAT;
2982              pp += 3;
2983              }
2984            else
2985              {
2986              int x = check_newline(pp, outfile);
2987              if (x == 0) goto SKIP_DATA;
2988              options |= x;
2989              while (*pp++ != '>');
2990              }
2991            }
2992          break;
2993    
2994          case '\r':                      /* So that it works in Windows */
2995          case '\n':
2996          case ' ':
2997          break;
2998    
2999        default:        default:
3000        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3001        goto SKIP_DATA;        goto SKIP_DATA;
# Line 517  while (!done) Line 3004  while (!done)
3004    
3005    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
3006    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
3007    local character tables. */    local character tables. Neither does it have 16-bit support. */
3008    
3009  #if !defined NOPOSIX  #if !defined NOPOSIX
3010    if (posix || do_posix)    if (posix || do_posix)
3011      {      {
3012      int rc;      int rc;
3013      int cflags = 0;      int cflags = 0;
3014    
3015      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3016      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3017        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3018        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3019        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3020        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3021        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3022    
3023        first_gotten_store = 0;
3024      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
3025    
3026      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 533  while (!done) Line 3028  while (!done)
3028    
3029      if (rc != 0)      if (rc != 0)
3030        {        {
3031        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3032        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3033        goto SKIP_DATA;        goto SKIP_DATA;
3034        }        }
# Line 545  while (!done) Line 3040  while (!done)
3040  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
3041    
3042      {      {
3043      if (timeit)      /* In 16-bit mode, convert the input. */
3044    
3045    #ifdef SUPPORT_PCRE16
3046        if (use_pcre16)
3047          {
3048          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3049            {
3050            case -1:
3051            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3052              "converted to UTF-16\n");
3053            goto SKIP_DATA;
3054    
3055            case -2:
3056            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3057              "cannot be converted to UTF-16\n");
3058            goto SKIP_DATA;
3059    
3060            case -3: /* "Impossible error" when to16 is called with arg1 FALSE */
3061            fprintf(outfile, "**Failed: character value greater than 0xffff "
3062              "cannot be converted to 16-bit in non-UTF mode\n");
3063            goto SKIP_DATA;
3064    
3065            default:
3066            break;
3067            }
3068          p = (pcre_uint8 *)buffer16;
3069          }
3070    #endif
3071    
3072        /* Compile many times when timing */
3073    
3074        if (timeit > 0)
3075        {        {
3076        register int i;        register int i;
3077        clock_t time_taken;        clock_t time_taken;
3078        clock_t start_time = clock();        clock_t start_time = clock();
3079        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
3080          {          {
3081          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3082          if (re != NULL) free(re);          if (re != NULL) free(re);
3083          }          }
3084        time_taken = clock() - start_time;        time_taken = clock() - start_time;
3085        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
3086          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
3087          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
3088        }        }
3089    
3090      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
3091        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3092    
3093      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3094      if non-interactive. */      if non-interactive. */
# Line 574  while (!done) Line 3101  while (!done)
3101          {          {
3102          for (;;)          for (;;)
3103            {            {
3104            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
3105              {              {
3106              done = 1;              done = 1;
3107              goto CONTINUE;              goto CONTINUE;
# Line 588  while (!done) Line 3115  while (!done)
3115        goto CONTINUE;        goto CONTINUE;
3116        }        }
3117    
3118      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
3119      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
3120      returns only limited data. Check that it agrees with the newer one. */      lines. */
3121    
3122      if (do_showinfo)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3123          goto SKIP_DATA;
3124        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3125    
3126        /* Extract the size for possible writing before possibly flipping it,
3127        and remember the store that was got. */
3128    
3129        true_size = ((REAL_PCRE *)re)->size;
3130        regex_gotten_store = first_gotten_store;
3131    
3132        /* Output code size information if requested */
3133    
3134        if (log_store)
3135          fprintf(outfile, "Memory allocation (code space): %d\n",
3136            (int)(first_gotten_store -
3137                  sizeof(REAL_PCRE) -
3138                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3139    
3140        /* If -s or /S was present, study the regex to generate additional info to
3141        help with the matching, unless the pattern has the SS option, which
3142        suppresses the effect of /S (used for a few test patterns where studying is
3143        never sensible). */
3144    
3145        if (do_study || (force_study >= 0 && !no_force_study))
3146        {        {
3147        int old_first_char, old_options, old_count;        if (timeit > 0)
3148        int count, backrefmax, first_char, need_char;          {
3149        size_t size;          register int i;
3150            clock_t time_taken;
3151        if (do_debug) print_internals(re);          clock_t start_time = clock();
3152            for (i = 0; i < timeit; i++)
3153        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);            {
3154        new_info(re, NULL, PCRE_INFO_SIZE, &size);            PCRE_STUDY(extra, re, study_options, &error);
3155        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            }
3156        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);          time_taken = clock() - start_time;
3157        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);          if (extra != NULL)
3158        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            {
3159              PCRE_FREE_STUDY(extra);
3160        old_count = pcre_info(re, &old_options, &old_first_char);            }
3161        if (count < 0) fprintf(outfile,          fprintf(outfile, "  Study time %.4f milliseconds\n",
3162          "Error %d from pcre_info()\n", count);            (((double)time_taken * 1000.0) / (double)timeit) /
3163        else              (double)CLOCKS_PER_SEC);
3164            }
3165          PCRE_STUDY(extra, re, study_options, &error);
3166          if (error != NULL)
3167            fprintf(outfile, "Failed to study: %s\n", error);
3168          else if (extra != NULL)
3169          {          {
3170          if (old_count != count) fprintf(outfile,          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3171            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,          if (log_store)
3172              old_count);            {
3173              size_t jitsize;
3174              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3175                  jitsize != 0)
3176                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3177              }
3178            }
3179          }
3180    
3181          if (old_first_char != first_char) fprintf(outfile,      /* If /K was present, we set up for handling MARK data. */
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
3182    
3183          if (old_options != options) fprintf(outfile,      if (do_mark)
3184            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,        {
3185              old_options);        if (extra == NULL)
3186            {
3187            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3188            extra->flags = 0;
3189          }          }
3190          extra->mark = &markptr;
3191          extra->flags |= PCRE_EXTRA_MARK;
3192          }
3193    
3194        /* Extract and display information from the compiled data if required. */
3195    
3196        SHOW_INFO:
3197    
3198        if (do_debug)
3199          {
3200          fprintf(outfile, "------------------------------------------------------------------\n");
3201          PCRE_PRINTINT(re, outfile, debug_lengths);
3202          }
3203    
3204        /* We already have the options in get_options (see above) */
3205    
3206        if (do_showinfo)
3207          {
3208          unsigned long int all_options;
3209          int count, backrefmax, first_char, need_char, okpartial, jchanged,
3210            hascrorlf, maxlookbehind;
3211          int nameentrysize, namecount;
3212          const pcre_uint8 *nametable;
3213    
3214          if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3215              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3216              new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3217              new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3218              new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3219              new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3220              new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3221              new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3222              new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3223              new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3224              new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3225              new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3226              != 0)
3227            goto SKIP_DATA;
3228    
3229        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3230          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3231          size, gotten_store);          (int)size, (int)regex_gotten_store);
3232    
3233        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
3234        if (backrefmax > 0)        if (backrefmax > 0)
3235          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
       if (options == 0) fprintf(outfile, "No options\n");  
         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
           ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
           ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
           ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
           ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
           ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
           ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
           ((options & PCRE_EXTRA) != 0)? " extra" : "",  
           ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
3236    
3237        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (namecount > 0)
3238          fprintf(outfile, "Case state changes\n");          {
3239            fprintf(outfile, "Named capturing subpatterns:\n");
3240            while (namecount-- > 0)
3241              {
3242    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3243              int imm2_size = use_pcre16 ? 1 : 2;
3244    #else
3245              int imm2_size = IMM2_SIZE;
3246    #endif
3247              int length = (int)STRLEN(nametable + imm2_size);
3248              fprintf(outfile, "  ");
3249              PCHARSV(nametable, imm2_size, length, outfile);
3250              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3251    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3252              fprintf(outfile, "%3d\n", use_pcre16?
3253                 (int)(((PCRE_SPTR16)nametable)[0])
3254                :((int)nametable[0] << 8) | (int)nametable[1]);
3255              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3256    #else
3257              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3258    #ifdef SUPPORT_PCRE8
3259              nametable += nameentrysize;
3260    #else
3261              nametable += nameentrysize * 2;
3262    #endif
3263    #endif
3264              }
3265            }
3266    
3267          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3268          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3269    
3270          all_options = ((REAL_PCRE *)re)->options;
3271          if (do_flip) all_options = swap_uint32(all_options);
3272    
3273          if (get_options == 0) fprintf(outfile, "No options\n");
3274            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3275              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3276              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3277              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3278              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3279              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3280              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3281              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3282              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3283              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3284              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3285              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3286              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3287              ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3288              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3289              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3290              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3291              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3292    
3293          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3294    
3295          switch (get_options & PCRE_NEWLINE_BITS)
3296            {
3297            case PCRE_NEWLINE_CR:
3298            fprintf(outfile, "Forced newline sequence: CR\n");
3299            break;
3300    
3301            case PCRE_NEWLINE_LF:
3302            fprintf(outfile, "Forced newline sequence: LF\n");
3303            break;
3304    
3305            case PCRE_NEWLINE_CRLF:
3306            fprintf(outfile, "Forced newline sequence: CRLF\n");
3307            break;
3308    
3309            case PCRE_NEWLINE_ANYCRLF:
3310            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3311            break;
3312    
3313            case PCRE_NEWLINE_ANY:
3314            fprintf(outfile, "Forced newline sequence: ANY\n");
3315            break;
3316    
3317            default:
3318            break;
3319            }
3320    
3321        if (first_char == -1)        if (first_char == -1)
3322          {          {
3323          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
3324          }          }
3325        else if (first_char < 0)        else if (first_char < 0)
3326          {          {
# Line 656  while (!done) Line 3328  while (!done)
3328          }          }
3329        else        else
3330          {          {
3331          if (isprint(first_char))          const char *caseless =
3332            fprintf(outfile, "First char = \'%c\'\n", first_char);            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3333              "" : " (caseless)";
3334    
3335            if (PRINTOK(first_char))
3336              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3337          else          else
3338            fprintf(outfile, "First char = %d\n", first_char);            {
3339              fprintf(outfile, "First char = ");
3340              pchar(first_char, outfile);
3341              fprintf(outfile, "%s\n", caseless);
3342              }
3343          }          }
3344    
3345        if (need_char < 0)        if (need_char < 0)
# Line 668  while (!done) Line 3348  while (!done)
3348          }          }
3349        else        else
3350          {          {
3351          if (isprint(need_char))          const char *caseless =
3352            fprintf(outfile, "Need char = \'%c\'\n", need_char);            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3353              "" : " (caseless)";
3354    
3355            if (PRINTOK(need_char))
3356              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3357            else
3358              {
3359              fprintf(outfile, "Need char = ");
3360              pchar(need_char, outfile);
3361              fprintf(outfile, "%s\n", caseless);
3362              }
3363            }
3364    
3365          if (maxlookbehind > 0)
3366            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3367    
3368          /* Don't output study size; at present it is in any case a fixed
3369          value, but it varies, depending on the computer architecture, and
3370          so messes up the test suite. (And with the /F option, it might be
3371          flipped.) If study was forced by an external -s, don't show this
3372          information unless -i or -d was also present. This means that, except
3373          when auto-callouts are involved, the output from runs with and without
3374          -s should be identical. */
3375    
3376          if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3377            {
3378            if (extra == NULL)
3379              fprintf(outfile, "Study returned NULL\n");
3380          else          else
3381            fprintf(outfile, "Need char = %d\n", need_char);            {
3382              pcre_uint8 *start_bits = NULL;
3383              int minlength;
3384    
3385              if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3386                fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3387    
3388              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3389                {
3390                if (start_bits == NULL)
3391                  fprintf(outfile, "No set of starting bytes\n");
3392                else
3393                  {
3394                  int i;
3395                  int c = 24;
3396                  fprintf(outfile, "Starting byte set: ");
3397                  for (i = 0; i < 256; i++)
3398                    {
3399                    if ((start_bits[i/8] & (1<<(i&7))) != 0)
3400                      {
3401                      if (c > 75)
3402                        {
3403                        fprintf(outfile, "\n  ");
3404                        c = 2;
3405                        }
3406                      if (PRINTOK(i) && i != ' ')
3407                        {
3408                        fprintf(outfile, "%c ", i);
3409                        c += 2;
3410                        }
3411                      else
3412                        {
3413                        fprintf(outfile, "\\x%02x ", i);
3414                        c += 5;
3415                        }
3416                      }
3417                    }
3418                  fprintf(outfile, "\n");
3419                  }
3420                }
3421              }
3422    
3423            /* Show this only if the JIT was set by /S, not by -s. */
3424    
3425            if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3426                (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3427              {
3428              int jit;
3429              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3430                {
3431                if (jit)
3432                  fprintf(outfile, "JIT study was successful\n");
3433                else
3434    #ifdef SUPPORT_JIT
3435                  fprintf(outfile, "JIT study was not successful\n");
3436    #else
3437                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3438    #endif
3439                }
3440              }
3441          }          }
3442        }        }
3443    
3444      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
3445      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
3446        the study length, in big-endian order. */
3447    
3448      if (do_study)      if (to_file != NULL)
3449        {        {
3450        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
3451          if (f == NULL)
3452          {          {
3453          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
3454          }          }
3455          else
3456            {
3457            pcre_uint8 sbuf[8];
3458    
3459        extra = pcre_study(re, study_options, &error);          if (do_flip) regexflip(re, extra);
3460        if (error != NULL)          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3461          fprintf(outfile, "Failed to study: %s\n", error);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3462        else if (extra == NULL)          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3463          fprintf(outfile, "Study returned NULL\n");          sbuf[3] = (pcre_uint8)((true_size) & 255);
3464            sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3465           &n