/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC revision 929 by zherczeg, Fri Feb 24 11:07:47 2012 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50    #ifdef HAVE_CONFIG_H
51    #include "config.h"
52    #endif
53    
54  #include <ctype.h>  #include <ctype.h>
55  #include <stdio.h>  #include <stdio.h>
56  #include <string.h>  #include <string.h>
57  #include <stdlib.h>  #include <stdlib.h>
58  #include <time.h>  #include <time.h>
59  #include <locale.h>  #include <locale.h>
60    #include <errno.h>
61    
62    #ifdef SUPPORT_LIBREADLINE
63    #ifdef HAVE_UNISTD_H
64    #include <unistd.h>
65    #endif
66    #include <readline/readline.h>
67    #include <readline/history.h>
68    #endif
69    
70    
71    /* A number of things vary for Windows builds. Originally, pcretest opened its
72    input and output without "b"; then I was told that "b" was needed in some
73    environments, so it was added for release 5.0 to both the input and output. (It
74    makes no difference on Unix-like systems.) Later I was told that it is wrong
75    for the input on Windows. I've now abstracted the modes into two macros that
76    are set here, to make it easier to fiddle with them, and removed "b" from the
77    input mode under Windows. */
78    
79    #if defined(_WIN32) || defined(WIN32)
80    #include <io.h>                /* For _setmode() */
81    #include <fcntl.h>             /* For _O_BINARY */
82    #define INPUT_MODE   "r"
83    #define OUTPUT_MODE  "wb"
84    
85    #ifndef isatty
86    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
87    #endif                         /* though in some environments they seem to   */
88                                   /* be already defined, hence the #ifndefs.    */
89    #ifndef fileno
90    #define fileno _fileno
91    #endif
92    
93    /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95  /* Use the internal info for displaying the results of pcre_study(). */  #ifdef __BORLANDC__
96    #define _setmode(handle, mode) setmode(handle, mode)
97    #endif
98    
99  #include "internal.h"  /* Not Windows */
100    
101    #else
102    #include <sys/time.h>          /* These two includes are needed */
103    #include <sys/resource.h>      /* for setrlimit(). */
104    #define INPUT_MODE   "rb"
105    #define OUTPUT_MODE  "wb"
106    #endif
107    
108    #define PRIV(name) name
109    
110    /* We have to include pcre_internal.h because we need the internal info for
111    displaying the results of pcre_study() and we also need to know about the
112    internal macros, structures, and other internal data values; pcretest has
113    "inside information" compared to a program that strictly follows the PCRE API.
114    
115    Although pcre_internal.h does itself include pcre.h, we explicitly include it
116    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
117    appropriately for an application, not for building PCRE. */
118    
119    #include "pcre.h"
120    
121    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
122    /* Configure internal macros to 16 bit mode. */
123    #define COMPILE_PCRE16
124    #endif
125    
126    #include "pcre_internal.h"
127    
128    /* The pcre_printint() function, which prints the internal form of a compiled
129    regex, is held in a separate file so that (a) it can be compiled in either
130    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
131    when that is compiled in debug mode. */
132    
133    #ifdef SUPPORT_PCRE8
134    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
135    #endif
136    #ifdef SUPPORT_PCRE16
137    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
138    #endif
139    
140    /* We need access to some of the data tables that PCRE uses. So as not to have
141    to keep two copies, we include the source file here, changing the names of the
142    external symbols to prevent clashes. */
143    
144    #define PCRE_INCLUDED
145    
146    #include "pcre_tables.c"
147    
148    /* The definition of the macro PRINTABLE, which determines whether to print an
149    output character as-is or as a hex value when showing compiled patterns, is
150    the same as in the printint.src file. We use it here in cases when the locale
151    has not been explicitly changed, so as to get consistent output from systems
152    that differ in their output from isprint() even in the "C" locale. */
153    
154    #ifdef EBCDIC
155    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
156    #else
157    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
158    #endif
159    
160    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
161    
162    /* Posix support is disabled in 16 bit only mode. */
163    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
164    #define NOPOSIX
165    #endif
166    
167  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
168  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 172  Makefile. */
172  #include "pcreposix.h"  #include "pcreposix.h"
173  #endif  #endif
174    
175    /* It is also possible, originally for the benefit of a version that was
176    imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
177    NOUTF) or without the interface to the DFA matcher (define NODFA). In fact, we
178    automatically cut out the UTF support if PCRE is built without it. */
179    
180    #ifndef SUPPORT_UTF
181    #ifndef NOUTF
182    #define NOUTF
183    #endif
184    #endif
185    
186    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
187    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
188    only from one place and is handled differently). I couldn't dream up any way of
189    using a single macro to do this in a generic way, because of the many different
190    argument requirements. We know that at least one of SUPPORT_PCRE8 and
191    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
192    use these in the definitions of generic macros.
193    
194    **** Special note about the PCHARSxxx macros: the address of the string to be
195    printed is always given as two arguments: a base address followed by an offset.
196    The base address is cast to the correct data size for 8 or 16 bit data; the
197    offset is in units of this size. If the string were given as base+offset in one
198    argument, the casting might be incorrectly applied. */
199    
200    #ifdef SUPPORT_PCRE8
201    
202    #define PCHARS8(lv, p, offset, len, f) \
203      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
204    
205    #define PCHARSV8(p, offset, len, f) \
206      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
207    
208    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
209      p = read_capture_name8(p, cn8, re)
210    
211    #define STRLEN8(p) ((int)strlen((char *)p))
212    
213    #define SET_PCRE_CALLOUT8(callout) \
214      pcre_callout = callout
215    
216    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
217       pcre_assign_jit_stack(extra, callback, userdata)
218    
219    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
220      re = pcre_compile((char *)pat, options, error, erroffset, tables)
221    
222    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
223        namesptr, cbuffer, size) \
224      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
225        (char *)namesptr, cbuffer, size)
226    
227    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
228      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
229    
230    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
231        offsets, size_offsets, workspace, size_workspace) \
232      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
233        offsets, size_offsets, workspace, size_workspace)
234    
235    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
236        offsets, size_offsets) \
237      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
238        offsets, size_offsets)
239    
240    #define PCRE_FREE_STUDY8(extra) \
241      pcre_free_study(extra)
242    
243    #define PCRE_FREE_SUBSTRING8(substring) \
244      pcre_free_substring(substring)
245    
246    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
247      pcre_free_substring_list(listptr)
248    
249    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
250        getnamesptr, subsptr) \
251      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
252        (char *)getnamesptr, subsptr)
253    
254    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
255      n = pcre_get_stringnumber(re, (char *)ptr)
256    
257    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
258      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
259    
260    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
261      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
262    
263    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
264      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
265    
266    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
267      pcre_printint(re, outfile, debug_lengths)
268    
269    #define PCRE_STUDY8(extra, re, options, error) \
270      extra = pcre_study(re, options, error)
271    
272    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
273      pcre_jit_stack_alloc(startsize, maxsize)
274    
275    #define PCRE_JIT_STACK_FREE8(stack) \
276      pcre_jit_stack_free(stack)
277    
278    #endif /* SUPPORT_PCRE8 */
279    
280    /* -----------------------------------------------------------*/
281    
282    #ifdef SUPPORT_PCRE16
283    
284    #define PCHARS16(lv, p, offset, len, f) \
285      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
286    
287    #define PCHARSV16(p, offset, len, f) \
288      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
289    
290    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
291      p = read_capture_name16(p, cn16, re)
292    
293    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
294    
295    #define SET_PCRE_CALLOUT16(callout) \
296      pcre16_callout = (int (*)(pcre16_callout_block *))callout
297    
298    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
299      pcre16_assign_jit_stack((pcre16_extra *)extra, \
300        (pcre16_jit_callback)callback, userdata)
301    
302    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
303      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
304        tables)
305    
306    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
307        namesptr, cbuffer, size) \
308      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
309        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
310    
311    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
312      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
313        (PCRE_UCHAR16 *)cbuffer, size/2)
314    
315    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
316        offsets, size_offsets, workspace, size_workspace) \
317      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
318        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
319        workspace, size_workspace)
320    
321    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
322        offsets, size_offsets) \
323      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
324        len, start_offset, options, offsets, size_offsets)
325    
326    #define PCRE_FREE_STUDY16(extra) \
327      pcre16_free_study((pcre16_extra *)extra)
328    
329    #define PCRE_FREE_SUBSTRING16(substring) \
330      pcre16_free_substring((PCRE_SPTR16)substring)
331    
332    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
333      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
334    
335    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
336        getnamesptr, subsptr) \
337      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
338        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
339    
340    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
341      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
342    
343    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
344      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
345        (PCRE_SPTR16 *)(void*)subsptr)
346    
347    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
348      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
349        (PCRE_SPTR16 **)(void*)listptr)
350    
351    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
352      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
353        tables)
354    
355    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
356      pcre16_printint(re, outfile, debug_lengths)
357    
358    #define PCRE_STUDY16(extra, re, options, error) \
359      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
360    
361    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
362      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
363    
364    #define PCRE_JIT_STACK_FREE16(stack) \
365      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
366    
367    #endif /* SUPPORT_PCRE16 */
368    
369    
370    /* ----- Both modes are supported; a runtime test is needed, except for
371    pcre_config(), and the JIT stack functions, when it doesn't matter which
372    version is called. ----- */
373    
374    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
375    
376    #define CHAR_SIZE (use_pcre16? 2:1)
377    
378    #define PCHARS(lv, p, offset, len, f) \
379      if (use_pcre16) \
380        PCHARS16(lv, p, offset, len, f); \
381      else \
382        PCHARS8(lv, p, offset, len, f)
383    
384    #define PCHARSV(p, offset, len, f) \
385      if (use_pcre16) \
386        PCHARSV16(p, offset, len, f); \
387      else \
388        PCHARSV8(p, offset, len, f)
389    
390    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
391      if (use_pcre16) \
392        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
393      else \
394        READ_CAPTURE_NAME8(p, cn8, cn16, re)
395    
396    #define SET_PCRE_CALLOUT(callout) \
397      if (use_pcre16) \
398        SET_PCRE_CALLOUT16(callout); \
399      else \
400        SET_PCRE_CALLOUT8(callout)
401    
402    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
403    
404    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
405      if (use_pcre16) \
406        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
407      else \
408        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
409    
410    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
411      if (use_pcre16) \
412        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
413      else \
414        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
415    
416    #define PCRE_CONFIG pcre_config
417    
418    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
419        namesptr, cbuffer, size) \
420      if (use_pcre16) \
421        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
422          namesptr, cbuffer, size); \
423      else \
424        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
425          namesptr, cbuffer, size)
426    
427    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
428      if (use_pcre16) \
429        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
430      else \
431        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
432    
433    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
434        offsets, size_offsets, workspace, size_workspace) \
435      if (use_pcre16) \
436        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
437          offsets, size_offsets, workspace, size_workspace); \
438      else \
439        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
440          offsets, size_offsets, workspace, size_workspace)
441    
442    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
443        offsets, size_offsets) \
444      if (use_pcre16) \
445        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
446          offsets, size_offsets); \
447      else \
448        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
449          offsets, size_offsets)
450    
451    #define PCRE_FREE_STUDY(extra) \
452      if (use_pcre16) \
453        PCRE_FREE_STUDY16(extra); \
454      else \
455        PCRE_FREE_STUDY8(extra)
456    
457    #define PCRE_FREE_SUBSTRING(substring) \
458      if (use_pcre16) \
459        PCRE_FREE_SUBSTRING16(substring); \
460      else \
461        PCRE_FREE_SUBSTRING8(substring)
462    
463    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
464      if (use_pcre16) \
465        PCRE_FREE_SUBSTRING_LIST16(listptr); \
466      else \
467        PCRE_FREE_SUBSTRING_LIST8(listptr)
468    
469    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
470        getnamesptr, subsptr) \
471      if (use_pcre16) \
472        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
473          getnamesptr, subsptr); \
474      else \
475        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
476          getnamesptr, subsptr)
477    
478    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
479      if (use_pcre16) \
480        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
481      else \
482        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
483    
484    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
485      if (use_pcre16) \
486        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
487      else \
488        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
489    
490    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
491      if (use_pcre16) \
492        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
493      else \
494        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
495    
496    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
497      (use_pcre16 ? \
498         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
499        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
500    
501    #define PCRE_JIT_STACK_FREE(stack) \
502      if (use_pcre16) \
503        PCRE_JIT_STACK_FREE16(stack); \
504      else \
505        PCRE_JIT_STACK_FREE8(stack)
506    
507    #define PCRE_MAKETABLES \
508      (use_pcre16? pcre16_maketables() : pcre_maketables())
509    
510    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
511      if (use_pcre16) \
512        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
513      else \
514        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
515    
516    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
517      if (use_pcre16) \
518        PCRE_PRINTINT16(re, outfile, debug_lengths); \
519      else \
520        PCRE_PRINTINT8(re, outfile, debug_lengths)
521    
522    #define PCRE_STUDY(extra, re, options, error) \
523      if (use_pcre16) \
524        PCRE_STUDY16(extra, re, options, error); \
525      else \
526        PCRE_STUDY8(extra, re, options, error)
527    
528    /* ----- Only 8-bit mode is supported ----- */
529    
530    #elif defined SUPPORT_PCRE8
531    #define CHAR_SIZE                 1
532    #define PCHARS                    PCHARS8
533    #define PCHARSV                   PCHARSV8
534    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
535    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
536    #define STRLEN                    STRLEN8
537    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
538    #define PCRE_COMPILE              PCRE_COMPILE8
539    #define PCRE_CONFIG               pcre_config
540    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
541    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
542    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
543    #define PCRE_EXEC                 PCRE_EXEC8
544    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
545    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
546    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
547    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
548    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
549    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
550    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
551    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
552    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
553    #define PCRE_MAKETABLES           pcre_maketables()
554    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
555    #define PCRE_PRINTINT             PCRE_PRINTINT8
556    #define PCRE_STUDY                PCRE_STUDY8
557    
558    /* ----- Only 16-bit mode is supported ----- */
559    
560    #else
561    #define CHAR_SIZE                 2
562    #define PCHARS                    PCHARS16
563    #define PCHARSV                   PCHARSV16
564    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
565    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
566    #define STRLEN                    STRLEN16
567    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
568    #define PCRE_COMPILE              PCRE_COMPILE16
569    #define PCRE_CONFIG               pcre16_config
570    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
571    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
572    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
573    #define PCRE_EXEC                 PCRE_EXEC16
574    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
575    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
576    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
577    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
578    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
579    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
580    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
581    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
582    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
583    #define PCRE_MAKETABLES           pcre16_maketables()
584    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
585    #define PCRE_PRINTINT             PCRE_PRINTINT16
586    #define PCRE_STUDY                PCRE_STUDY16
587    #endif
588    
589    /* ----- End of mode-specific function call macros ----- */
590    
591    
592    /* Other parameters */
593    
594  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
595  #ifdef CLK_TCK  #ifdef CLK_TCK
596  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 599  Makefile. */
599  #endif  #endif
600  #endif  #endif
601    
602  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
603    
604    #define LOOPREPEAT 500000
605    
606    /* Static variables */
607    
608    static FILE *outfile;
609    static int log_store = 0;
610    static int callout_count;
611    static int callout_extra;
612    static int callout_fail_count;
613    static int callout_fail_id;
614    static int debug_lengths;
615    static int first_callout;
616    static int jit_was_used;
617    static int locale_set = 0;
618    static int show_malloc;
619    static int use_utf;
620    static size_t gotten_store;
621    static size_t first_gotten_store = 0;
622    static const unsigned char *last_callout_mark = NULL;
623    
624    /* The buffers grow automatically if very long input lines are encountered. */
625    
626    static int buffer_size = 50000;
627    static pcre_uint8 *buffer = NULL;
628    static pcre_uint8 *dbuffer = NULL;
629    static pcre_uint8 *pbuffer = NULL;
630    
631    /* Another buffer is needed for translation to 16-bit character strings. It
632    will be obtained and extended as required. */
633    
634    #ifdef SUPPORT_PCRE16
635    static int buffer16_size = 0;
636    static pcre_uint16 *buffer16 = NULL;
637    
638    #ifdef SUPPORT_PCRE8
639    
640    /* We need the table of operator lengths that is used for 16-bit compiling, in
641    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
642    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
643    appropriately for the 16-bit world. Just as a safety check, make sure that
644    COMPILE_PCRE16 is *not* set. */
645    
646    #ifdef COMPILE_PCRE16
647    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
648    #endif
649    
650    #if LINK_SIZE == 2
651    #undef LINK_SIZE
652    #define LINK_SIZE 1
653    #elif LINK_SIZE == 3 || LINK_SIZE == 4
654    #undef LINK_SIZE
655    #define LINK_SIZE 2
656    #else
657    #error LINK_SIZE must be either 2, 3, or 4
658    #endif
659    
660    #undef IMM2_SIZE
661    #define IMM2_SIZE 1
662    
663    #endif /* SUPPORT_PCRE8 */
664    
665    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
666    #endif  /* SUPPORT_PCRE16 */
667    
668    /* If we have 8-bit support, default use_pcre16 to false; if there is also
669    16-bit support, it can be changed by an option. If there is no 8-bit support,
670    there must be 16-bit support, so default it to 1. */
671    
672    #ifdef SUPPORT_PCRE8
673    static int use_pcre16 = 0;
674    #else
675    static int use_pcre16 = 1;
676    #endif
677    
678    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
679    
680    static int jit_study_bits[] =
681      {
682      PCRE_STUDY_JIT_COMPILE,
683      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
684      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
685      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
686      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
687      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
688      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
689        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
690    };
691    
692    /* Textual explanations for runtime error codes */
693    
694    static const char *errtexts[] = {
695      NULL,  /* 0 is no error */
696      NULL,  /* NOMATCH is handled specially */
697      "NULL argument passed",
698      "bad option value",
699      "magic number missing",
700      "unknown opcode - pattern overwritten?",
701      "no more memory",
702      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
703      "match limit exceeded",
704      "callout error code",
705      NULL,  /* BADUTF8/16 is handled specially */
706      NULL,  /* BADUTF8/16 offset is handled specially */
707      NULL,  /* PARTIAL is handled specially */
708      "not used - internal error",
709      "internal error - pattern overwritten?",
710      "bad count value",
711      "item unsupported for DFA matching",
712      "backreference condition or recursion test not supported for DFA matching",
713      "match limit not supported for DFA matching",
714      "workspace size exceeded in DFA matching",
715      "too much recursion for DFA matching",
716      "recursion limit exceeded",
717      "not used - internal error",
718      "invalid combination of newline options",
719      "bad offset value",
720      NULL,  /* SHORTUTF8/16 is handled specially */
721      "nested recursion at the same subject position",
722      "JIT stack limit reached",
723      "pattern compiled in wrong mode: 8-bit/16-bit error"
724    };
725    
726    
727    /*************************************************
728    *         Alternate character tables             *
729    *************************************************/
730    
731    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
732    using the default tables of the library. However, the T option can be used to
733    select alternate sets of tables, for different kinds of testing. Note also that
734    the L (locale) option also adjusts the tables. */
735    
736    /* This is the set of tables distributed as default with PCRE. It recognizes
737    only ASCII characters. */
738    
739    static const pcre_uint8 tables0[] = {
740    
741    /* This table is a lower casing table. */
742    
743        0,  1,  2,  3,  4,  5,  6,  7,
744        8,  9, 10, 11, 12, 13, 14, 15,
745       16, 17, 18, 19, 20, 21, 22, 23,
746       24, 25, 26, 27, 28, 29, 30, 31,
747       32, 33, 34, 35, 36, 37, 38, 39,
748       40, 41, 42, 43, 44, 45, 46, 47,
749       48, 49, 50, 51, 52, 53, 54, 55,
750       56, 57, 58, 59, 60, 61, 62, 63,
751       64, 97, 98, 99,100,101,102,103,
752      104,105,106,107,108,109,110,111,
753      112,113,114,115,116,117,118,119,
754      120,121,122, 91, 92, 93, 94, 95,
755       96, 97, 98, 99,100,101,102,103,
756      104,105,106,107,108,109,110,111,
757      112,113,114,115,116,117,118,119,
758      120,121,122,123,124,125,126,127,
759      128,129,130,131,132,133,134,135,
760      136,137,138,139,140,141,142,143,
761      144,145,146,147,148,149,150,151,
762      152,153,154,155,156,157,158,159,
763      160,161,162,163,164,165,166,167,
764      168,169,170,171,172,173,174,175,
765      176,177,178,179,180,181,182,183,
766      184,185,186,187,188,189,190,191,
767      192,193,194,195,196,197,198,199,
768      200,201,202,203,204,205,206,207,
769      208,209,210,211,212,213,214,215,
770      216,217,218,219,220,221,222,223,
771      224,225,226,227,228,229,230,231,
772      232,233,234,235,236,237,238,239,
773      240,241,242,243,244,245,246,247,
774      248,249,250,251,252,253,254,255,
775    
776    /* This table is a case flipping table. */
777    
778        0,  1,  2,  3,  4,  5,  6,  7,
779        8,  9, 10, 11, 12, 13, 14, 15,
780       16, 17, 18, 19, 20, 21, 22, 23,
781       24, 25, 26, 27, 28, 29, 30, 31,
782       32, 33, 34, 35, 36, 37, 38, 39,
783       40, 41, 42, 43, 44, 45, 46, 47,
784       48, 49, 50, 51, 52, 53, 54, 55,
785       56, 57, 58, 59, 60, 61, 62, 63,
786       64, 97, 98, 99,100,101,102,103,
787      104,105,106,107,108,109,110,111,
788      112,113,114,115,116,117,118,119,
789      120,121,122, 91, 92, 93, 94, 95,
790       96, 65, 66, 67, 68, 69, 70, 71,
791       72, 73, 74, 75, 76, 77, 78, 79,
792       80, 81, 82, 83, 84, 85, 86, 87,
793       88, 89, 90,123,124,125,126,127,
794      128,129,130,131,132,133,134,135,
795      136,137,138,139,140,141,142,143,
796      144,145,146,147,148,149,150,151,
797      152,153,154,155,156,157,158,159,
798      160,161,162,163,164,165,166,167,
799      168,169,170,171,172,173,174,175,
800      176,177,178,179,180,181,182,183,
801      184,185,186,187,188,189,190,191,
802      192,193,194,195,196,197,198,199,
803      200,201,202,203,204,205,206,207,
804      208,209,210,211,212,213,214,215,
805      216,217,218,219,220,221,222,223,
806      224,225,226,227,228,229,230,231,
807      232,233,234,235,236,237,238,239,
808      240,241,242,243,244,245,246,247,
809      248,249,250,251,252,253,254,255,
810    
811    /* This table contains bit maps for various character classes. Each map is 32
812    bytes long and the bits run from the least significant end of each byte. The
813    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
814    graph, print, punct, and cntrl. Other classes are built from combinations. */
815    
816      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
817      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820    
821      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
822      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
823      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
824      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825    
826      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
827      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
828      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
829      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830    
831      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
832      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
833      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
834      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835    
836      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
837      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
838      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
839      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840    
841      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
842      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
843      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845    
846      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
847      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850    
851      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
852      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
853      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
855    
856      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
857      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
858      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
859      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
860    
861      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
862      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
863      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
864      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
865    
866    /* This table identifies various classes of character by individual bits:
867      0x01   white space character
868      0x02   letter
869      0x04   decimal digit
870      0x08   hexadecimal digit
871      0x10   alphanumeric or '_'
872      0x80   regular expression metacharacter or binary zero
873    */
874    
875      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
876      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
877      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
878      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
879      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
880      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
881      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
882      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
883      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
884      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
885      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
886      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
887      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
888      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
889      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
890      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
891      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
892      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
893      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
894      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
895      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
896      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
897      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
898      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
899      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
900      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
901      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
902      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
903      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
904      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
905      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
906      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
907    
908    /* This is a set of tables that came originally from a Windows user. It seems to
909    be at least an approximation of ISO 8859. In particular, there are characters
910    greater than 128 that are marked as spaces, letters, etc. */
911    
912    static const pcre_uint8 tables1[] = {
913    0,1,2,3,4,5,6,7,
914    8,9,10,11,12,13,14,15,
915    16,17,18,19,20,21,22,23,
916    24,25,26,27,28,29,30,31,
917    32,33,34,35,36,37,38,39,
918    40,41,42,43,44,45,46,47,
919    48,49,50,51,52,53,54,55,
920    56,57,58,59,60,61,62,63,
921    64,97,98,99,100,101,102,103,
922    104,105,106,107,108,109,110,111,
923    112,113,114,115,116,117,118,119,
924    120,121,122,91,92,93,94,95,
925    96,97,98,99,100,101,102,103,
926    104,105,106,107,108,109,110,111,
927    112,113,114,115,116,117,118,119,
928    120,121,122,123,124,125,126,127,
929    128,129,130,131,132,133,134,135,
930    136,137,138,139,140,141,142,143,
931    144,145,146,147,148,149,150,151,
932    152,153,154,155,156,157,158,159,
933    160,161,162,163,164,165,166,167,
934    168,169,170,171,172,173,174,175,
935    176,177,178,179,180,181,182,183,
936    184,185,186,187,188,189,190,191,
937    224,225,226,227,228,229,230,231,
938    232,233,234,235,236,237,238,239,
939    240,241,242,243,244,245,246,215,
940    248,249,250,251,252,253,254,223,
941    224,225,226,227,228,229,230,231,
942    232,233,234,235,236,237,238,239,
943    240,241,242,243,244,245,246,247,
944    248,249,250,251,252,253,254,255,
945    0,1,2,3,4,5,6,7,
946    8,9,10,11,12,13,14,15,
947    16,17,18,19,20,21,22,23,
948    24,25,26,27,28,29,30,31,
949    32,33,34,35,36,37,38,39,
950    40,41,42,43,44,45,46,47,
951    48,49,50,51,52,53,54,55,
952    56,57,58,59,60,61,62,63,
953    64,97,98,99,100,101,102,103,
954    104,105,106,107,108,109,110,111,
955    112,113,114,115,116,117,118,119,
956    120,121,122,91,92,93,94,95,
957    96,65,66,67,68,69,70,71,
958    72,73,74,75,76,77,78,79,
959    80,81,82,83,84,85,86,87,
960    88,89,90,123,124,125,126,127,
961    128,129,130,131,132,133,134,135,
962    136,137,138,139,140,141,142,143,
963    144,145,146,147,148,149,150,151,
964    152,153,154,155,156,157,158,159,
965    160,161,162,163,164,165,166,167,
966    168,169,170,171,172,173,174,175,
967    176,177,178,179,180,181,182,183,
968    184,185,186,187,188,189,190,191,
969    224,225,226,227,228,229,230,231,
970    232,233,234,235,236,237,238,239,
971    240,241,242,243,244,245,246,215,
972    248,249,250,251,252,253,254,223,
973    192,193,194,195,196,197,198,199,
974    200,201,202,203,204,205,206,207,
975    208,209,210,211,212,213,214,247,
976    216,217,218,219,220,221,222,255,
977    0,62,0,0,1,0,0,0,
978    0,0,0,0,0,0,0,0,
979    32,0,0,0,1,0,0,0,
980    0,0,0,0,0,0,0,0,
981    0,0,0,0,0,0,255,3,
982    126,0,0,0,126,0,0,0,
983    0,0,0,0,0,0,0,0,
984    0,0,0,0,0,0,0,0,
985    0,0,0,0,0,0,255,3,
986    0,0,0,0,0,0,0,0,
987    0,0,0,0,0,0,12,2,
988    0,0,0,0,0,0,0,0,
989    0,0,0,0,0,0,0,0,
990    254,255,255,7,0,0,0,0,
991    0,0,0,0,0,0,0,0,
992    255,255,127,127,0,0,0,0,
993    0,0,0,0,0,0,0,0,
994    0,0,0,0,254,255,255,7,
995    0,0,0,0,0,4,32,4,
996    0,0,0,128,255,255,127,255,
997    0,0,0,0,0,0,255,3,
998    254,255,255,135,254,255,255,7,
999    0,0,0,0,0,4,44,6,
1000    255,255,127,255,255,255,127,255,
1001    0,0,0,0,254,255,255,255,
1002    255,255,255,255,255,255,255,127,
1003    0,0,0,0,254,255,255,255,
1004    255,255,255,255,255,255,255,255,
1005    0,2,0,0,255,255,255,255,
1006    255,255,255,255,255,255,255,127,
1007    0,0,0,0,255,255,255,255,
1008    255,255,255,255,255,255,255,255,
1009    0,0,0,0,254,255,0,252,
1010    1,0,0,248,1,0,0,120,
1011    0,0,0,0,254,255,255,255,
1012    0,0,128,0,0,0,128,0,
1013    255,255,255,255,0,0,0,0,
1014    0,0,0,0,0,0,0,128,
1015    255,255,255,255,0,0,0,0,
1016    0,0,0,0,0,0,0,0,
1017    128,0,0,0,0,0,0,0,
1018    0,1,1,0,1,1,0,0,
1019    0,0,0,0,0,0,0,0,
1020    0,0,0,0,0,0,0,0,
1021    1,0,0,0,128,0,0,0,
1022    128,128,128,128,0,0,128,0,
1023    28,28,28,28,28,28,28,28,
1024    28,28,0,0,0,0,0,128,
1025    0,26,26,26,26,26,26,18,
1026    18,18,18,18,18,18,18,18,
1027    18,18,18,18,18,18,18,18,
1028    18,18,18,128,128,0,128,16,
1029    0,26,26,26,26,26,26,18,
1030    18,18,18,18,18,18,18,18,
1031    18,18,18,18,18,18,18,18,
1032    18,18,18,128,128,0,0,0,
1033    0,0,0,0,0,1,0,0,
1034    0,0,0,0,0,0,0,0,
1035    0,0,0,0,0,0,0,0,
1036    0,0,0,0,0,0,0,0,
1037    1,0,0,0,0,0,0,0,
1038    0,0,18,0,0,0,0,0,
1039    0,0,20,20,0,18,0,0,
1040    0,20,18,0,0,0,0,0,
1041    18,18,18,18,18,18,18,18,
1042    18,18,18,18,18,18,18,18,
1043    18,18,18,18,18,18,18,0,
1044    18,18,18,18,18,18,18,18,
1045    18,18,18,18,18,18,18,18,
1046    18,18,18,18,18,18,18,18,
1047    18,18,18,18,18,18,18,0,
1048    18,18,18,18,18,18,18,18
1049    };
1050    
1051    
1052    
1053    
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback used only when the build does not define HAVE_STRERROR (the
original note cites SunOS4 as an example of a system without strerror()). The
message is looked up in the system's sys_errlist[] table, which holds sys_nerr
entries; any number outside that table yields a fixed "unknown" text. */

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1073    
1074    
1075    /*************************************************
1076    *         JIT memory callback                    *
1077    *************************************************/
1078    
1079    static pcre_jit_stack* jit_callback(void *arg)
1080    {
1081    jit_was_used = TRUE;
1082    return (pcre_jit_stack *)arg;
1083    }
1084    
1085    
1086    #if !defined NOUTF || defined SUPPORT_PCRE16
1087    /*************************************************
1088    *            Convert UTF-8 string to value       *
1089    *************************************************/
1090    
1091    /* This function takes one or more bytes that represents a UTF-8 character,
1092    and returns the value of the character.
1093    
1094    Argument:
1095      utf8bytes   a pointer to the byte vector
1096      vptr        a pointer to an int to receive the value
1097    
1098    Returns:      >  0 => the number of bytes consumed
1099                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1100    */
1101    
1102    static int
1103    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1104    {
1105    int c = *utf8bytes++;
1106    int d = c;
1107    int i, j, s;
1108    
1109    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1110      {
1111      if ((d & 0x80) == 0) break;
1112      d <<= 1;
1113      }
1114    
1115    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1116    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1117    
1118    /* i now has a value in the range 1-5 */
1119    
1120    s = 6*i;
1121    d = (c & utf8_table3[i]) << s;
1122    
1123    for (j = 0; j < i; j++)
1124      {
1125      c = *utf8bytes++;
1126      if ((c & 0xc0) != 0x80) return -(j+1);
1127      s -= 6;
1128      d |= (c & 0x3f) << s;
1129      }
1130    
1131    /* Check that encoding was the correct unique one */
1132    
1133    for (j = 0; j < utf8_table1_size; j++)
1134      if (d <= utf8_table1[j]) break;
1135    if (j != i) return -(i+1);
1136    
1137    /* Valid value */
1138    
1139    *vptr = d;
1140    return i+1;
1141    }
1142    #endif /* NOUTF || SUPPORT_PCRE16 */
1143    
1144    
1145    
1146    #if !defined NOUTF || defined SUPPORT_PCRE16
1147    /*************************************************
1148    *       Convert character value to UTF-8         *
1149    *************************************************/
1150    
1151    /* This function takes an integer value in the range 0 - 0x7fffffff
1152    and encodes it as a UTF-8 character in 1 to 6 bytes.
1153    
1154    Arguments:
1155      cvalue     the character value
1156      utf8bytes  pointer to buffer for result - at least 6 bytes long
1157    
1158    Returns:     number of bytes placed in the buffer
1159    */
1160    
1161    static int
1162    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1163    {
1164    register int i, j;
1165    for (i = 0; i < utf8_table1_size; i++)
1166      if (cvalue <= utf8_table1[i]) break;
1167    utf8bytes += i;
1168    for (j = i; j > 0; j--)
1169     {
1170     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1171     cvalue >>= 6;
1172     }
1173    *utf8bytes = utf8_table2[i] | cvalue;
1174    return i + 1;
1175    }
1176    #endif
1177    
1178    
1179    #ifdef SUPPORT_PCRE16
1180    /*************************************************
1181    *         Convert a string to 16-bit             *
1182    *************************************************/
1183    
1184    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1185    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1186    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1187    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1188    result is always left in buffer16.
1189    
1190    Note that this function does not object to surrogate values. This is
1191    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1192    for the purpose of testing that they are correctly faulted.
1193    
1194    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1195    in UTF-8 so that values greater than 255 can be handled.
1196    
1197    Arguments:
1198      data       TRUE if converting a data line; FALSE for a regex
1199      p          points to a byte string
1200      utf        true if UTF-8 (to be converted to UTF-16)
1201      len        number of bytes in the string (excluding trailing zero)
1202    
1203    Returns:     number of 16-bit data items used (excluding trailing zero)
1204                 OR -1 if a UTF-8 string is malformed
1205                 OR -2 if a value > 0x10ffff is encountered
1206                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1207    */
1208    
1209    static int
1210    to16(int data, pcre_uint8 *p, int utf, int len)
1211    {
1212    pcre_uint16 *pp;
1213    
1214    if (buffer16_size < 2*len + 2)
1215      {
1216      if (buffer16 != NULL) free(buffer16);
1217      buffer16_size = 2*len + 2;
1218      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1219      if (buffer16 == NULL)
1220        {
1221        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1222        exit(1);
1223        }
1224      }
1225    
1226    pp = buffer16;
1227    
1228    if (!utf && !data)
1229      {
1230      while (len-- > 0) *pp++ = *p++;
1231      }
1232    
1233    else
1234      {
1235      int c = 0;
1236      while (len > 0)
1237        {
1238        int chlen = utf82ord(p, &c);
1239        if (chlen <= 0) return -1;
1240        if (c > 0x10ffff) return -2;
1241        p += chlen;
1242        len -= chlen;
1243        if (c < 0x10000) *pp++ = c; else
1244          {
1245          if (!utf) return -3;
1246          c -= 0x10000;
1247          *pp++ = 0xD800 | (c >> 10);
1248          *pp++ = 0xDC00 | (c & 0x3ff);
1249          }
1250        }
1251      }
1252    
1253    *pp = 0;
1254    return pp - buffer16;
1255    }
1256    #endif
1257    
1258    
1259    /*************************************************
1260    *        Read or extend an input line            *
1261    *************************************************/
1262    
1263    /* Input lines are read into buffer, but both patterns and data lines can be
1264    continued over multiple input lines. In addition, if the buffer fills up, we
1265    want to automatically expand it so as to be able to handle extremely large
1266    lines that are needed for certain stress tests. When the input buffer is
1267    expanded, the other two buffers must also be expanded likewise, and the
1268    contents of pbuffer, which are a copy of the input for callouts, must be
1269    preserved (for when expansion happens for a data line). This is not the most
1270    optimal way of handling this, but hey, this is just a test program!
1271    
1272    Arguments:
1273      f            the file to read
1274      start        where in buffer to start (this *must* be within buffer)
1275      prompt       for stdin or readline()
1276    
1277    Returns:       pointer to the start of new data
1278                   could be a copy of start, or could be moved
1279                   NULL if no data read and EOF reached
1280    */
1281    
1282    static pcre_uint8 *
1283    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1284    {
1285    pcre_uint8 *here = start;
1286    
1287    for (;;)
1288      {
1289      size_t rlen = (size_t)(buffer_size - (here - buffer));
1290    
1291      if (rlen > 1000)
1292        {
1293        int dlen;
1294    
1295        /* If libreadline support is required, use readline() to read a line if the
1296        input is a terminal. Note that readline() removes the trailing newline, so
1297        we must put it back again, to be compatible with fgets(). */
1298    
1299    #ifdef SUPPORT_LIBREADLINE
1300        if (isatty(fileno(f)))
1301          {
1302          size_t len;
1303          char *s = readline(prompt);
1304          if (s == NULL) return (here == start)? NULL : start;
1305          len = strlen(s);
1306          if (len > 0) add_history(s);
1307          if (len > rlen - 1) len = rlen - 1;
1308          memcpy(here, s, len);
1309          here[len] = '\n';
1310          here[len+1] = 0;
1311          free(s);
1312          }
1313        else
1314    #endif
1315    
1316        /* Read the next line by normal means, prompting if the file is stdin. */
1317    
1318          {
1319          if (f == stdin) printf("%s", prompt);
1320          if (fgets((char *)here, rlen,  f) == NULL)
1321            return (here == start)? NULL : start;
1322          }
1323    
1324        dlen = (int)strlen((char *)here);
1325        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1326        here += dlen;
1327        }
1328    
1329      else
1330        {
1331        int new_buffer_size = 2*buffer_size;
1332        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1333        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1334        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1335    
1336        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1337          {
1338          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1339          exit(1);
1340          }
1341    
1342        memcpy(new_buffer, buffer, buffer_size);
1343        memcpy(new_pbuffer, pbuffer, buffer_size);
1344    
1345        buffer_size = new_buffer_size;
1346    
1347        start = new_buffer + (start - buffer);
1348        here = new_buffer + (here - buffer);
1349    
1350        free(buffer);
1351        free(dbuffer);
1352        free(pbuffer);
1353    
1354        buffer = new_buffer;
1355        dbuffer = new_dbuffer;
1356        pbuffer = new_pbuffer;
1357        }
1358      }
1359    
1360    return NULL;  /* Control never gets here */
1361    }
1362    
1363    
1364    
1365    /*************************************************
1366    *          Read number from string               *
1367    *************************************************/
1368    
1369    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1370    around with conditional compilation, just do the job by hand. It is only used
1371    for unpicking arguments, so just keep it simple.
1372    
1373    Arguments:
1374      str           string to be converted
1375      endptr        where to put the end pointer
1376    
1377    Returns:        the value, as an int
1378    */
1379    
1380    static int
1381    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1382    {
1383    int result = 0;
1384    while(*str != 0 && isspace(*str)) str++;
1385    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1386    *endptr = str;
1387    return(result);
1388    }
1389    
1390    
1391    
1392    /*************************************************
1393    *             Print one character                *
1394    *************************************************/
1395    
1396    /* Print a single character either literally, or as a hex escape. */
1397    
1398    static int pchar(int c, FILE *f)
1399    {
1400    if (PRINTOK(c))
1401      {
1402      if (f != NULL) fprintf(f, "%c", c);
1403      return 1;
1404      }
1405    
1406    if (c < 0x100)
1407      {
1408      if (use_utf)
1409        {
1410        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1411        return 6;
1412        }
1413      else
1414        {
1415        if (f != NULL) fprintf(f, "\\x%02x", c);
1416        return 4;
1417        }
1418      }
1419    
1420    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1421    return (c <= 0x000000ff)? 6 :
1422           (c <= 0x00000fff)? 7 :
1423           (c <= 0x0000ffff)? 8 :
1424           (c <= 0x000fffff)? 9 : 10;
1425    }
1426    
1427    
1428    
1429    #ifdef SUPPORT_PCRE8
1430    /*************************************************
1431    *         Print 8-bit character string           *
1432    *************************************************/
1433    
1434    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1435    If handed a NULL file, just counts chars without printing. */
1436    
1437    static int pchars(pcre_uint8 *p, int length, FILE *f)
1438    {
1439    int c = 0;
1440    int yield = 0;
1441    
1442    if (length < 0)
1443      length = strlen((char *)p);
1444    
1445    while (length-- > 0)
1446      {
1447    #if !defined NOUTF
1448      if (use_utf)
1449        {
1450        int rc = utf82ord(p, &c);
1451        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1452          {
1453          length -= rc - 1;
1454          p += rc;
1455          yield += pchar(c, f);
1456          continue;
1457          }
1458        }
1459    #endif
1460      c = *p++;
1461      yield += pchar(c, f);
1462      }
1463    
1464    return yield;
1465    }
1466    #endif
1467    
1468    
1469    
1470    #ifdef SUPPORT_PCRE16
1471    /*************************************************
1472    *    Find length of 0-terminated 16-bit string   *
1473    *************************************************/
1474    
1475    static int strlen16(PCRE_SPTR16 p)
1476    {
1477    int len = 0;
1478    while (*p++ != 0) len++;
1479    return len;
1480    }
1481    #endif  /* SUPPORT_PCRE16 */
1482    
1483    
1484    #ifdef SUPPORT_PCRE16
1485    /*************************************************
1486    *           Print 16-bit character string        *
1487    *************************************************/
1488    
1489    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1490    If handed a NULL file, just counts chars without printing. */
1491    
1492    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1493    {
1494    int yield = 0;
1495    
1496    if (length < 0)
1497      length = strlen16(p);
1498    
1499    while (length-- > 0)
1500      {
1501      int c = *p++ & 0xffff;
1502    #if !defined NOUTF
1503      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1504        {
1505        int d = *p & 0xffff;
1506        if (d >= 0xDC00 && d < 0xDFFF)
1507          {
1508          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1509          length--;
1510          p++;
1511          }
1512        }
1513    #endif
1514      yield += pchar(c, f);
1515      }
1516    
1517    return yield;
1518    }
1519    #endif  /* SUPPORT_PCRE16 */
1520    
1521    
1522    
1523    #ifdef SUPPORT_PCRE8
1524    /*************************************************
1525    *     Read a capture name (8-bit) and check it   *
1526    *************************************************/
1527    
1528    static pcre_uint8 *
1529    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1530    {
1531    pcre_uint8 *npp = *pp;
1532    while (isalnum(*p)) *npp++ = *p++;
1533    *npp++ = 0;
1534    *npp = 0;
1535    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1536      {
1537      fprintf(outfile, "no parentheses with name \"");
1538      PCHARSV(*pp, 0, -1, outfile);
1539      fprintf(outfile, "\"\n");
1540      }
1541    
1542    *pp = npp;
1543    return p;
1544    }
1545    #endif  /* SUPPORT_PCRE8 */
1546    
1547    
1548    
1549    #ifdef SUPPORT_PCRE16
1550    /*************************************************
1551    *     Read a capture name (16-bit) and check it  *
1552    *************************************************/
1553    
1554    /* Note that the text being read is 8-bit. */
1555    
1556    static pcre_uint8 *
1557    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1558    {
1559    pcre_uint16 *npp = *pp;
1560    while (isalnum(*p)) *npp++ = *p++;
1561    *npp++ = 0;
1562    *npp = 0;
1563    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1564      {
1565      fprintf(outfile, "no parentheses with name \"");
1566      PCHARSV(*pp, 0, -1, outfile);
1567      fprintf(outfile, "\"\n");
1568      }
1569    *pp = npp;
1570    return p;
1571    }
1572    #endif  /* SUPPORT_PCRE16 */
1573    
1574    
1575    
1576    /*************************************************
1577    *              Callout function                  *
1578    *************************************************/
1579    
1580    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1581    the match. Yield zero unless more callouts than the fail count, or the callout
1582    data is not zero. */
1583    
1584    static int callout(pcre_callout_block *cb)
1585    {
1586    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1587    int i, pre_start, post_start, subject_length;
1588    
1589    if (callout_extra)
1590      {
1591      fprintf(f, "Callout %d: last capture = %d\n",
1592        cb->callout_number, cb->capture_last);
1593    
1594      for (i = 0; i < cb->capture_top * 2; i += 2)
1595        {
1596        if (cb->offset_vector[i] < 0)
1597          fprintf(f, "%2d: <unset>\n", i/2);
1598        else
1599          {
1600          fprintf(f, "%2d: ", i/2);
1601          PCHARSV(cb->subject, cb->offset_vector[i],
1602            cb->offset_vector[i+1] - cb->offset_vector[i], f);
1603          fprintf(f, "\n");
1604          }
1605        }
1606      }
1607    
1608    /* Re-print the subject in canonical form, the first time or if giving full
1609    datails. On subsequent calls in the same match, we use pchars just to find the
1610    printed lengths of the substrings. */
1611    
1612    if (f != NULL) fprintf(f, "--->");
1613    
1614    PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1615    PCHARS(post_start, cb->subject, cb->start_match,
1616      cb->current_position - cb->start_match, f);
1617    
1618    PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1619    
1620    PCHARSV(cb->subject, cb->current_position,
1621      cb->subject_length - cb->current_position, f);
1622    
1623    if (f != NULL) fprintf(f, "\n");
1624    
1625    /* Always print appropriate indicators, with callout number if not already
1626    shown. For automatic callouts, show the pattern offset. */
1627    
1628    if (cb->callout_number == 255)
1629      {
1630      fprintf(outfile, "%+3d ", cb->pattern_position);
1631      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1632      }
1633    else
1634      {
1635      if (callout_extra) fprintf(outfile, "    ");
1636        else fprintf(outfile, "%3d ", cb->callout_number);
1637      }
1638    
1639    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1640    fprintf(outfile, "^");
1641    
1642    if (post_start > 0)
1643      {
1644      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1645      fprintf(outfile, "^");
1646      }
1647    
1648    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1649      fprintf(outfile, " ");
1650    
1651    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1652      pbuffer + cb->pattern_position);
1653    
1654    fprintf(outfile, "\n");
1655    first_callout = 0;
1656    
1657    if (cb->mark != last_callout_mark)
1658      {
1659      if (cb->mark == NULL)
1660        fprintf(outfile, "Latest Mark: <unset>\n");
1661      else
1662        {
1663        fprintf(outfile, "Latest Mark: ");
1664        PCHARSV(cb->mark, 0, -1, outfile);
1665        putc('\n', outfile);
1666        }
1667      last_callout_mark = cb->mark;
1668      }
1669    
1670    if (cb->callout_data != NULL)
1671      {
1672      int callout_data = *((int *)(cb->callout_data));
1673      if (callout_data != 0)
1674        {
1675        fprintf(outfile, "Callout data = %d\n", callout_data);
1676        return callout_data;
1677        }
1678      }
1679    
1680    return (cb->callout_number != callout_fail_id)? 0 :
1681           (++callout_count >= callout_fail_count)? 1 : 0;
1682    }
1683    
1684    
1685    /*************************************************
1686    *            Local malloc functions              *
1687    *************************************************/
1688    
1689    /* Alternative malloc function, to test functionality and save the size of a
1690    compiled re, which is the first store request that pcre_compile() makes. The
1691    show_malloc variable is set only during matching. */
1692    
1693    static void *new_malloc(size_t size)
1694    {
1695    void *block = malloc(size);
1696    gotten_store = size;
1697    if (first_gotten_store == 0) first_gotten_store = size;
1698    if (show_malloc)
1699      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1700    return block;
1701    }
1702    
1703    static void new_free(void *block)
1704    {
1705    if (show_malloc)
1706      fprintf(outfile, "free             %p\n", block);
1707    free(block);
1708    }
1709    
1710    /* For recursion malloc/free, to test stacking calls */
1711    
1712    static void *stack_malloc(size_t size)
1713    {
1714    void *block = malloc(size);
1715    if (show_malloc)
1716      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1717    return block;
1718    }
1719    
1720    static void stack_free(void *block)
1721    {
1722    if (show_malloc)
1723      fprintf(outfile, "stack_free       %p\n", block);
1724    free(block);
1725    }
1726    
1727    
1728    /*************************************************
1729    *          Call pcre_fullinfo()                  *
1730    *************************************************/
1731    
1732    /* Get one piece of information from the pcre_fullinfo() function. When only
1733    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1734    value, but the code is defensive.
1735    
1736    Arguments:
1737      re        compiled regex
1738      study     study data
1739      option    PCRE_INFO_xxx option
1740      ptr       where to put the data
1741    
1742    Returns:    0 when OK, < 0 on error
1743    */
1744    
1745    static int
1746    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1747    {
1748    int rc;
1749    
1750    if (use_pcre16)
1751    #ifdef SUPPORT_PCRE16
1752      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1753    #else
1754      rc = PCRE_ERROR_BADMODE;
1755    #endif
1756    else
1757    #ifdef SUPPORT_PCRE8
1758      rc = pcre_fullinfo(re, study, option, ptr);
1759    #else
1760      rc = PCRE_ERROR_BADMODE;
1761    #endif
1762    
1763    if (rc < 0)
1764      {
1765      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1766        use_pcre16? "16" : "", option);
1767      if (rc == PCRE_ERROR_BADMODE)
1768        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1769          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1770      }
1771    
1772    return rc;
1773    }
1774    
 static FILE *outfile;  
 static int log_store = 0;  
 static size_t gotten_store;  
1775    
1776    
1777    /*************************************************
1778    *             Swap byte functions                *
1779    *************************************************/
1780    
1781  /* Debugging function to print the internal form of the regex. This is the same  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1782  code as contained in pcre.c under the DEBUG macro. */  value, respectively.
1783    
1784  static const char *OP_names[] = {  Arguments:
1785    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    value        any number
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
1786    
1787    Returns:       the byte swapped value
1788    */
1789    
1790  static void print_internals(pcre *re)  static pcre_uint32
1791    swap_uint32(pcre_uint32 value)
1792  {  {
1793  unsigned char *code = ((real_pcre *)re)->code;  return ((value & 0x000000ff) << 24) |
1794           ((value & 0x0000ff00) <<  8) |
1795           ((value & 0x00ff0000) >>  8) |
1796           (value >> 24);
1797    }
1798    
1799  fprintf(outfile, "------------------------------------------------------------------\n");  static pcre_uint16
1800    swap_uint16(pcre_uint16 value)
1801    {
1802    return (value >> 8) | (value << 8);
1803    }
1804    
 for(;;)  
   {  
   int c;  
   int charlength;  
1805    
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
1806    
1807    if (*code >= OP_BRA)  /*************************************************
1808      {  *        Flip bytes in a compiled pattern        *
1809      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  *************************************************/
     code += 2;  
     }  
1810    
1811    else switch(*code)  /* This function is called if the 'F' option was present on a pattern that is
1812      {  to be written to a file. We flip the bytes of all the integer fields in the
1813      case OP_END:  regex data block and the study block. In 16-bit mode this also flips relevant
1814      fprintf(outfile, "    %s\n", OP_names[*code]);  bytes in the pattern itself. This is to make it possible to test PCRE's
1815      fprintf(outfile, "------------------------------------------------------------------\n");  ability to reload byte-flipped patterns, e.g. those compiled on a different
1816      return;  architecture. */
1817    
1818      case OP_OPT:  static void
1819      fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  regexflip(pcre *ere, pcre_extra *extra)
1820      code++;  {
1821      break;  REAL_PCRE *re = (REAL_PCRE *)ere;
1822    #ifdef SUPPORT_PCRE16
1823    int op;
1824    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1825    int length = re->name_count * re->name_entry_size;
1826    #ifdef SUPPORT_UTF
1827    BOOL utf = (re->options & PCRE_UTF16) != 0;
1828    BOOL utf16_char = FALSE;
1829    #endif /* SUPPORT_UTF */
1830    #endif /* SUPPORT_PCRE16 */
1831    
1832    /* Always flip the bytes in the main data block and study blocks. */
1833    
1834    re->magic_number = REVERSED_MAGIC_NUMBER;
1835    re->size = swap_uint32(re->size);
1836    re->options = swap_uint32(re->options);
1837    re->flags = swap_uint16(re->flags);
1838    re->top_bracket = swap_uint16(re->top_bracket);
1839    re->top_backref = swap_uint16(re->top_backref);
1840    re->first_char = swap_uint16(re->first_char);
1841    re->req_char = swap_uint16(re->req_char);
1842    re->name_table_offset = swap_uint16(re->name_table_offset);
1843    re->name_entry_size = swap_uint16(re->name_entry_size);
1844    re->name_count = swap_uint16(re->name_count);
1845    
1846      case OP_COND:  if (extra != NULL)
1847      fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);    {
1848      code += 2;    pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1849      break;    rsd->size = swap_uint32(rsd->size);
1850      rsd->flags = swap_uint32(rsd->flags);
1851      rsd->minlength = swap_uint32(rsd->minlength);
1852      }
1853    
1854      case OP_CREF:  /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1855      fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  in the name table, if present, and then in the pattern itself. */
     code++;  
     break;  
1856    
1857      case OP_CHARS:  #ifdef SUPPORT_PCRE16
1858      charlength = *(++code);  if (!use_pcre16) return;
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
1859    
1860      case OP_KETRMAX:  while(TRUE)
1861      case OP_KETRMIN:    {
1862      case OP_ALT:    /* Swap previous characters. */
1863      case OP_KET:    while (length-- > 0)
1864      case OP_ASSERT:      {
1865      case OP_ASSERT_NOT:      *ptr = swap_uint16(*ptr);
1866      case OP_ASSERTBACK:      ptr++;
1867      case OP_ASSERTBACK_NOT:      }
1868      case OP_ONCE:  #ifdef SUPPORT_UTF
1869      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);    if (utf16_char)
1870      code += 2;      {
1871      break;      if ((ptr[-1] & 0xfc00) == 0xd800)
1872          {
1873          /* We know that there is only one extra character in UTF-16. */
1874          *ptr = swap_uint16(*ptr);
1875          ptr++;
1876          }
1877        }
1878      utf16_char = FALSE;
1879    #endif /* SUPPORT_UTF */
1880    
1881      case OP_REVERSE:    /* Get next opcode. */
1882      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
1883      code += 2;    length = 0;
1884      break;    op = *ptr;
1885      *ptr++ = swap_uint16(op);
1886    
1887      switch (op)
1888        {
1889        case OP_END:
1890        return;
1891    
1892    #ifdef SUPPORT_UTF
1893        case OP_CHAR:
1894        case OP_CHARI:
1895        case OP_NOT:
1896        case OP_NOTI:
1897      case OP_STAR:      case OP_STAR:
1898      case OP_MINSTAR:      case OP_MINSTAR:
1899      case OP_PLUS:      case OP_PLUS:
1900      case OP_MINPLUS:      case OP_MINPLUS:
1901      case OP_QUERY:      case OP_QUERY:
1902      case OP_MINQUERY:      case OP_MINQUERY:
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
1903      case OP_UPTO:      case OP_UPTO:
1904      case OP_MINUPTO:      case OP_MINUPTO:
1905      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);      case OP_EXACT:
1906        else fprintf(outfile, "    \\x%02x{", c);      case OP_POSSTAR:
1907      if (*code != OP_EXACT) fprintf(outfile, ",");      case OP_POSPLUS:
1908      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_POSQUERY:
1909      if (*code == OP_MINUPTO) fprintf(outfile, "?");      case OP_POSUPTO:
1910      code += 3;      case OP_STARI:
1911      break;      case OP_MINSTARI:
1912        case OP_PLUSI:
1913      case OP_TYPEEXACT:      case OP_MINPLUSI:
1914      case OP_TYPEUPTO:      case OP_QUERYI:
1915      case OP_TYPEMINUPTO:      case OP_MINQUERYI:
1916      fprintf(outfile, "    %s{", OP_names[code[3]]);      case OP_UPTOI:
1917      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");      case OP_MINUPTOI:
1918      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_EXACTI:
1919      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");      case OP_POSSTARI:
1920      code += 3;      case OP_POSPLUSI:
1921      break;      case OP_POSQUERYI:
1922        case OP_POSUPTOI:
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
1923      case OP_NOTSTAR:      case OP_NOTSTAR:
1924      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
1925      case OP_NOTPLUS:      case OP_NOTPLUS:
1926      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
1927      case OP_NOTQUERY:      case OP_NOTQUERY:
1928      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
1929      case OP_NOTUPTO:      case OP_NOTUPTO:
1930      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
1931      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);      case OP_NOTEXACT:
1932        else fprintf(outfile, "    [^\\x%02x]{", c);      case OP_NOTPOSSTAR:
1933      if (*code != OP_NOTEXACT) fprintf(outfile, ",");      case OP_NOTPOSPLUS:
1934      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_NOTPOSQUERY:
1935      if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");      case OP_NOTPOSUPTO:
1936      code += 3;      case OP_NOTSTARI:
1937      break;      case OP_NOTMINSTARI:
1938        case OP_NOTPLUSI:
1939        case OP_NOTMINPLUSI:
1940        case OP_NOTQUERYI:
1941        case OP_NOTMINQUERYI:
1942        case OP_NOTUPTOI:
1943        case OP_NOTMINUPTOI:
1944        case OP_NOTEXACTI:
1945        case OP_NOTPOSSTARI:
1946        case OP_NOTPOSPLUSI:
1947        case OP_NOTPOSQUERYI:
1948        case OP_NOTPOSUPTOI:
1949        if (utf) utf16_char = TRUE;
1950    #endif
1951        /* Fall through. */
1952    
1953      case OP_REF:      default:
1954      fprintf(outfile, "    \\%d", *(++code));      length = OP_lengths16[op] - 1;
1955      code++;      break;
     goto CLASS_REF_REPEAT;  
1956    
1957      case OP_CLASS:      case OP_CLASS:
1958        case OP_NCLASS:
1959        /* Skip the character bit map. */
1960        ptr += 32/sizeof(pcre_uint16);
1961        length = 0;
1962        break;
1963    
1964        case OP_XCLASS:
1965        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1966        if (LINK_SIZE > 1)
1967          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1968            - (1 + LINK_SIZE + 1));
1969        else
1970          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1971    
1972        /* Reverse the size of the XCLASS instance. */
1973        *ptr = swap_uint16(*ptr);
1974        ptr++;
1975        if (LINK_SIZE > 1)
1976          {
1977          *ptr = swap_uint16(*ptr);
1978          ptr++;
1979          }
1980    
1981        op = *ptr;
1982        *ptr = swap_uint16(op);
1983        ptr++;
1984        if ((op & XCL_MAP) != 0)
1985        {        {
1986        int i, min, max;        /* Skip the character bit map. */
1987        code++;        ptr += 32/sizeof(pcre_uint16);
1988        fprintf(outfile, "    [");        length -= 32/sizeof(pcre_uint16);
1989          }
1990        break;
1991        }
1992      }
1993    /* Control should never reach here in 16 bit mode. */
1994    #endif /* SUPPORT_PCRE16 */
1995    }
1996    
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
1997    
       CLASS_REF_REPEAT:  
1998    
1999        switch(*code)  /*************************************************
2000          {  *        Check match or recursion limit          *
2001          case OP_CRSTAR:  *************************************************/
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
2002    
2003          case OP_CRRANGE:  static int
2004          case OP_CRMINRANGE:  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2005          min = (code[1] << 8) + code[2];    int start_offset, int options, int *use_offsets, int use_size_offsets,
2006          max = (code[3] << 8) + code[4];    int flag, unsigned long int *limit, int errnumber, const char *msg)
2007          if (max == 0) fprintf(outfile, "{%d,}", min);  {
2008          else fprintf(outfile, "{%d,%d}", min, max);  int count;
2009          if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  int min = 0;
2010          code += 4;  int mid = 64;
2011          break;  int max = -1;
2012    
2013          default:  extra->flags |= flag;
         code--;  
         }  
       }  
     break;  
2014    
2015      /* Anything else is just a one-node item */  for (;;)
2016      {
2017      *limit = mid;
2018    
2019      default:    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2020      fprintf(outfile, "    %s", OP_names[*code]);      use_offsets, use_size_offsets);
2021      break;  
2022      if (count == errnumber)
2023        {
2024        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2025        min = mid;
2026        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2027      }      }
2028    
2029    code++;    else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2030    fprintf(outfile, "\n");                           count == PCRE_ERROR_PARTIAL)
2031        {
2032        if (mid == min + 1)
2033          {
2034          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2035          break;
2036          }
2037        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2038        max = mid;
2039        mid = (min + mid)/2;
2040        }
2041      else break;    /* Some other error */
2042    }    }
2043    
2044    extra->flags &= ~flag;
2045    return count;
2046  }  }
2047    
2048    
2049    
2050  /* Character string printing function. */  /*************************************************
2051    *         Case-independent strncmp() function    *
2052    *************************************************/
2053    
2054    /*
2055    Arguments:
2056      s         first string
2057      t         second string
2058      n         number of characters to compare
2059    
2060    Returns:    < 0, = 0, or > 0, according to the comparison
2061    */
2062    
2063  static void pchars(unsigned char *p, int length)  static int
2064    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2065  {  {
2066  int c;  while (n--)
2067  while (length-- > 0)    {
2068    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    int c = tolower(*s++) - tolower(*t++);
2069      else fprintf(outfile, "\\x%02x", c);    if (c) return c;
2070      }
2071    return 0;
2072  }  }
2073    
2074    
2075    
2076  /* Alternative malloc function, to test functionality and show the size of the  /*************************************************
2077  compiled re. */  *         Check newline indicator                *
2078    *************************************************/
2079    
2080  static void *new_malloc(size_t size)  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2081    a message and return 0 if there is no match.
2082    
2083    Arguments:
2084      p           points after the leading '<'
2085      f           file for error message
2086    
2087    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2088    */
2089    
2090    static int
2091    check_newline(pcre_uint8 *p, FILE *f)
2092  {  {
2093  gotten_store = size;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2094  if (log_store)  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2095    fprintf(outfile, "Memory allocation (code space): %d\n",  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2096      (int)((int)size - offsetof(real_pcre, code[0])));  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2097  return malloc(size);  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2098    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2099    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2100    fprintf(f, "Unknown newline type at: <%s\n", p);
2101    return 0;
2102  }  }
2103    
2104    
2105    
2106    /*************************************************
2107    *             Usage function                     *
2108    *************************************************/
2109    
2110  /* Get one piece of information from the pcre_fullinfo() function */  static void
2111    usage(void)
 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  
2112  {  {
2113  int rc;  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2114  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  printf("Input and output default to stdin and stdout.\n");
2115    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  #ifdef SUPPORT_LIBREADLINE
2116    printf("If input is a terminal, readline() is used to read from it.\n");
2117    #else
2118    printf("This version of pcretest is not linked with readline().\n");
2119    #endif
2120    printf("\nOptions:\n");
2121    #ifdef SUPPORT_PCRE16
2122    printf("  -16      use the 16-bit library\n");
2123    #endif
2124    printf("  -b       show compiled code\n");
2125    printf("  -C       show PCRE compile-time options and exit\n");
2126    printf("  -C arg   show a specific compile-time option\n");
2127    printf("           and exit with its value. The arg can be:\n");
2128    printf("     linksize     internal link size [2, 3, 4]\n");
2129    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2130    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2131    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2132    printf("     ucp          Unicode Properties supported [0, 1]\n");
2133    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2134    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2135    printf("  -d       debug: show compiled code and information (-b and -i)\n");
2136    #if !defined NODFA
2137    printf("  -dfa     force DFA matching for all subjects\n");
2138    #endif
2139    printf("  -help    show usage information\n");
2140    printf("  -i       show information about compiled patterns\n"
2141           "  -M       find MATCH_LIMIT minimum for each subject\n"
2142           "  -m       output memory used information\n"
2143           "  -o <n>   set size of offsets vector to <n>\n");
2144    #if !defined NOPOSIX
2145    printf("  -p       use POSIX interface\n");
2146    #endif
2147    printf("  -q       quiet: do not output PCRE version number at start\n");
2148    printf("  -S <n>   set stack size to <n> megabytes\n");
2149    printf("  -s       force each pattern to be studied at basic level\n"
2150           "  -s+      force each pattern to be studied, using JIT if available\n"
2151           "  -s++     ditto, verifying when JIT was actually used\n"
2152           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2153           "             where 1 <= n <= 7 selects JIT options\n"
2154           "  -s++n    ditto, verifying when JIT was actually used\n"
2155           "  -t       time compilation and execution\n");
2156    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2157    printf("  -tm      time execution (matching) only\n");
2158    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
2159  }  }
2160    
2161    
2162    
2163    /*************************************************
2164    *                Main Program                    *
2165    *************************************************/
2166    
2167  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
2168  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 311  options, followed by a set of test data, Line 2171  options, followed by a set of test data,
2171  int main(int argc, char **argv)  int main(int argc, char **argv)
2172  {  {
2173  FILE *infile = stdin;  FILE *infile = stdin;
2174    const char *version;
2175  int options = 0;  int options = 0;
2176  int study_options = 0;  int study_options = 0;
2177    int default_find_match_limit = FALSE;
2178  int op = 1;  int op = 1;
2179  int timeit = 0;  int timeit = 0;
2180    int timeitm = 0;
2181  int showinfo = 0;  int showinfo = 0;
2182  int showstore = 0;  int showstore = 0;
2183    int force_study = -1;
2184    int force_study_options = 0;
2185    int quiet = 0;
2186    int size_offsets = 45;
2187    int size_offsets_max;
2188    int *offsets = NULL;
2189    #if !defined NOPOSIX
2190  int posix = 0;  int posix = 0;
2191    #endif
2192  int debug = 0;  int debug = 0;
2193  int done = 0;  int done = 0;
2194  unsigned char buffer[30000];  int all_use_dfa = 0;
2195  unsigned char dbuffer[1024];  int verify_jit = 0;
2196    int yield = 0;
2197    int stack_size;
2198    
2199    pcre_jit_stack *jit_stack = NULL;
2200    
2201    /* These vectors store, end-to-end, a list of zero-terminated captured
2202    substring names, each list itself being terminated by an empty name. Assume
2203    that 1024 is plenty long enough for the few names we'll be testing. It is
2204    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2205    for the actual memory, to ensure alignment. */
2206    
2207    pcre_uint16 copynames[1024];
2208    pcre_uint16 getnames[1024];
2209    
2210    #ifdef SUPPORT_PCRE16
2211    pcre_uint16 *cn16ptr;
2212    pcre_uint16 *gn16ptr;
2213    #endif
2214    
2215  /* Static so that new_malloc can use it. */  #ifdef SUPPORT_PCRE8
2216    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2217    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2218    pcre_uint8 *cn8ptr;
2219    pcre_uint8 *gn8ptr;
2220    #endif
2221    
2222    /* Get buffers from malloc() so that valgrind will check their misuse when
2223    debugging. They grow automatically when very long lines are read. The 16-bit
2224    buffer (buffer16) is obtained only if needed. */
2225    
2226    buffer = (pcre_uint8 *)malloc(buffer_size);
2227    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2228    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2229    
2230    /* The outfile variable is static so that new_malloc can use it. */
2231    
2232  outfile = stdout;  outfile = stdout;
2233    
2234    /* The following  _setmode() stuff is some Windows magic that tells its runtime
2235    library to translate CRLF into a single LF character. At least, that's what
2236    I've been told: never having used Windows I take this all on trust. Originally
2237    it set 0x8000, but then I was advised that _O_BINARY was better. */
2238    
2239    #if defined(_WIN32) || defined(WIN32)
2240    _setmode( _fileno( stdout ), _O_BINARY );
2241    #endif
2242    
2243    /* Get the version number: both pcre_version() and pcre16_version() give the
2244    same answer. We just need to ensure that we call one that is available. */
2245    
2246    #ifdef SUPPORT_PCRE8
2247    version = pcre_version();
2248    #else
2249    version = pcre16_version();
2250    #endif
2251    
2252  /* Scan options */  /* Scan options */
2253    
2254  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2255    {    {
2256    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    pcre_uint8 *endptr;
2257      showstore = 1;    char *arg = argv[op];
2258    else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
2259    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    if (strcmp(arg, "-m") == 0) showstore = 1;
2260    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2261    else if (strcmp(argv[op], "-p") == 0) posix = 1;  
2262      else if (strncmp(arg, "-s+", 3) == 0)
2263        {
2264        arg += 3;
2265        if (*arg == '+') { arg++; verify_jit = TRUE; }
2266        force_study = 1;
2267        if (*arg == 0)
2268          force_study_options = jit_study_bits[6];
2269        else if (*arg >= '1' && *arg <= '7')
2270          force_study_options = jit_study_bits[*arg - '1'];
2271        else goto BAD_ARG;
2272        }
2273      else if (strcmp(arg, "-16") == 0)
2274        {
2275    #ifdef SUPPORT_PCRE16
2276        use_pcre16 = 1;
2277    #else
2278        printf("** This version of PCRE was built without 16-bit support\n");
2279        exit(1);
2280    #endif
2281        }
2282      else if (strcmp(arg, "-q") == 0) quiet = 1;
2283      else if (strcmp(arg, "-b") == 0) debug = 1;
2284      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2285      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2286      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2287    #if !defined NODFA
2288      else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2289    #endif
2290      else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2291          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2292            *endptr == 0))
2293        {
2294        op++;
2295        argc--;
2296        }
2297      else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2298        {
2299        int both = arg[2] == 0;
2300        int temp;
2301        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2302                         *endptr == 0))
2303          {
2304          timeitm = temp;
2305          op++;
2306          argc--;
2307          }
2308        else timeitm = LOOPREPEAT;
2309        if (both) timeit = timeitm;
2310        }
2311      else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2312          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2313            *endptr == 0))
2314        {
2315    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2316        printf("PCRE: -S not supported on this OS\n");
2317        exit(1);
2318    #else
2319        int rc;
2320        struct rlimit rlim;
2321        getrlimit(RLIMIT_STACK, &rlim);
2322        rlim.rlim_cur = stack_size * 1024 * 1024;
2323        rc = setrlimit(RLIMIT_STACK, &rlim);
2324        if (rc != 0)
2325          {
2326        printf("PCRE: setrlimit() failed with error %d\n", rc);
2327        exit(1);
2328          }
2329        op++;
2330        argc--;
2331    #endif
2332        }
2333    #if !defined NOPOSIX
2334      else if (strcmp(arg, "-p") == 0) posix = 1;
2335    #endif
2336      else if (strcmp(arg, "-C") == 0)
2337        {
2338        int rc;
2339        unsigned long int lrc;
2340    
2341        if (argc > 2)
2342          {
2343          if (strcmp(argv[op + 1], "linksize") == 0)
2344            {
2345            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2346            printf("%d\n", rc);
2347            yield = rc;
2348            goto EXIT;
2349            }
2350          if (strcmp(argv[op + 1], "pcre8") == 0)
2351            {
2352    #ifdef SUPPORT_PCRE8
2353            printf("1\n");
2354            yield = 1;
2355    #else
2356            printf("0\n");
2357            yield = 0;
2358    #endif
2359            goto EXIT;
2360            }
2361          if (strcmp(argv[op + 1], "pcre16") == 0)
2362            {
2363    #ifdef SUPPORT_PCRE16
2364            printf("1\n");
2365            yield = 1;
2366    #else
2367            printf("0\n");
2368            yield = 0;
2369    #endif
2370            goto EXIT;
2371            }
2372          if (strcmp(argv[op + 1], "utf") == 0)
2373            {
2374    #ifdef SUPPORT_PCRE8
2375            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2376            printf("%d\n", rc);
2377            yield = rc;
2378    #else
2379            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2380            printf("%d\n", rc);
2381            yield = rc;
2382    #endif
2383            goto EXIT;
2384            }
2385          if (strcmp(argv[op + 1], "ucp") == 0)
2386            {
2387            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2388            printf("%d\n", rc);
2389            yield = rc;
2390            goto EXIT;
2391            }
2392          if (strcmp(argv[op + 1], "jit") == 0)
2393            {
2394            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2395            printf("%d\n", rc);
2396            yield = rc;
2397            goto EXIT;
2398            }
2399          if (strcmp(argv[op + 1], "newline") == 0)
2400            {
2401            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2402            /* Note that these values are always the ASCII values, even
2403            in EBCDIC environments. CR is 13 and NL is 10. */
2404            printf("%s\n", (rc == 13)? "CR" :
2405              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2406              (rc == -2)? "ANYCRLF" :
2407              (rc == -1)? "ANY" : "???");
2408            goto EXIT;
2409            }
2410          printf("Unknown -C option: %s\n", argv[op + 1]);
2411          goto EXIT;
2412          }
2413    
2414        printf("PCRE version %s\n", version);
2415        printf("Compiled with\n");
2416    
2417    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2418    are set, either both UTFs are supported or both are not supported. */
2419    
2420    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2421        printf("  8-bit and 16-bit support\n");
2422        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2423        if (rc)
2424          printf("  UTF-8 and UTF-16 support\n");
2425        else
2426          printf("  No UTF-8 or UTF-16 support\n");
2427    #elif defined SUPPORT_PCRE8
2428        printf("  8-bit support only\n");
2429        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2430        printf("  %sUTF-8 support\n", rc? "" : "No ");
2431    #else
2432        printf("  16-bit support only\n");
2433        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2434        printf("  %sUTF-16 support\n", rc? "" : "No ");
2435    #endif
2436    
2437        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2438        printf("  %sUnicode properties support\n", rc? "" : "No ");
2439        (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2440        if (rc)
2441          {
2442          const char *arch;
2443          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2444          printf("  Just-in-time compiler support: %s\n", arch);
2445          }
2446        else
2447          printf("  No just-in-time compiler support\n");
2448        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2449        /* Note that these values are always the ASCII values, even
2450        in EBCDIC environments. CR is 13 and NL is 10. */
2451        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2452          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2453          (rc == -2)? "ANYCRLF" :
2454          (rc == -1)? "ANY" : "???");
2455        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2456        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2457                                         "all Unicode newlines");
2458        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2459        printf("  Internal link size = %d\n", rc);
2460        (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2461        printf("  POSIX malloc threshold = %d\n", rc);
2462        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2463        printf("  Default match limit = %ld\n", lrc);
2464        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2465        printf("  Default recursion depth limit = %ld\n", lrc);
2466        (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2467        printf("  Match recursion uses %s", rc? "stack" : "heap");
2468        if (showstore)
2469          {
2470          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2471          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2472          }
2473        printf("\n");
2474        goto EXIT;
2475        }
2476      else if (strcmp(arg, "-help") == 0 ||
2477               strcmp(arg, "--help") == 0)
2478        {
2479        usage();
2480        goto EXIT;
2481        }
2482    else    else
2483      {      {
2484      printf("*** Unknown option %s\n", argv[op]);      BAD_ARG:
2485      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("** Unknown or malformed option %s\n", arg);
2486      printf("  -d   debug: show compiled code; implies -i\n"      usage();
2487             "  -i   show information about compiled pattern\n"      yield = 1;
2488             "  -p   use POSIX interface\n"      goto EXIT;
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
2489      }      }
2490    op++;    op++;
2491    argc--;    argc--;
2492    }    }
2493    
2494    /* Get the store for the offsets vector, and remember what it was */
2495    
2496    size_offsets_max = size_offsets;
2497    offsets = (int *)malloc(size_offsets_max * sizeof(int));
2498    if (offsets == NULL)
2499      {
2500      printf("** Failed to get %d bytes of memory for offsets vector\n",
2501        (int)(size_offsets_max * sizeof(int)));
2502      yield = 1;
2503      goto EXIT;
2504      }
2505    
2506  /* Sort out the input and output files */  /* Sort out the input and output files */
2507    
2508  if (argc > 1)  if (argc > 1)
2509    {    {
2510    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
2511    if (infile == NULL)    if (infile == NULL)
2512      {      {
2513      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
2514      return 1;      yield = 1;
2515        goto EXIT;
2516      }      }
2517    }    }
2518    
2519  if (argc > 2)  if (argc > 2)
2520    {    {
2521    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
2522    if (outfile == NULL)    if (outfile == NULL)
2523      {      {
2524      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
2525      return 1;      yield = 1;
2526        goto EXIT;
2527      }      }
2528    }    }
2529    
2530  /* Set alternative malloc function */  /* Set alternative malloc function */
2531    
2532    #ifdef SUPPORT_PCRE8
2533    pcre_malloc = new_malloc;
2534    pcre_free = new_free;
2535    pcre_stack_malloc = stack_malloc;
2536    pcre_stack_free = stack_free;
2537    #endif
2538    
2539    #ifdef SUPPORT_PCRE16
2540    pcre16_malloc = new_malloc;
2541    pcre16_free = new_free;
2542    pcre16_stack_malloc = stack_malloc;
2543    pcre16_stack_free = stack_free;
2544    #endif
2545    
2546    /* Heading line unless quiet, then prompt for first regex if stdin */
2547    
2548    if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2549    
2550    /* Main loop */
2551    
2552    while (!done)
2553      {
2554      pcre *re = NULL;
2555      pcre_extra *extra = NULL;
2556    
2557    #if !defined NOPOSIX  /* There are still compilers that require no indent */
2558      regex_t preg;
2559      int do_posix = 0;
2560    #endif
2561    
2562      const char *error;
2563      pcre_uint8 *markptr;
2564      pcre_uint8 *p, *pp, *ppp;
2565      pcre_uint8 *to_file = NULL;
2566      const pcre_uint8 *tables = NULL;
2567      unsigned long int get_options;
2568      unsigned long int true_size, true_study_size = 0;
2569      size_t size, regex_gotten_store;
2570      int do_allcaps = 0;
2571      int do_mark = 0;
2572      int do_study = 0;
2573      int no_force_study = 0;
2574      int do_debug = debug;
2575      int do_G = 0;
2576      int do_g = 0;
2577      int do_showinfo = showinfo;
2578      int do_showrest = 0;
2579      int do_showcaprest = 0;
2580      int do_flip = 0;
2581      int erroroffset, len, delimiter, poffset;
2582    
2583      use_utf = 0;
2584      debug_lengths = 1;
2585    
2586      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
2587      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2588      fflush(outfile);
2589    
2590      p = buffer;
2591      while (isspace(*p)) p++;
2592      if (*p == 0) continue;
2593    
2594      /* See if the pattern is to be loaded pre-compiled from a file. */
2595    
2596      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2597        {
2598        pcre_uint32 magic;
2599        pcre_uint8 sbuf[8];
2600        FILE *f;
2601    
2602        p++;
2603        if (*p == '!')
2604          {
2605          do_debug = TRUE;
2606          do_showinfo = TRUE;
2607          p++;
2608          }
2609    
2610        pp = p + (int)strlen((char *)p);
2611        while (isspace(pp[-1])) pp--;
2612        *pp = 0;
2613    
2614        f = fopen((char *)p, "rb");
2615        if (f == NULL)
2616          {
2617          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2618          continue;
2619          }
2620    
2621        first_gotten_store = 0;
2622        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2623    
2624        true_size =
2625          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2626        true_study_size =
2627          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2628    
2629        re = (pcre *)new_malloc(true_size);
2630        regex_gotten_store = first_gotten_store;
2631    
2632        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2633    
2634        magic = ((REAL_PCRE *)re)->magic_number;
2635        if (magic != MAGIC_NUMBER)
2636          {
2637          if (swap_uint32(magic) == MAGIC_NUMBER)
2638            {
2639            do_flip = 1;
2640            }
2641          else
2642            {
2643            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2644            fclose(f);
2645            continue;
2646            }
2647          }
2648    
2649        /* We hide the byte-invert info for little and big endian tests. */
2650        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2651          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2652    
2653  pcre_malloc = new_malloc;      /* Now see if there is any following study data. */
2654    
2655  /* Heading line, then prompt for first regex if stdin */      if (true_study_size != 0)
2656          {
2657          pcre_study_data *psd;
2658    
2659  fprintf(outfile, "PCRE version %s\n\n", pcre_version());        extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2660          extra->flags = PCRE_EXTRA_STUDY_DATA;
2661    
2662  /* Main loop */        psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2663          extra->study_data = psd;
2664    
2665  while (!done)        if (fread(psd, 1, true_study_size, f) != true_study_size)
2666    {          {
2667    pcre *re = NULL;          FAIL_READ:
2668    pcre_extra *extra = NULL;          fprintf(outfile, "Failed to read data from %s\n", p);
2669            if (extra != NULL)
2670              {
2671              PCRE_FREE_STUDY(extra);
2672              }
2673            if (re != NULL) new_free(re);
2674            fclose(f);
2675            continue;
2676            }
2677          fprintf(outfile, "Study data loaded from %s\n", p);
2678          do_study = 1;     /* To get the data output if requested */
2679          }
2680        else fprintf(outfile, "No study data\n");
2681    
2682  #if !defined NOPOSIX  /* There are still compilers that require no indent */      /* Flip the necessary bytes. */
2683    regex_t preg;      if (do_flip)
2684  #endif        {
2685          int rc;
2686          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2687          if (rc == PCRE_ERROR_BADMODE)
2688            {
2689            /* Simulate the result of the function call below. */
2690            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2691              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2692            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2693              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2694            continue;
2695            }
2696          }
2697    
2698    const char *error;      /* Need to know if UTF-8 for printing data strings. */
   unsigned char *p, *pp, *ppp;  
   unsigned const char *tables = NULL;  
   int do_study = 0;  
   int do_debug = debug;  
   int do_G = 0;  
   int do_g = 0;  
   int do_showinfo = showinfo;  
   int do_showrest = 0;  
   int do_posix = 0;  
   int erroroffset, len, delimiter;  
2699    
2700    if (infile == stdin) printf("  re> ");      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2701    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;      use_utf = (get_options & PCRE_UTF8) != 0;
   if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2702    
2703    p = buffer;      fclose(f);
2704    while (isspace(*p)) p++;      goto SHOW_INFO;
2705    if (*p == 0) continue;      }
2706    
2707    /* Get the delimiter and seek the end of the pattern; if is isn't    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2708    complete, read more. */    the pattern; if it isn't complete, read more. */
2709    
2710    delimiter = *p++;    delimiter = *p++;
2711    
2712    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2713      {      {
2714      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2715      goto SKIP_DATA;      goto SKIP_DATA;
2716      }      }
2717    
2718    pp = p;    pp = p;
2719      poffset = (int)(p - buffer);
2720    
2721    for(;;)    for(;;)
2722      {      {
# Line 435  while (!done) Line 2727  while (!done)
2727        pp++;        pp++;
2728        }        }
2729      if (*pp != 0) break;      if (*pp != 0) break;
2730        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
2731        {        {
2732        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2733        done = 1;        done = 1;
# Line 453  while (!done) Line 2736  while (!done)
2736      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2737      }      }
2738    
2739      /* The buffer may have moved while being extended; reset the start of data
2740      pointer to the correct relative point in the buffer. */
2741    
2742      p = buffer + poffset;
2743    
2744    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
2745    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
2746    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
2747    
2748    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
2749    
2750    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
2751      for callouts. */
2752    
2753    *pp++ = 0;    *pp++ = 0;
2754      strcpy((char *)pbuffer, (char *)p);
2755    
2756    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2757    
# Line 473  while (!done) Line 2763  while (!done)
2763      {      {
2764      switch (*pp++)      switch (*pp++)
2765        {        {
2766          case 'f': options |= PCRE_FIRSTLINE; break;
2767        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
2768        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
2769        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
2770        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2771        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2772    
2773        case '+': do_showrest = 1; break;        case '+':
2774          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2775          break;
2776    
2777          case '=': do_allcaps = 1; break;
2778        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2779          case 'B': do_debug = 1; break;
2780          case 'C': options |= PCRE_AUTO_CALLOUT; break;
2781        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
2782        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2783          case 'F': do_flip = 1; break;
2784        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2785        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2786          case 'J': options |= PCRE_DUPNAMES; break;
2787          case 'K': do_mark = 1; break;
2788        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2789          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2790    
2791  #if !defined NOPOSIX  #if !defined NOPOSIX
2792        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2793  #endif  #endif
2794    
2795        case 'S': do_study = 1; break;        case 'S':
2796          if (do_study == 0)
2797            {
2798            do_study = 1;
2799            if (*pp == '+')
2800              {
2801              if (*(++pp) == '+')
2802                {
2803                verify_jit = TRUE;
2804                pp++;
2805                }
2806              if (*pp >= '1' && *pp <= '7')
2807                study_options |= jit_study_bits[*pp++ - '1'];
2808              else
2809                study_options |= jit_study_bits[6];
2810              }
2811            }
2812          else
2813            {
2814            do_study = 0;
2815            no_force_study = 1;
2816            }
2817          break;
2818    
2819        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2820          case 'W': options |= PCRE_UCP; break;
2821        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2822          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2823          case 'Z': debug_lengths = 0; break;
2824          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2825          case '?': options |= PCRE_NO_UTF8_CHECK; break;
2826    
2827          case 'T':
2828          switch (*pp++)
2829            {
2830            case '0': tables = tables0; break;
2831            case '1': tables = tables1; break;
2832    
2833            case '\r':
2834            case '\n':
2835            case ' ':
2836            case 0:
2837            fprintf(outfile, "** Missing table number after /T\n");
2838            goto SKIP_DATA;
2839    
2840            default:
2841            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2842            goto SKIP_DATA;
2843            }
2844          break;
2845    
2846        case 'L':        case 'L':
2847        ppp = pp;        ppp = pp;
2848        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
2849          /* The 0 (NUL) test is just in case this is an unterminated line. */
2850          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2851        *ppp = 0;        *ppp = 0;
2852        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2853          {          {
2854          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2855          goto SKIP_DATA;          goto SKIP_DATA;
2856          }          }
2857        tables = pcre_maketables();        locale_set = 1;
2858          tables = PCRE_MAKETABLES;
2859        pp = ppp;        pp = ppp;
2860        break;        break;
2861    
2862        case '\n': case ' ': break;        case '>':
2863          to_file = pp;
2864          while (*pp != 0) pp++;
2865          while (isspace(pp[-1])) pp--;
2866          *pp = 0;
2867          break;
2868    
2869          case '<':
2870            {
2871            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2872              {
2873              options |= PCRE_JAVASCRIPT_COMPAT;
2874              pp += 3;
2875              }
2876            else
2877              {
2878              int x = check_newline(pp, outfile);
2879              if (x == 0) goto SKIP_DATA;
2880              options |= x;
2881              while (*pp++ != '>');
2882              }
2883            }
2884          break;
2885    
2886          case '\r':                      /* So that it works in Windows */
2887          case '\n':
2888          case ' ':
2889          break;
2890    
2891        default:        default:
2892        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2893        goto SKIP_DATA;        goto SKIP_DATA;
# Line 517  while (!done) Line 2896  while (!done)
2896    
2897    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2898    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2899    local character tables. */    local character tables. Neither does it have 16-bit support. */
2900    
2901  #if !defined NOPOSIX  #if !defined NOPOSIX
2902    if (posix || do_posix)    if (posix || do_posix)
2903      {      {
2904      int rc;      int rc;
2905      int cflags = 0;      int cflags = 0;
2906    
2907      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2908      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2909        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2910        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2911        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2912        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2913        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2914    
2915        first_gotten_store = 0;
2916      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2917    
2918      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 533  while (!done) Line 2920  while (!done)
2920    
2921      if (rc != 0)      if (rc != 0)
2922        {        {
2923        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2924        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2925        goto SKIP_DATA;        goto SKIP_DATA;
2926        }        }
# Line 545  while (!done) Line 2932  while (!done)
2932  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2933    
2934      {      {
2935      if (timeit)      /* In 16-bit mode, convert the input. */
2936    
2937    #ifdef SUPPORT_PCRE16
2938        if (use_pcre16)
2939          {
2940          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2941            {
2942            case -1:
2943            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2944              "converted to UTF-16\n");
2945            goto SKIP_DATA;
2946    
2947            case -2:
2948            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2949              "cannot be converted to UTF-16\n");
2950            goto SKIP_DATA;
2951    
2952            case -3: /* "Impossible" error when to16 is called with arg1 FALSE */
2953            fprintf(outfile, "**Failed: character value greater than 0xffff "
2954              "cannot be converted to 16-bit in non-UTF mode\n");
2955            goto SKIP_DATA;
2956    
2957            default:
2958            break;
2959            }
2960          p = (pcre_uint8 *)buffer16;
2961          }
2962    #endif
2963    
2964        /* Compile many times when timing */
2965    
2966        if (timeit > 0)
2967        {        {
2968        register int i;        register int i;
2969        clock_t time_taken;        clock_t time_taken;
2970        clock_t start_time = clock();        clock_t start_time = clock();
2971        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
2972          {          {
2973          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2974          if (re != NULL) free(re);          if (re != NULL) free(re);
2975          }          }
2976        time_taken = clock() - start_time;        time_taken = clock() - start_time;
2977        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
2978          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
2979          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
2980        }        }
2981    
2982      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2983        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2984    
2985      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2986      if non-interactive. */      if non-interactive. */
# Line 574  while (!done) Line 2993  while (!done)
2993          {          {
2994          for (;;)          for (;;)
2995            {            {
2996            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2997              {              {
2998              done = 1;              done = 1;
2999              goto CONTINUE;              goto CONTINUE;
# Line 588  while (!done) Line 3007  while (!done)
3007        goto CONTINUE;        goto CONTINUE;
3008        }        }
3009    
3010      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
3011      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
3012      returns only limited data. Check that it agrees with the newer one. */      lines. */
3013    
3014      if (do_showinfo)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3015          goto SKIP_DATA;
3016        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3017    
3018        /* Extract the size for possible writing before possibly flipping it,
3019        and remember the store that was got. */
3020    
3021        true_size = ((REAL_PCRE *)re)->size;
3022        regex_gotten_store = first_gotten_store;
3023    
3024        /* Output code size information if requested */
3025    
3026        if (log_store)
3027          fprintf(outfile, "Memory allocation (code space): %d\n",
3028            (int)(first_gotten_store -
3029                  sizeof(REAL_PCRE) -
3030                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3031    
3032        /* If -s or /S was present, study the regex to generate additional info to
3033        help with the matching, unless the pattern has the SS option, which
3034        suppresses the effect of /S (used for a few test patterns where studying is
3035        never sensible). */
3036    
3037        if (do_study || (force_study >= 0 && !no_force_study))
3038        {        {
3039        int old_first_char, old_options, old_count;        if (timeit > 0)
3040        int count, backrefmax, first_char, need_char;          {
3041        size_t size;          register int i;
3042            clock_t time_taken;
3043        if (do_debug) print_internals(re);          clock_t start_time = clock();
3044            for (i = 0; i < timeit; i++)
3045        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);            {
3046        new_info(re, NULL, PCRE_INFO_SIZE, &size);            PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3047        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            }
3048        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);          time_taken = clock() - start_time;
3049        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);          if (extra != NULL)
3050        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            {
3051              PCRE_FREE_STUDY(extra);
3052        old_count = pcre_info(re, &old_options, &old_first_char);            }
3053        if (count < 0) fprintf(outfile,          fprintf(outfile, "  Study time %.4f milliseconds\n",
3054          "Error %d from pcre_info()\n", count);            (((double)time_taken * 1000.0) / (double)timeit) /
3055        else              (double)CLOCKS_PER_SEC);
3056            }
3057          PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3058          if (error != NULL)
3059            fprintf(outfile, "Failed to study: %s\n", error);
3060          else if (extra != NULL)
3061          {          {
3062          if (old_count != count) fprintf(outfile,          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3063            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,          if (log_store)
3064              old_count);            {
3065              size_t jitsize;
3066              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3067                  jitsize != 0)
3068                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3069              }
3070            }
3071          }
3072    
3073          if (old_first_char != first_char) fprintf(outfile,      /* If /K was present, we set up for handling MARK data. */
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
3074    
3075          if (old_options != options) fprintf(outfile,      if (do_mark)
3076            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,        {
3077              old_options);        if (extra == NULL)
3078            {
3079            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3080            extra->flags = 0;
3081          }          }
3082          extra->mark = &markptr;
3083          extra->flags |= PCRE_EXTRA_MARK;
3084          }
3085    
3086        if (size != gotten_store) fprintf(outfile,      /* Extract and display information from the compiled data if required. */
3087    
3088        SHOW_INFO:
3089    
3090        if (do_debug)
3091          {
3092          fprintf(outfile, "------------------------------------------------------------------\n");
3093          PCRE_PRINTINT(re, outfile, debug_lengths);
3094          }
3095    
3096        /* We already have the options in get_options (see above) */
3097    
3098        if (do_showinfo)
3099          {
3100          unsigned long int all_options;
3101          int count, backrefmax, first_char, need_char, okpartial, jchanged,
3102            hascrorlf;
3103          int nameentrysize, namecount;
3104          const pcre_uint8 *nametable;
3105    
3106          if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3107              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3108              new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3109              new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3110              new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3111              new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3112              new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3113              new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3114              new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3115              new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3116              new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3117              != 0)
3118            goto SKIP_DATA;
3119    
3120          if (size != regex_gotten_store) fprintf(outfile,
3121          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3122          size, gotten_store);          (int)size, (int)regex_gotten_store);
3123    
3124        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
3125        if (backrefmax > 0)        if (backrefmax > 0)
3126          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
       if (options == 0) fprintf(outfile, "No options\n");  
         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
           ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
           ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
           ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
           ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
           ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
           ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
           ((options & PCRE_EXTRA) != 0)? " extra" : "",  
           ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
3127    
3128        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (namecount > 0)
3129          fprintf(outfile, "Case state changes\n");          {
3130            fprintf(outfile, "Named capturing subpatterns:\n");
3131            while (namecount-- > 0)
3132              {
3133    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3134              int imm2_size = use_pcre16 ? 1 : 2;
3135    #else
3136              int imm2_size = IMM2_SIZE;
3137    #endif
3138              int length = (int)STRLEN(nametable + imm2_size);
3139              fprintf(outfile, "  ");
3140              PCHARSV(nametable, imm2_size, length, outfile);
3141              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3142    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3143              fprintf(outfile, "%3d\n", use_pcre16?
3144                 (int)(((PCRE_SPTR16)nametable)[0])
3145                :((int)nametable[0] << 8) | (int)nametable[1]);
3146              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3147    #else
3148              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3149    #ifdef SUPPORT_PCRE8
3150              nametable += nameentrysize;
3151    #else
3152              nametable += nameentrysize * 2;
3153    #endif
3154    #endif
3155              }
3156            }
3157    
3158          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3159          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3160    
3161          all_options = ((REAL_PCRE *)re)->options;
3162          if (do_flip) all_options = swap_uint32(all_options);
3163    
3164          if (get_options == 0) fprintf(outfile, "No options\n");
3165            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3166              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3167              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3168              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3169              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3170              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3171              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3172              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3173              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3174              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3175              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3176              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3177              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3178              ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3179              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3180              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3181              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3182              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3183    
3184          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3185    
3186          switch (get_options & PCRE_NEWLINE_BITS)
3187            {
3188            case PCRE_NEWLINE_CR:
3189            fprintf(outfile, "Forced newline sequence: CR\n");
3190            break;
3191    
3192            case PCRE_NEWLINE_LF:
3193            fprintf(outfile, "Forced newline sequence: LF\n");
3194            break;
3195    
3196            case PCRE_NEWLINE_CRLF:
3197            fprintf(outfile, "Forced newline sequence: CRLF\n");
3198            break;
3199    
3200            case PCRE_NEWLINE_ANYCRLF:
3201            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3202            break;
3203    
3204            case PCRE_NEWLINE_ANY:
3205            fprintf(outfile, "Forced newline sequence: ANY\n");
3206            break;
3207    
3208            default:
3209            break;
3210            }
3211    
3212        if (first_char == -1)        if (first_char == -1)
3213          {          {
3214          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
3215          }          }
3216        else if (first_char < 0)        else if (first_char < 0)
3217          {          {
# Line 656  while (!done) Line 3219  while (!done)
3219          }          }
3220        else        else
3221          {          {
3222          if (isprint(first_char))          const char *caseless =
3223            fprintf(outfile, "First char = \'%c\'\n", first_char);            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3224              "" : " (caseless)";
3225    
3226            if (PRINTOK(first_char))
3227              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3228          else          else
3229            fprintf(outfile, "First char = %d\n", first_char);            {
3230              fprintf(outfile, "First char = ");
3231              pchar(first_char, outfile);
3232              fprintf(outfile, "%s\n", caseless);
3233              }
3234          }          }
3235    
3236        if (need_char < 0)        if (need_char < 0)
# Line 668  while (!done) Line 3239  while (!done)
3239          }          }
3240        else        else
3241          {          {
3242          if (isprint(need_char))          const char *caseless =
3243            fprintf(outfile, "Need char = \'%c\'\n", need_char);            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3244              "" : " (caseless)";
3245    
3246            if (PRINTOK(need_char))
3247              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3248            else
3249              {
3250              fprintf(outfile, "Need char = ");
3251              pchar(need_char, outfile);
3252              fprintf(outfile, "%s\n", caseless);
3253              }
3254            }
3255    
3256          /* Don't output study size; at present it is in any case a fixed
3257          value, but it varies, depending on the computer architecture, and
3258          so messes up the test suite. (And with the /F option, it might be
3259          flipped.) If study was forced by an external -s, don't show this
3260          information unless -i or -d was also present. This means that, except
3261          when auto-callouts are involved, the output from runs with and without
3262          -s should be identical. */
3263    
3264          if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3265            {
3266            if (extra == NULL)
3267              fprintf(outfile, "Study returned NULL\n");
3268          else          else
3269            fprintf(outfile, "Need char = %d\n", need_char);            {
3270              pcre_uint8 *start_bits = NULL;
3271              int minlength;
3272    
3273              if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3274                fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3275    
3276              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3277                {
3278                if (start_bits == NULL)
3279                  fprintf(outfile, "No set of starting bytes\n");
3280                else
3281                  {
3282                  int i;
3283                  int c = 24;
3284                  fprintf(outfile, "Starting byte set: ");
3285                  for (i = 0; i < 256; i++)
3286                    {
3287                    if ((start_bits[i/8] & (1<<(i&7))) != 0)
3288                      {
3289                      if (c > 75)
3290                        {
3291                        fprintf(outfile, "\n  ");
3292                        c = 2;
3293                        }
3294                      if (PRINTOK(i) && i != ' ')
3295                        {
3296                        fprintf(outfile, "%c ", i);
3297                        c += 2;
3298                        }
3299                      else
3300                        {
3301                        fprintf(outfile, "\\x%02x ", i);
3302                        c += 5;
3303                        }
3304                      }
3305                    }
3306                  fprintf(outfile, "\n");
3307                  }
3308                }
3309              }
3310    
3311            /* Show this only if the JIT was set by /S, not by -s. */
3312    
3313            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3314              {
3315              int jit;
3316              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3317                {
3318                if (jit)
3319                  fprintf(outfile, "JIT study was successful\n");
3320                else
3321    #ifdef SUPPORT_JIT
3322                  fprintf(outfile, "JIT study was not successful\n");
3323    #else
3324                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3325    #endif
3326                }
3327              }
3328          }          }
3329        }        }
3330    
3331      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
3332      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
3333        the study length, in big-endian order. */
3334    
3335      if (do_study)      if (to_file != NULL)
3336        {        {
3337        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
3338          if (f == NULL)
3339          {          {
3340          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
3341          }          }
3342          else
3343            {
3344            pcre_uint8 sbuf[8];
3345    
3346        extra = pcre_study(re, study_options, &error);          if (do_flip) regexflip(re, extra);
3347        if (error != NULL)          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3348          fprintf(outfile, "Failed to study: %s\n", error);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3349        else if (extra == NULL)          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3350          fprintf(outfile, "Study returned NULL\n");          sbuf[3] = (pcre_uint8)((true_size) & 255);
3351            sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3352            sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3353            sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3354            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3355    
3356        else if (do_showinfo)          if (fwrite(sbuf, 1, 8, f) < 8 ||
3357          {              fwrite(re, 1, true_size, f) < true_size)
3358          uschar *start_bits = NULL;            {
3359          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3360          if (start_bits == NULL)            }
           fprintf(outfile, "No starting character set\n");  
3361          else          else
3362            {            {
3363            int i;            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3364            int c = 24;  
3365            fprintf(outfile, "Starting character set: ");            /* If there is study data, write it. */
3366            for (i = 0; i < 256; i++)  
3367              if (extra != NULL)
3368              {              {
3369              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
3370                    true_study_size)
3371                {                {
3372                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
3373                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
3374                }                }
3375                else fprintf(outfile, "Study data written to %s\n", to_file);
3376              }              }
           fprintf(outfile, "\n");  
3377            }            }
3378            fclose(f);
3379            }
3380    
3381          new_free(re);
3382          if (extra != NULL)
3383            {
3384            PCRE_FREE_STUDY(extra);
3385            }
3386          if (locale_set)
3387            {
3388            new_free((void *)tables);
3389            setlocale(LC_CTYPE, "C");
3390            locale_set = 0;
3391          }          }
3392          continue;  /* With next regex */
3393        }        }
3394      }      }        /* End of non-POSIX compile */
3395    
3396    /* Read data lines and test them */    /* Read data lines and test them */
3397    
3398    for (;;)    for (;;)
3399      {      {
3400      unsigned char *q;      pcre_uint8 *q;
3401      unsigned char *bptr = dbuffer;      pcre_uint8 *bptr;
3402        int *use_offsets = offsets;
3403        int use_size_offsets = size_offsets;
3404        int callout_data = 0;
3405        int callout_data_set = 0;
3406      int count, c;      int count, c;
3407      int copystrings = 0;      int copystrings = 0;
3408        int find_match_limit = default_find_match_limit;
3409      int getstrings = 0;      int getstrings = 0;
3410      int getlist = 0;      int getlist = 0;
3411      int gmatched = 0;      int gmatched = 0;
3412      int start_offset = 0;      int start_offset = 0;
3413        int start_offset_sign = 1;
3414      int g_notempty = 0;      int g_notempty = 0;
3415      int offsets[45];      int use_dfa = 0;
3416      int size_offsets = sizeof(offsets)/sizeof(int);  
3417        *copynames = 0;
3418        *getnames = 0;
3419    
3420    #ifdef SUPPORT_PCRE16
3421        cn16ptr = copynames;
3422        gn16ptr = getnames;
3423    #endif
3424    #ifdef SUPPORT_PCRE8
3425        cn8ptr = copynames8;
3426        gn8ptr = getnames8;
3427    #endif
3428    
3429        SET_PCRE_CALLOUT(callout);
3430        first_callout = 1;
3431        last_callout_mark = NULL;
3432        callout_extra = 0;
3433        callout_count = 0;
3434        callout_fail_count = 999999;
3435        callout_fail_id = -1;
3436        show_malloc = 0;
3437      options = 0;      options = 0;
3438    
3439      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
3440      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3441    
3442        len = 0;
3443        for (;;)
3444        {        {
3445        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3446        goto CONTINUE;          {
3447            if (len > 0)    /* Reached EOF without hitting a newline */
3448              {
3449              fprintf(outfile, "\n");
3450              break;
3451              }
3452            done = 1;
3453            goto CONTINUE;
3454            }
3455          if (infile != stdin) fprintf(outfile, "%s", (char