/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC revision 1085 by chpe, Tue Oct 16 15:55:32 2012 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    /* This program now supports the testing of the 8-bit, 16-bit, and 32-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If the 8-bit and 16-bit modes are both enabled, for example,
43    pcre_compile.c is compiled twice (the second time with COMPILE_PCRE16 defined).
44    By contrast, pcretest.c is compiled only once. Therefore, it must not make use
45    of any of the macros from pcre_internal.h that depend on COMPILE_PCRE8,
46    COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of SUPPORT_PCRE8,
47    SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only supported library functions. */
48    
49    #ifdef HAVE_CONFIG_H
50    #include "config.h"
51    #endif
52    
53  #include <ctype.h>  #include <ctype.h>
54  #include <stdio.h>  #include <stdio.h>
55  #include <string.h>  #include <string.h>
56  #include <stdlib.h>  #include <stdlib.h>
57  #include <time.h>  #include <time.h>
58  #include <locale.h>  #include <locale.h>
59    #include <errno.h>
60    
61    /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67    #ifdef HAVE_UNISTD_H
68    #include <unistd.h>
69    #endif
70    #if defined(SUPPORT_LIBREADLINE)
71    #include <readline/readline.h>
72    #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80    #endif
81    
82    /* A number of things vary for Windows builds. Originally, pcretest opened its
83    input and output without "b"; then I was told that "b" was needed in some
84    environments, so it was added for release 5.0 to both the input and output. (It
85    makes no difference on Unix-like systems.) Later I was told that it is wrong
86    for the input on Windows. I've now abstracted the modes into two macros that
87    are set here, to make it easier to fiddle with them, and removed "b" from the
88    input mode under Windows. */
89    
90    #if defined(_WIN32) || defined(WIN32)
91    #include <io.h>                /* For _setmode() */
92    #include <fcntl.h>             /* For _O_BINARY */
93    #define INPUT_MODE   "r"
94    #define OUTPUT_MODE  "wb"
95    
96    #ifndef isatty
97    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
98    #endif                         /* though in some environments they seem to   */
99                                   /* be already defined, hence the #ifndefs.    */
100    #ifndef fileno
101    #define fileno _fileno
102    #endif
103    
104    /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106    #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
112    #else
113    #include <sys/time.h>          /* These two includes are needed */
114    #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119    #define INPUT_MODE   "rb"
120    #define OUTPUT_MODE  "wb"
121    #endif
122    #endif
123    
124    #define PRIV(name) name
125    
126    /* We have to include pcre_internal.h because we need the internal info for
127    displaying the results of pcre_study() and we also need to know about the
128    internal macros, structures, and other internal data values; pcretest has
129    "inside information" compared to a program that strictly follows the PCRE API.
130    
131    Although pcre_internal.h does itself include pcre.h, we explicitly include it
132    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133    appropriately for an application, not for building PCRE. */
134    
135    #include "pcre.h"
136    
137    #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138    /* Configure internal macros to 32 bit mode. */
139    #define COMPILE_PCRE32
140    #endif
141    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142    /* Configure internal macros to 16 bit mode. */
143    #define COMPILE_PCRE16
144    #endif
145    #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146    /* Configure internal macros to 8 bit mode. */
147    #define COMPILE_PCRE8
148    #endif
149    
150    #include "pcre_internal.h"
151    
152    /* The pcre_printint() function, which prints the internal form of a compiled
153    regex, is held in a separate file so that (a) it can be compiled in 8-bit,
154    16-bit, or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155    when that is compiled in debug mode. */
156    
157    #ifdef SUPPORT_PCRE8
158    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159    #endif
160    #ifdef SUPPORT_PCRE16
161    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162    #endif
163    #ifdef SUPPORT_PCRE32
164    void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165    #endif
166    
167  /* Use the internal info for displaying the results of pcre_study(). */  /* We need access to some of the data tables that PCRE uses. So as not to have
168    to keep two copies, we include the source files here, changing the names of the
169    external symbols to prevent clashes. */
170    
171    #define PCRE_INCLUDED
172    
173    #include "pcre_tables.c"
174    #include "pcre_ucd.c"
175    
176    /* The definition of the macro PRINTABLE, which determines whether to print an
177    output character as-is or as a hex value when showing compiled patterns, is
178    the same as in the printint.src file. We use it here in cases when the locale
179    has not been explicitly changed, so as to get consistent output from systems
180    that differ in their output from isprint() even in the "C" locale. */
181    
182  #include "internal.h"  #ifdef EBCDIC
183    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
184    #else
185    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
186    #endif
187    
188    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189    
190    /* Posix support is disabled in 16 or 32 bit only mode. */
191    #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192    #define NOPOSIX
193    #endif
194    
195  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
196  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 200  Makefile. */
200  #include "pcreposix.h"  #include "pcreposix.h"
201  #endif  #endif
202    
203    /* It is also possible, originally for the benefit of a version that was
204    imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205    NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206    automatically cut out the UTF support if PCRE is built without it. */
207    
208    #ifndef SUPPORT_UTF
209    #ifndef NOUTF
210    #define NOUTF
211    #endif
212    #endif
213    
214    /* To make the code a bit tidier for 8-bit, 16-bit, and 32-bit support, we define
215    macros for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is
216    called only from one place and is handled differently). I couldn't dream up any
217    way of using a single macro to do this in a generic way, because of the many
218    different argument requirements. We know that at least one of SUPPORT_PCRE8,
219    SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
220    individual mode; then use these in the definitions of generic macros.
221    
222    **** Special note about the PCHARSxxx macros: the address of the string to be
223    printed is always given as two arguments: a base address followed by an offset.
224    The base address is cast to the correct data size for 8, 16, or 32 bit data; the
225    offset is in units of this size. If the string were given as base+offset in one
226    argument, the casting might be incorrectly applied. */
227    
228    #ifdef SUPPORT_PCRE8
229    
230    #define PCHARS8(lv, p, offset, len, f) \
231      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
232    
233    #define PCHARSV8(p, offset, len, f) \
234      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235    
236    #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237      p = read_capture_name8(p, cn8, re)
238    
239    #define STRLEN8(p) ((int)strlen((char *)p))
240    
241    #define SET_PCRE_CALLOUT8(callout) \
242      pcre_callout = callout
243    
244    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245       pcre_assign_jit_stack(extra, callback, userdata)
246    
247    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248      re = pcre_compile((char *)pat, options, error, erroffset, tables)
249    
250    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251        namesptr, cbuffer, size) \
252      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253        (char *)namesptr, cbuffer, size)
254    
255    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257    
258    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259        offsets, size_offsets, workspace, size_workspace) \
260      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261        offsets, size_offsets, workspace, size_workspace)
262    
263    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264        offsets, size_offsets) \
265      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266        offsets, size_offsets)
267    
268    #define PCRE_FREE_STUDY8(extra) \
269      pcre_free_study(extra)
270    
271    #define PCRE_FREE_SUBSTRING8(substring) \
272      pcre_free_substring(substring)
273    
274    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275      pcre_free_substring_list(listptr)
276    
277    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278        getnamesptr, subsptr) \
279      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280        (char *)getnamesptr, subsptr)
281    
282    #define PCRE_GET_STRINGNUMBER8(n, re, ptr) /* re: compiled pattern; ptr: name to look up */ \
283      n = pcre_get_stringnumber(re, (char *)ptr)
284    
285    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287    
288    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290    
291    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293    
294    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295      pcre_printint(re, outfile, debug_lengths)
296    
297    #define PCRE_STUDY8(extra, re, options, error) \
298      extra = pcre_study(re, options, error)
299    
300    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301      pcre_jit_stack_alloc(startsize, maxsize)
302    
303    #define PCRE_JIT_STACK_FREE8(stack) \
304      pcre_jit_stack_free(stack)
305    
306    #endif /* SUPPORT_PCRE8 */
307    
308    /* -----------------------------------------------------------*/
309    
310    #ifdef SUPPORT_PCRE16
311    
312    #define PCHARS16(lv, p, offset, len, f) \
313      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
314    
315    #define PCHARSV16(p, offset, len, f) \
316      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
317    
318    #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319      p = read_capture_name16(p, cn16, re)
320    
321    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322    
323    #define SET_PCRE_CALLOUT16(callout) \
324      pcre16_callout = (int (*)(pcre16_callout_block *))callout
325    
326    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327      pcre16_assign_jit_stack((pcre16_extra *)extra, \
328        (pcre16_jit_callback)callback, userdata)
329    
330    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332        tables)
333    
334    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335        namesptr, cbuffer, size) \
336      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338    
339    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341        (PCRE_UCHAR16 *)cbuffer, size/2)
342    
343    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344        offsets, size_offsets, workspace, size_workspace) \
345      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347        workspace, size_workspace)
348    
349    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350        offsets, size_offsets) \
351      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352        len, start_offset, options, offsets, size_offsets)
353    
354    #define PCRE_FREE_STUDY16(extra) \
355      pcre16_free_study((pcre16_extra *)extra)
356    
357    #define PCRE_FREE_SUBSTRING16(substring) \
358      pcre16_free_substring((PCRE_SPTR16)substring)
359    
360    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362    
363    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364        getnamesptr, subsptr) \
365      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367    
368    #define PCRE_GET_STRINGNUMBER16(n, re, ptr) /* re: compiled pattern; ptr: name to look up */ \
369      n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
370    
371    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373        (PCRE_SPTR16 *)(void*)subsptr)
374    
375    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377        (PCRE_SPTR16 **)(void*)listptr)
378    
379    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381        tables)
382    
383    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384      pcre16_printint(re, outfile, debug_lengths)
385    
386    #define PCRE_STUDY16(extra, re, options, error) \
387      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388    
389    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391    
392    #define PCRE_JIT_STACK_FREE16(stack) \
393      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394    
395    #endif /* SUPPORT_PCRE16 */
396    
397    /* -----------------------------------------------------------*/
398    
399    #ifdef SUPPORT_PCRE32
400    
401    #define PCHARS32(lv, p, offset, len, f) \
402      lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)
403    
404    #define PCHARSV32(p, offset, len, f) \
405      (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)
406    
407    #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408      p = read_capture_name32(p, cn32, re)
409    
410    #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411    
412    #define SET_PCRE_CALLOUT32(callout) \
413      pcre32_callout = (int (*)(pcre32_callout_block *))callout
414    
415    #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416      pcre32_assign_jit_stack((pcre32_extra *)extra, \
417        (pcre32_jit_callback)callback, userdata)
418    
419    #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420      re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421        tables)
422    
423    #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
424        namesptr, cbuffer, size) /* size is the byte size of cbuffer */ \
425      rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
426        count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4) /* bytes -> 4-byte units */
427    
428    #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) /* size is the byte size of cbuffer */ \
429      rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
430        (PCRE_UCHAR32 *)cbuffer, (size)/4) /* bytes -> 4-byte units */
431    
432    #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433        offsets, size_offsets, workspace, size_workspace) \
434      count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435        (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436        workspace, size_workspace)
437    
438    #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439        offsets, size_offsets) \
440      count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441        len, start_offset, options, offsets, size_offsets)
442    
443    #define PCRE_FREE_STUDY32(extra) \
444      pcre32_free_study((pcre32_extra *)extra)
445    
446    #define PCRE_FREE_SUBSTRING32(substring) \
447      pcre32_free_substring((PCRE_SPTR32)substring)
448    
449    #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450      pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451    
452    #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453        getnamesptr, subsptr) \
454      rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455        count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456    
457    #define PCRE_GET_STRINGNUMBER32(n, re, ptr) /* re: compiled pattern; ptr: name to look up */ \
458      n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)ptr)
459    
460    #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461      rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462        (PCRE_SPTR32 *)(void*)subsptr)
463    
464    #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465      rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466        (PCRE_SPTR32 **)(void*)listptr)
467    
468    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469      rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470        tables)
471    
472    #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473      pcre32_printint(re, outfile, debug_lengths)
474    
475    #define PCRE_STUDY32(extra, re, options, error) \
476      extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477    
478    #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479      (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480    
481    #define PCRE_JIT_STACK_FREE32(stack) \
482      pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483    
484    #endif /* SUPPORT_PCRE32 */
485    
486    
487    /* ----- Two or more modes are supported; a runtime test is needed, except
488    for pcre_config(), when it doesn't matter which
489    version is called. ----- */
490    
491    enum {
492      PCRE8_MODE,
493      PCRE16_MODE,
494      PCRE32_MODE
495    };
496    
497    #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498    
499    #define CHAR_SIZE (1 << pcre_mode)
500    
501    #define PCHARS(lv, p, offset, len, f) \
502      if (pcre_mode == PCRE32_MODE) \
503        PCHARS32(lv, p, offset, len, f); \
504      else if (pcre_mode == PCRE16_MODE) \
505        PCHARS16(lv, p, offset, len, f); \
506      else \
507        PCHARS8(lv, p, offset, len, f)
508    
509    #define PCHARSV(p, offset, len, f) \
510      if (pcre_mode == PCRE32_MODE) \
511        PCHARSV32(p, offset, len, f); \
512      else if (pcre_mode == PCRE16_MODE) \
513        PCHARSV16(p, offset, len, f); \
514      else \
515        PCHARSV8(p, offset, len, f)
516    
517    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518      if (pcre_mode == PCRE32_MODE) \
519        READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520      else if (pcre_mode == PCRE16_MODE) \
521        READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522      else \
523        READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524    
525    #define SET_PCRE_CALLOUT(callout) \
526      if (pcre_mode == PCRE32_MODE) \
527        SET_PCRE_CALLOUT32(callout); \
528      else if (pcre_mode == PCRE16_MODE) \
529        SET_PCRE_CALLOUT16(callout); \
530      else \
531        SET_PCRE_CALLOUT8(callout)
532    
533    #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534    
535    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536      if (pcre_mode == PCRE32_MODE) \
537        PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538      else if (pcre_mode == PCRE16_MODE) \
539        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540      else \
541        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542    
543    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544      if (pcre_mode == PCRE32_MODE) \
545        PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546      else if (pcre_mode == PCRE16_MODE) \
547        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548      else \
549        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
550    
551    #define PCRE_CONFIG pcre_config
552    
553    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554        namesptr, cbuffer, size) \
555      if (pcre_mode == PCRE32_MODE) \
556        PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557          namesptr, cbuffer, size); \
558      else if (pcre_mode == PCRE16_MODE) \
559        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560          namesptr, cbuffer, size); \
561      else \
562        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
563          namesptr, cbuffer, size)
564    
565    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566      if (pcre_mode == PCRE32_MODE) \
567        PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568      else if (pcre_mode == PCRE16_MODE) \
569        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570      else \
571        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572    
573    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574        offsets, size_offsets, workspace, size_workspace) \
575      if (pcre_mode == PCRE32_MODE) \
576        PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577          offsets, size_offsets, workspace, size_workspace); \
578      else if (pcre_mode == PCRE16_MODE) \
579        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580          offsets, size_offsets, workspace, size_workspace); \
581      else \
582        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
583          offsets, size_offsets, workspace, size_workspace)
584    
585    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586        offsets, size_offsets) \
587      if (pcre_mode == PCRE32_MODE) \
588        PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589          offsets, size_offsets); \
590      else if (pcre_mode == PCRE16_MODE) \
591        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592          offsets, size_offsets); \
593      else \
594        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
595          offsets, size_offsets)
596    
597    #define PCRE_FREE_STUDY(extra) \
598      if (pcre_mode == PCRE32_MODE) \
599        PCRE_FREE_STUDY32(extra); \
600      else if (pcre_mode == PCRE16_MODE) \
601        PCRE_FREE_STUDY16(extra); \
602      else \
603        PCRE_FREE_STUDY8(extra)
604    
605    #define PCRE_FREE_SUBSTRING(substring) \
606      if (pcre_mode == PCRE32_MODE) \
607        PCRE_FREE_SUBSTRING32(substring); \
608      else if (pcre_mode == PCRE16_MODE) \
609        PCRE_FREE_SUBSTRING16(substring); \
610      else \
611        PCRE_FREE_SUBSTRING8(substring)
612    
613    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614      if (pcre_mode == PCRE32_MODE) \
615        PCRE_FREE_SUBSTRING_LIST32(listptr); \
616      else if (pcre_mode == PCRE16_MODE) \
617        PCRE_FREE_SUBSTRING_LIST16(listptr); \
618      else \
619        PCRE_FREE_SUBSTRING_LIST8(listptr)
620    
621    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622        getnamesptr, subsptr) \
623      if (pcre_mode == PCRE32_MODE) \
624        PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625          getnamesptr, subsptr); \
626      else if (pcre_mode == PCRE16_MODE) \
627        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628          getnamesptr, subsptr); \
629      else \
630        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
631          getnamesptr, subsptr)
632    
633    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634      if (pcre_mode == PCRE32_MODE) \
635        PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636      else if (pcre_mode == PCRE16_MODE) \
637        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638      else \
639        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640    
641    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642      if (pcre_mode == PCRE32_MODE) \
643        PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644      else if (pcre_mode == PCRE16_MODE) \
645        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646      else \
647        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648    
649    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650      if (pcre_mode == PCRE32_MODE) \
651        PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652      else if (pcre_mode == PCRE16_MODE) \
653        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654      else \
655        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656    
657    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658      (pcre_mode == PCRE32_MODE ? \
659         PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660        : pcre_mode == PCRE16_MODE ? \
661          PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662          : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663    
664    #define PCRE_JIT_STACK_FREE(stack) \
665      if (pcre_mode == PCRE32_MODE) \
666        PCRE_JIT_STACK_FREE32(stack); \
667      else if (pcre_mode == PCRE16_MODE) \
668        PCRE_JIT_STACK_FREE16(stack); \
669      else \
670        PCRE_JIT_STACK_FREE8(stack)
671    
672    #define PCRE_MAKETABLES \
673      (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674    
675    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676      if (pcre_mode == PCRE32_MODE) \
677        PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678      else if (pcre_mode == PCRE16_MODE) \
679        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680      else \
681        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682    
683    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684      if (pcre_mode == PCRE32_MODE) \
685        PCRE_PRINTINT32(re, outfile, debug_lengths); \
686      else if (pcre_mode == PCRE16_MODE) \
687        PCRE_PRINTINT16(re, outfile, debug_lengths); \
688      else \
689        PCRE_PRINTINT8(re, outfile, debug_lengths)
690    
691    #define PCRE_STUDY(extra, re, options, error) \
692      if (pcre_mode == PCRE32_MODE) \
693        PCRE_STUDY32(extra, re, options, error); \
694      else if (pcre_mode == PCRE16_MODE) \
695        PCRE_STUDY16(extra, re, options, error); \
696      else \
697        PCRE_STUDY8(extra, re, options, error)
698    
699    /* ----- Only 8-bit mode is supported ----- */
700    
701    #elif defined SUPPORT_PCRE8
702    #define CHAR_SIZE                 1
703    #define PCHARS                    PCHARS8
704    #define PCHARSV                   PCHARSV8
705    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
706    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
707    #define STRLEN                    STRLEN8
708    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
709    #define PCRE_COMPILE              PCRE_COMPILE8
710    #define PCRE_CONFIG               pcre_config
711    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
712    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
713    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
714    #define PCRE_EXEC                 PCRE_EXEC8
715    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
716    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
717    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
718    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
719    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
720    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
721    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
722    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
723    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
724    #define PCRE_MAKETABLES           pcre_maketables()
725    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
726    #define PCRE_PRINTINT             PCRE_PRINTINT8
727    #define PCRE_STUDY                PCRE_STUDY8
728    
729    /* ----- Only 16-bit mode is supported ----- */
730    
731    #elif defined SUPPORT_PCRE16
732    #define CHAR_SIZE                 2
733    #define PCHARS                    PCHARS16
734    #define PCHARSV                   PCHARSV16
735    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
736    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
737    #define STRLEN                    STRLEN16
738    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
739    #define PCRE_COMPILE              PCRE_COMPILE16
740    #define PCRE_CONFIG               pcre16_config
741    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
742    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
743    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
744    #define PCRE_EXEC                 PCRE_EXEC16
745    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
746    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
747    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
748    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
749    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
750    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
751    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
752    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
753    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
754    #define PCRE_MAKETABLES           pcre16_maketables()
755    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756    #define PCRE_PRINTINT             PCRE_PRINTINT16
757    #define PCRE_STUDY                PCRE_STUDY16
758    
759    /* ----- Only 32-bit mode is supported ----- */
760    
761    #elif defined SUPPORT_PCRE32
762    #define CHAR_SIZE                 4
763    #define PCHARS                    PCHARS32
764    #define PCHARSV                   PCHARSV32
765    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME32
766    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT32
767    #define STRLEN                    STRLEN32
768    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK32
769    #define PCRE_COMPILE              PCRE_COMPILE32
770    #define PCRE_CONFIG               pcre32_config
771    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING32
773    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC32
774    #define PCRE_EXEC                 PCRE_EXEC32
775    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY32
776    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING32
777    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST32
778    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING32
779    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER32
780    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING32
781    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST32
782    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC32
783    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE32
784    #define PCRE_MAKETABLES           pcre32_maketables()
785    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786    #define PCRE_PRINTINT             PCRE_PRINTINT32
787    #define PCRE_STUDY                PCRE_STUDY32
788    
789    #endif
790    
791    /* ----- End of mode-specific function call macros ----- */
792    
793    
794    /* Other parameters */
795    
796  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
797  #ifdef CLK_TCK  #ifdef CLK_TCK
798  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 801  Makefile. */
801  #endif  #endif
802  #endif  #endif
803    
804  #define LOOPREPEAT 20000  #if !defined NODFA
805    #define DFA_WS_DIMENSION 1000
806    #endif
807    
808    /* This is the default loop count for timing. */
809    
810    #define LOOPREPEAT 500000
811    
812    /* Static variables */
813    
814  static FILE *outfile;  static FILE *outfile;
815  static int log_store = 0;  static int log_store = 0;
816    static int callout_count;
817    static int callout_extra;
818    static int callout_fail_count;
819    static int callout_fail_id;
820    static int debug_lengths;
821    static int first_callout;
822    static int jit_was_used;
823    static int locale_set = 0;
824    static int show_malloc;
825    static int use_utf;
826  static size_t gotten_store;  static size_t gotten_store;
827    static size_t first_gotten_store = 0;
828    static const unsigned char *last_callout_mark = NULL;
829    
830    /* The buffers grow automatically if very long input lines are encountered. */
831    
832    static int buffer_size = 50000;
833    static pcre_uint8 *buffer = NULL;
834    static pcre_uint8 *dbuffer = NULL;
835    static pcre_uint8 *pbuffer = NULL;
836    
837    /* Another buffer is needed for translation to 16-bit (or 32-bit) character strings.
838    It will be obtained and extended as required. */
839    
840    #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
841    
842    /* We need the table of operator lengths that is used for 16/32-bit compiling, in
843    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
844    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
845    appropriately for the 16/32-bit world. Just as a safety check, make sure that
846    COMPILE_PCRE[16|32] is *not* set. */
847    
848    #ifdef COMPILE_PCRE16
849    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
850    #endif
851    
852    #ifdef COMPILE_PCRE32
853    #error COMPILE_PCRE32 must not be set when compiling pcretest.c
854    #endif
855    
856    #if LINK_SIZE == 2
857    #undef LINK_SIZE
858    #define LINK_SIZE 1
859    #elif LINK_SIZE == 3 || LINK_SIZE == 4
860    #undef LINK_SIZE
861    #define LINK_SIZE 2
862    #else
863    #error LINK_SIZE must be either 2, 3, or 4
864    #endif
865    
866    #undef IMM2_SIZE
867    #define IMM2_SIZE 1
868    
869    #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
870    
871    #ifdef SUPPORT_PCRE16
872    static int buffer16_size = 0;
873    static pcre_uint16 *buffer16 = NULL;
874    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
875    #endif  /* SUPPORT_PCRE16 */
876    
877    #ifdef SUPPORT_PCRE32
878    static int buffer32_size = 0;
879    static pcre_uint32 *buffer32 = NULL;
880    static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
881    #endif  /* SUPPORT_PCRE32 */
882    
883    /* If we have 8-bit support, default to it; if there is also
884    16- or 32-bit support, it can be changed by an option. If there is no 8-bit support,
885    there must be 16- or 32-bit support, so default to that mode (16-bit if available). */
886    
887    #if defined SUPPORT_PCRE8
888    static int pcre_mode = PCRE8_MODE;
889    #elif defined SUPPORT_PCRE16
890    static int pcre_mode = PCRE16_MODE;
891    #elif defined SUPPORT_PCRE32
892    static int pcre_mode = PCRE32_MODE;
893    #endif
894    
895    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. The seven entries are the
896    non-empty combinations of the three JIT flags, in binary counting order (COMPILE = bit 0). */
897    static int jit_study_bits[] =
898      {
899      PCRE_STUDY_JIT_COMPILE,
900      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
901      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
902      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
903      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
904      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
905      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
906        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
907    };
908    
909    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
910      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
911    
912    /* Textual explanations for runtime error codes */
913    
914    static const char *errtexts[] = {
915      NULL,  /* 0 is no error */
916      NULL,  /* NOMATCH is handled specially */
917      "NULL argument passed",
918      "bad option value",
919      "magic number missing",
920      "unknown opcode - pattern overwritten?",
921      "no more memory",
922      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
923      "match limit exceeded",
924      "callout error code",
925      NULL,  /* BADUTF8/16 is handled specially */
926      NULL,  /* BADUTF8/16 offset is handled specially */
927      NULL,  /* PARTIAL is handled specially */
928      "not used - internal error",
929      "internal error - pattern overwritten?",
930      "bad count value",
931      "item unsupported for DFA matching",
932      "backreference condition or recursion test not supported for DFA matching",
933      "match limit not supported for DFA matching",
934      "workspace size exceeded in DFA matching",
935      "too much recursion for DFA matching",
936      "recursion limit exceeded",
937      "not used - internal error",
938      "invalid combination of newline options",
939      "bad offset value",
940      NULL,  /* SHORTUTF8/16 is handled specially */
941      "nested recursion at the same subject position",
942      "JIT stack limit reached",
943      "pattern compiled in wrong mode: 8-bit/16-bit error",
944      "pattern compiled with other endianness",
945      "invalid data in workspace for DFA restart"
946    };
947    
948    
949  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
950  code as contained in pcre.c under the DEBUG macro. */  *         Alternate character tables             *
951    *************************************************/
952    
953  static const char *OP_names[] = {  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
954    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  using the default tables of the library. However, the T option can be used to
955    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  select alternate sets of tables, for different kinds of testing. Note also that
956    "Opt", "^", "$", "Any", "chars", "not",  the L (locale) option also adjusts the tables. */
957    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
958    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  /* This is the set of tables distributed as default with PCRE. It recognizes
959    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  only ASCII characters. */
960    "*", "*?", "+", "+?", "?", "??", "{", "{",  
961    "class", "Ref", "Recurse",  static const pcre_uint8 tables0[] = {
962    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
963    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  /* This table is a lower casing table. */
964    "Brazero", "Braminzero", "Bra"  
965        0,  1,  2,  3,  4,  5,  6,  7,
966        8,  9, 10, 11, 12, 13, 14, 15,
967       16, 17, 18, 19, 20, 21, 22, 23,
968       24, 25, 26, 27, 28, 29, 30, 31,
969       32, 33, 34, 35, 36, 37, 38, 39,
970       40, 41, 42, 43, 44, 45, 46, 47,
971       48, 49, 50, 51, 52, 53, 54, 55,
972       56, 57, 58, 59, 60, 61, 62, 63,
973       64, 97, 98, 99,100,101,102,103,
974      104,105,106,107,108,109,110,111,
975      112,113,114,115,116,117,118,119,
976      120,121,122, 91, 92, 93, 94, 95,
977       96, 97, 98, 99,100,101,102,103,
978      104,105,106,107,108,109,110,111,
979      112,113,114,115,116,117,118,119,
980      120,121,122,123,124,125,126,127,
981      128,129,130,131,132,133,134,135,
982      136,137,138,139,140,141,142,143,
983      144,145,146,147,148,149,150,151,
984      152,153,154,155,156,157,158,159,
985      160,161,162,163,164,165,166,167,
986      168,169,170,171,172,173,174,175,
987      176,177,178,179,180,181,182,183,
988      184,185,186,187,188,189,190,191,
989      192,193,194,195,196,197,198,199,
990      200,201,202,203,204,205,206,207,
991      208,209,210,211,212,213,214,215,
992      216,217,218,219,220,221,222,223,
993      224,225,226,227,228,229,230,231,
994      232,233,234,235,236,237,238,239,
995      240,241,242,243,244,245,246,247,
996      248,249,250,251,252,253,254,255,
997    
998    /* This table is a case flipping table. */
999    
1000        0,  1,  2,  3,  4,  5,  6,  7,
1001        8,  9, 10, 11, 12, 13, 14, 15,
1002       16, 17, 18, 19, 20, 21, 22, 23,
1003       24, 25, 26, 27, 28, 29, 30, 31,
1004       32, 33, 34, 35, 36, 37, 38, 39,
1005       40, 41, 42, 43, 44, 45, 46, 47,
1006       48, 49, 50, 51, 52, 53, 54, 55,
1007       56, 57, 58, 59, 60, 61, 62, 63,
1008       64, 97, 98, 99,100,101,102,103,
1009      104,105,106,107,108,109,110,111,
1010      112,113,114,115,116,117,118,119,
1011      120,121,122, 91, 92, 93, 94, 95,
1012       96, 65, 66, 67, 68, 69, 70, 71,
1013       72, 73, 74, 75, 76, 77, 78, 79,
1014       80, 81, 82, 83, 84, 85, 86, 87,
1015       88, 89, 90,123,124,125,126,127,
1016      128,129,130,131,132,133,134,135,
1017      136,137,138,139,140,141,142,143,
1018      144,145,146,147,148,149,150,151,
1019      152,153,154,155,156,157,158,159,
1020      160,161,162,163,164,165,166,167,
1021      168,169,170,171,172,173,174,175,
1022      176,177,178,179,180,181,182,183,
1023      184,185,186,187,188,189,190,191,
1024      192,193,194,195,196,197,198,199,
1025      200,201,202,203,204,205,206,207,
1026      208,209,210,211,212,213,214,215,
1027      216,217,218,219,220,221,222,223,
1028      224,225,226,227,228,229,230,231,
1029      232,233,234,235,236,237,238,239,
1030      240,241,242,243,244,245,246,247,
1031      248,249,250,251,252,253,254,255,
1032    
1033    /* This table contains bit maps for various character classes. Each map is 32
1034    bytes long and the bits run from the least significant end of each byte. The
1035    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1036    graph, print, punct, and cntrl. Other classes are built from combinations. */
1037    
1038      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1039      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1040      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1041      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1042    
1043      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1044      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1045      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1046      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1047    
1048      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1049      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1050      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1051      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1052    
1053      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1054      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1055      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1056      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1057    
1058      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1059      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1060      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1061      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1062    
1063      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1064      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1065      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1066      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1067    
1068      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1069      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1070      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1071      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1072    
1073      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1074      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1075      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1076      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1077    
1078      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1079      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1080      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1081      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1082    
1083      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1084      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1085      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1086      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1087    
1088    /* This table identifies various classes of character by individual bits:
1089      0x01   white space character
1090      0x02   letter
1091      0x04   decimal digit
1092      0x08   hexadecimal digit
1093      0x10   alphanumeric or '_'
1094      0x80   regular expression metacharacter or binary zero
1095    */
1096    
1097      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
1098      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
1099      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
1100      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
1101      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
1102      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
1103      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
1104      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
1105      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
1106      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
1107      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
1108      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
1109      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
1110      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
1111      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
1112      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
1113      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1114      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1115      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1116      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1117      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1118      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1119      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1120      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1121      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1122      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1123      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1124      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1125      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1126      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1127      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1128      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1129    
1130    /* This is a set of tables that came originally from a Windows user. It seems to
1131    be at least an approximation of ISO 8859. In particular, there are characters
1132    greater than 128 that are marked as spaces, letters, etc. */
1133    
1134    static const pcre_uint8 tables1[] = {
1135    0,1,2,3,4,5,6,7,
1136    8,9,10,11,12,13,14,15,
1137    16,17,18,19,20,21,22,23,
1138    24,25,26,27,28,29,30,31,
1139    32,33,34,35,36,37,38,39,
1140    40,41,42,43,44,45,46,47,
1141    48,49,50,51,52,53,54,55,
1142    56,57,58,59,60,61,62,63,
1143    64,97,98,99,100,101,102,103,
1144    104,105,106,107,108,109,110,111,
1145    112,113,114,115,116,117,118,119,
1146    120,121,122,91,92,93,94,95,
1147    96,97,98,99,100,101,102,103,
1148    104,105,106,107,108,109,110,111,
1149    112,113,114,115,116,117,118,119,
1150    120,121,122,123,124,125,126,127,
1151    128,129,130,131,132,133,134,135,
1152    136,137,138,139,140,141,142,143,
1153    144,145,146,147,148,149,150,151,
1154    152,153,154,155,156,157,158,159,
1155    160,161,162,163,164,165,166,167,
1156    168,169,170,171,172,173,174,175,
1157    176,177,178,179,180,181,182,183,
1158    184,185,186,187,188,189,190,191,
1159    224,225,226,227,228,229,230,231,
1160    232,233,234,235,236,237,238,239,
1161    240,241,242,243,244,245,246,215,
1162    248,249,250,251,252,253,254,223,
1163    224,225,226,227,228,229,230,231,
1164    232,233,234,235,236,237,238,239,
1165    240,241,242,243,244,245,246,247,
1166    248,249,250,251,252,253,254,255,
1167    0,1,2,3,4,5,6,7,
1168    8,9,10,11,12,13,14,15,
1169    16,17,18,19,20,21,22,23,
1170    24,25,26,27,28,29,30,31,
1171    32,33,34,35,36,37,38,39,
1172    40,41,42,43,44,45,46,47,
1173    48,49,50,51,52,53,54,55,
1174    56,57,58,59,60,61,62,63,
1175    64,97,98,99,100,101,102,103,
1176    104,105,106,107,108,109,110,111,
1177    112,113,114,115,116,117,118,119,
1178    120,121,122,91,92,93,94,95,
1179    96,65,66,67,68,69,70,71,
1180    72,73,74,75,76,77,78,79,
1181    80,81,82,83,84,85,86,87,
1182    88,89,90,123,124,125,126,127,
1183    128,129,130,131,132,133,134,135,
1184    136,137,138,139,140,141,142,143,
1185    144,145,146,147,148,149,150,151,
1186    152,153,154,155,156,157,158,159,
1187    160,161,162,163,164,165,166,167,
1188    168,169,170,171,172,173,174,175,
1189    176,177,178,179,180,181,182,183,
1190    184,185,186,187,188,189,190,191,
1191    224,225,226,227,228,229,230,231,
1192    232,233,234,235,236,237,238,239,
1193    240,241,242,243,244,245,246,215,
1194    248,249,250,251,252,253,254,223,
1195    192,193,194,195,196,197,198,199,
1196    200,201,202,203,204,205,206,207,
1197    208,209,210,211,212,213,214,247,
1198    216,217,218,219,220,221,222,255,
1199    0,62,0,0,1,0,0,0,
1200    0,0,0,0,0,0,0,0,
1201    32,0,0,0,1,0,0,0,
1202    0,0,0,0,0,0,0,0,
1203    0,0,0,0,0,0,255,3,
1204    126,0,0,0,126,0,0,0,
1205    0,0,0,0,0,0,0,0,
1206    0,0,0,0,0,0,0,0,
1207    0,0,0,0,0,0,255,3,
1208    0,0,0,0,0,0,0,0,
1209    0,0,0,0,0,0,12,2,
1210    0,0,0,0,0,0,0,0,
1211    0,0,0,0,0,0,0,0,
1212    254,255,255,7,0,0,0,0,
1213    0,0,0,0,0,0,0,0,
1214    255,255,127,127,0,0,0,0,
1215    0,0,0,0,0,0,0,0,
1216    0,0,0,0,254,255,255,7,
1217    0,0,0,0,0,4,32,4,
1218    0,0,0,128,255,255,127,255,
1219    0,0,0,0,0,0,255,3,
1220    254,255,255,135,254,255,255,7,
1221    0,0,0,0,0,4,44,6,
1222    255,255,127,255,255,255,127,255,
1223    0,0,0,0,254,255,255,255,
1224    255,255,255,255,255,255,255,127,
1225    0,0,0,0,254,255,255,255,
1226    255,255,255,255,255,255,255,255,
1227    0,2,0,0,255,255,255,255,
1228    255,255,255,255,255,255,255,127,
1229    0,0,0,0,255,255,255,255,
1230    255,255,255,255,255,255,255,255,
1231    0,0,0,0,254,255,0,252,
1232    1,0,0,248,1,0,0,120,
1233    0,0,0,0,254,255,255,255,
1234    0,0,128,0,0,0,128,0,
1235    255,255,255,255,0,0,0,0,
1236    0,0,0,0,0,0,0,128,
1237    255,255,255,255,0,0,0,0,
1238    0,0,0,0,0,0,0,0,
1239    128,0,0,0,0,0,0,0,
1240    0,1,1,0,1,1,0,0,
1241    0,0,0,0,0,0,0,0,
1242    0,0,0,0,0,0,0,0,
1243    1,0,0,0,128,0,0,0,
1244    128,128,128,128,0,0,128,0,
1245    28,28,28,28,28,28,28,28,
1246    28,28,0,0,0,0,0,128,
1247    0,26,26,26,26,26,26,18,
1248    18,18,18,18,18,18,18,18,
1249    18,18,18,18,18,18,18,18,
1250    18,18,18,128,128,0,128,16,
1251    0,26,26,26,26,26,26,18,
1252    18,18,18,18,18,18,18,18,
1253    18,18,18,18,18,18,18,18,
1254    18,18,18,128,128,0,0,0,
1255    0,0,0,0,0,1,0,0,
1256    0,0,0,0,0,0,0,0,
1257    0,0,0,0,0,0,0,0,
1258    0,0,0,0,0,0,0,0,
1259    1,0,0,0,0,0,0,0,
1260    0,0,18,0,0,0,0,0,
1261    0,0,20,20,0,18,0,0,
1262    0,20,18,0,0,0,0,0,
1263    18,18,18,18,18,18,18,18,
1264    18,18,18,18,18,18,18,18,
1265    18,18,18,18,18,18,18,0,
1266    18,18,18,18,18,18,18,18,
1267    18,18,18,18,18,18,18,18,
1268    18,18,18,18,18,18,18,18,
1269    18,18,18,18,18,18,18,0,
1270    18,18,18,18,18,18,18,18
1271  };  };
1272    
1273    
1274  static void print_internals(pcre *re)  
1275    
1276    #ifndef HAVE_STRERROR
1277    /*************************************************
1278    *     Provide strerror() for non-ANSI libraries  *
1279    *************************************************/
1280    
1281    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1282    in their libraries, but can provide the same facility by this simple
1283    alternative function. */
1284    
1285    extern int   sys_nerr;
1286    extern char *sys_errlist[];
1287    
1288    char *
1289    strerror(int n)
1290  {  {
1291  unsigned char *code = ((real_pcre *)re)->code;  if (n < 0 || n >= sys_nerr) return "unknown error number";
1292    return sys_errlist[n];
1293    }
1294    #endif /* HAVE_STRERROR */
1295    
1296    
1297    
1298    /*************************************************
1299    *       Print newline configuration              *
1300    *************************************************/
1301    
1302  fprintf(outfile, "------------------------------------------------------------------\n");  /*
1303    Arguments:
1304      rc         the return code from PCRE_CONFIG_NEWLINE
1305      isc        TRUE if called from "-C newline"
1306    Returns:     nothing
1307    */
1308    
1309  for(;;)  static void
1310    print_newline_config(int rc, BOOL isc)
1311    {
1312    const char *s = NULL;
1313    if (!isc) printf("  Newline sequence is ");
1314    switch(rc)
1315    {    {
1316    int c;    case CHAR_CR: s = "CR"; break;
1317    int charlength;    case CHAR_LF: s = "LF"; break;
1318      case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1319      case -1: s = "ANY"; break;
1320      case -2: s = "ANYCRLF"; break;
1321    
1322      default:
1323      printf("a non-standard value: 0x%04x\n", rc);
1324      return;
1325      }
1326    
1327    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  printf("%s\n", s);
1328    }
1329    
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
1330    
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
1331    
1332      case OP_OPT:  /*************************************************
1333      fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  *         JIT memory callback                    *
1334      code++;  *************************************************/
     break;  
1335    
1336      case OP_COND:  static pcre_jit_stack* jit_callback(void *arg)
1337      fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  {
1338      code += 2;  jit_was_used = TRUE;
1339      break;  return (pcre_jit_stack *)arg;
1340    }
1341    
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
1342    
1343      case OP_CHARS:  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1344      charlength = *(++code);  /*************************************************
1345      fprintf(outfile, "%3d ", charlength);  *            Convert UTF-8 string to value       *
1346      while (charlength-- > 0)  *************************************************/
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
1347    
1348      case OP_KETRMAX:  /* This function takes one or more bytes that represents a UTF-8 character,
1349      case OP_KETRMIN:  and returns the value of the character.
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
1350    
1351      case OP_REVERSE:  Argument:
1352      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);    utf8bytes   a pointer to the byte vector
1353      code += 2;    vptr        a pointer to an int to receive the value
1354      break;  
1355    Returns:      >  0 => the number of bytes consumed
1356                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1357    */
1358    
1359      case OP_STAR:  static int
1360      case OP_MINSTAR:  utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1361      case OP_PLUS:  {
1362      case OP_MINPLUS:  int c = *utf8bytes++;
1363      case OP_QUERY:  int d = c;
1364      case OP_MINQUERY:  int i, j, s;
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
1365    
1366      case OP_EXACT:  for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1367      case OP_UPTO:    {
1368      case OP_MINUPTO:    if ((d & 0x80) == 0) break;
1369      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);    d <<= 1;
1370        else fprintf(outfile, "    \\x%02x{", c);    }
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
1371    
1372      case OP_TYPEEXACT:  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1373      case OP_TYPEUPTO:  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
1374    
1375      case OP_NOT:  /* i now has a value in the range 1-5 */
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
1376    
1377      case OP_NOTSTAR:  s = 6*i;
1378      case OP_NOTMINSTAR:  d = (c & utf8_table3[i]) << s;
1379      case OP_NOTPLUS:  
1380      case OP_NOTMINPLUS:  for (j = 0; j < i; j++)
1381      case OP_NOTQUERY:    {
1382      case OP_NOTMINQUERY:    c = *utf8bytes++;
1383      if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);    if ((c & 0xc0) != 0x80) return -(j+1);
1384        else fprintf(outfile, "    [^\\x%02x]", c);    s -= 6;
1385      fprintf(outfile, "%s", OP_names[*code++]);    d |= (c & 0x3f) << s;
1386      break;    }
1387    
1388    /* Check that encoding was the correct unique one */
1389    
1390    for (j = 0; j < utf8_table1_size; j++)
1391      if (d <= utf8_table1[j]) break;
1392    if (j != i) return -(i+1);
1393    
1394    /* Valid value */
1395    
1396    *vptr = d;
1397    return i+1;
1398    }
1399    #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1400    
1401    
1402    
1403    #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1404    /*************************************************
1405    *       Convert character value to UTF-8         *
1406    *************************************************/
1407    
1408    /* This function takes an integer value in the range 0 - 0x7fffffff
1409    and encodes it as a UTF-8 character in 0 to 6 bytes.
1410    
1411    Arguments:
1412      cvalue     the character value
1413      utf8bytes  pointer to buffer for result - at least 6 bytes long
1414    
1415    Returns:     number of characters placed in the buffer
1416    */
1417    
1418    static int
1419    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1420    {
1421    register int i, j;
1422    for (i = 0; i < utf8_table1_size; i++)
1423      if (cvalue <= utf8_table1[i]) break;
1424    utf8bytes += i;
1425    for (j = i; j > 0; j--)
1426     {
1427     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1428     cvalue >>= 6;
1429     }
1430    *utf8bytes = utf8_table2[i] | cvalue;
1431    return i + 1;
1432    }
1433    #endif
1434    
1435    
1436    #ifdef SUPPORT_PCRE16
1437    /*************************************************
1438    *         Convert a string to 16-bit             *
1439    *************************************************/
1440    
1441    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1442    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1443    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1444    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1445    result is always left in buffer16.
1446    
1447    Note that this function does not object to surrogate values. This is
1448    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1449    for the purpose of testing that they are correctly faulted.
1450    
1451    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1452    in UTF-8 so that values greater than 255 can be handled.
1453    
1454    Arguments:
1455      data       TRUE if converting a data line; FALSE for a regex
1456      p          points to a byte string
1457      utf        true if UTF-8 (to be converted to UTF-16)
1458      len        number of bytes in the string (excluding trailing zero)
1459    
1460    Returns:     number of 16-bit data items used (excluding trailing zero)
1461                 OR -1 if a UTF-8 string is malformed
1462                 OR -2 if a value > 0x10ffff is encountered
1463                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1464    */
1465    
1466    static int
1467    to16(int data, pcre_uint8 *p, int utf, int len)
1468    {
1469    pcre_uint16 *pp;
1470    
1471    if (buffer16_size < 2*len + 2)
1472      {
1473      if (buffer16 != NULL) free(buffer16);
1474      buffer16_size = 2*len + 2;
1475      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1476      if (buffer16 == NULL)
1477        {
1478        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1479        exit(1);
1480        }
1481      }
1482    
1483    pp = buffer16;
1484    
1485    if (!utf && !data)
1486      {
1487      while (len-- > 0) *pp++ = *p++;
1488      }
1489    
1490    else
1491      {
1492      int c = 0;
1493      while (len > 0)
1494        {
1495        int chlen = utf82ord(p, &c);
1496        if (chlen <= 0) return -1;
1497        if (c > 0x10ffff) return -2;
1498        p += chlen;
1499        len -= chlen;
1500        if (c < 0x10000) *pp++ = c; else
1501          {
1502          if (!utf) return -3;
1503          c -= 0x10000;
1504          *pp++ = 0xD800 | (c >> 10);
1505          *pp++ = 0xDC00 | (c & 0x3ff);
1506          }
1507        }
1508      }
1509    
1510    *pp = 0;
1511    return pp - buffer16;
1512    }
1513    #endif
1514    
1515    #ifdef SUPPORT_PCRE32
1516    /*************************************************
1517    *         Convert a string to 32-bit             *
1518    *************************************************/
1519    
1520    /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1521    8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1522    times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1523    in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1524    result is always left in buffer32.
1525    
1526    Note that this function does not object to surrogate values. This is
1527    deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1528    for the purpose of testing that they are correctly faulted.
1529    
1530    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1531    in UTF-8 so that values greater than 255 can be handled.
1532    
1533    Arguments:
1534      data       TRUE if converting a data line; FALSE for a regex
1535      p          points to a byte string
1536      utf        true if UTF-8 (to be converted to UTF-32)
1537      len        number of bytes in the string (excluding trailing zero)
1538    
1539    Returns:     number of 32-bit data items used (excluding trailing zero)
1540                 OR -1 if a UTF-8 string is malformed
1541                 OR -2 if a value > 0x10ffff is encountered
1542                 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1543    */
1544    
1545    static int
1546    to32(int data, pcre_uint8 *p, int utf, int len)
1547    {
1548    pcre_uint32 *pp;
1549    
1550    if (buffer32_size < 4*len + 4)
1551      {
1552      if (buffer32 != NULL) free(buffer32);
1553      buffer32_size = 4*len + 4;
1554      buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1555      if (buffer32 == NULL)
1556        {
1557        fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1558        exit(1);
1559        }
1560      }
1561    
1562    pp = buffer32;
1563    
1564    if (!utf && !data)
1565      {
1566      while (len-- > 0) *pp++ = *p++;
1567      }
1568    
1569    else
1570      {
1571      int c = 0;
1572      while (len > 0)
1573        {
1574        int chlen = utf82ord(p, &c);
1575        if (chlen <= 0) return -1;
1576        if (utf)
1577          {
1578          if (c > 0x10ffff) return -2;
1579          if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1580          }
1581    
1582        p += chlen;
1583        len -= chlen;
1584        *pp++ = c;
1585        }
1586      }
1587    
1588    *pp = 0;
1589    return pp - buffer32;
1590    }
1591    #endif
1592    
1593    /*************************************************
1594    *        Read or extend an input line            *
1595    *************************************************/
1596    
1597    /* Input lines are read into buffer, but both patterns and data lines can be
1598    continued over multiple input lines. In addition, if the buffer fills up, we
1599    want to automatically expand it so as to be able to handle extremely large
1600    lines that are needed for certain stress tests. When the input buffer is
1601    expanded, the other two buffers must also be expanded likewise, and the
1602    contents of pbuffer, which are a copy of the input for callouts, must be
1603    preserved (for when expansion happens for a data line). This is not the most
1604    optimal way of handling this, but hey, this is just a test program!
1605    
1606    Arguments:
1607      f            the file to read
1608      start        where in buffer to start (this *must* be within buffer)
1609      prompt       for stdin or readline()
1610    
1611    Returns:       pointer to the start of new data
1612                   could be a copy of start, or could be moved
1613                   NULL if no data read and EOF reached
1614    */
1615    
1616    static pcre_uint8 *
1617    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1618    {
1619    pcre_uint8 *here = start;
1620    
1621    for (;;)
1622      {
1623      size_t rlen = (size_t)(buffer_size - (here - buffer));
1624    
1625      if (rlen > 1000)
1626        {
1627        int dlen;
1628    
1629        /* If libreadline or libedit support is required, use readline() to read a
1630        line if the input is a terminal. Note that readline() removes the trailing
1631        newline, so we must put it back again, to be compatible with fgets(). */
1632    
1633    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1634        if (isatty(fileno(f)))
1635          {
1636          size_t len;
1637          char *s = readline(prompt);
1638          if (s == NULL) return (here == start)? NULL : start;
1639          len = strlen(s);
1640          if (len > 0) add_history(s);
1641          if (len > rlen - 1) len = rlen - 1;
1642          memcpy(here, s, len);
1643          here[len] = '\n';
1644          here[len+1] = 0;
1645          free(s);
1646          }
1647        else
1648    #endif
1649    
1650        /* Read the next line by normal means, prompting if the file is stdin. */
1651    
1652          {
1653          if (f == stdin) printf("%s", prompt);
1654          if (fgets((char *)here, rlen,  f) == NULL)
1655            return (here == start)? NULL : start;
1656          }
1657    
1658        dlen = (int)strlen((char *)here);
1659        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1660        here += dlen;
1661        }
1662    
1663      else
1664        {
1665        int new_buffer_size = 2*buffer_size;
1666        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1667        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1668        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1669    
1670        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1671          {
1672          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1673          exit(1);
1674          }
1675    
1676        memcpy(new_buffer, buffer, buffer_size);
1677        memcpy(new_pbuffer, pbuffer, buffer_size);
1678    
1679        buffer_size = new_buffer_size;
1680    
1681        start = new_buffer + (start - buffer);
1682        here = new_buffer + (here - buffer);
1683    
1684        free(buffer);
1685        free(dbuffer);
1686        free(pbuffer);
1687    
1688        buffer = new_buffer;
1689        dbuffer = new_dbuffer;
1690        pbuffer = new_pbuffer;
1691        }
1692      }
1693    
1694    return NULL;  /* Control never gets here */
1695    }
1696    
1697    
1698    
1699    /*************************************************
1700    *          Read number from string               *
1701    *************************************************/
1702    
1703    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1704    around with conditional compilation, just do the job by hand. It is only used
1705    for unpicking arguments, so just keep it simple.
1706    
1707    Arguments:
1708      str           string to be converted
1709      endptr        where to put the end pointer
1710    
1711    Returns:        the unsigned long
1712    */
1713    
1714    static int
1715    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1716    {
1717    int result = 0;
1718    while(*str != 0 && isspace(*str)) str++;
1719    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1720    *endptr = str;
1721    return(result);
1722    }
1723    
1724    
1725    
1726    /*************************************************
1727    *             Print one character                *
1728    *************************************************/
1729    
1730    /* Print a single character either literally, or as a hex escape. */
1731    
1732    static int pchar(pcre_uint32 c, FILE *f)
1733    {
1734    int n;
1735    if (PRINTOK(c))
1736      {
1737      if (f != NULL) fprintf(f, "%c", c);
1738      return 1;
1739      }
1740    
1741    if (c < 0x100)
1742      {
1743      if (use_utf)
1744        {
1745        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1746        return 6;
1747        }
1748      else
1749        {
1750        if (f != NULL) fprintf(f, "\\x%02x", c);
1751        return 4;
1752        }
1753      }
1754    
1755    if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1756    return n >= 0 ? n : 0;
1757    }
1758    
1759    
1760    
1761    #ifdef SUPPORT_PCRE8
1762    /*************************************************
1763    *         Print 8-bit character string           *
1764    *************************************************/
1765    
1766    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1767    If handed a NULL file, just counts chars without printing. */
1768    
1769    static int pchars(pcre_uint8 *p, int length, FILE *f)
1770    {
1771    int c = 0;
1772    int yield = 0;
1773    
1774    if (length < 0)
1775      length = strlen((char *)p);
1776    
1777    while (length-- > 0)
1778      {
1779    #if !defined NOUTF
1780      if (use_utf)
1781        {
1782        int rc = utf82ord(p, &c);
1783        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1784          {
1785          length -= rc - 1;
1786          p += rc;
1787          yield += pchar(c, f);
1788          continue;
1789          }
1790        }
1791    #endif
1792      c = *p++;
1793      yield += pchar(c, f);
1794      }
1795    
1796    return yield;
1797    }
1798    #endif
1799    
1800    
1801    
1802    #ifdef SUPPORT_PCRE16
1803    /*************************************************
1804    *    Find length of 0-terminated 16-bit string   *
1805    *************************************************/
1806    
1807    static int strlen16(PCRE_SPTR16 p)
1808    {
1809    int len = 0;
1810    while (*p++ != 0) len++;
1811    return len;
1812    }
1813    #endif  /* SUPPORT_PCRE16 */
1814    
1815    
1816    
1817    #ifdef SUPPORT_PCRE32
1818    /*************************************************
1819    *    Find length of 0-terminated 32-bit string   *
1820    *************************************************/
1821    
1822    static int strlen32(PCRE_SPTR32 p)
1823    {
1824    int len = 0;
1825    while (*p++ != 0) len++;
1826    return len;
1827    }
1828    #endif  /* SUPPORT_PCRE32 */
1829    
1830    
1831    
1832    #ifdef SUPPORT_PCRE16
1833    /*************************************************
1834    *           Print 16-bit character string        *
1835    *************************************************/
1836    
1837    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1838    If handed a NULL file, just counts chars without printing. */
1839    
1840    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1841    {
1842    int yield = 0;
1843    
1844    if (length < 0)
1845      length = strlen16(p);
1846    
1847    while (length-- > 0)
1848      {
1849      pcre_uint32 c = *p++ & 0xffff;
1850    #if !defined NOUTF
1851      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1852        {
1853        int d = *p & 0xffff;
1854        if (d >= 0xDC00 && d < 0xDFFF)
1855          {
1856          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1857          length--;
1858          p++;
1859          }
1860        }
1861    #endif
1862      yield += pchar(c, f);
1863      }
1864    
1865    return yield;
1866    }
1867    #endif  /* SUPPORT_PCRE16 */
1868    
1869    
1870    
1871    #ifdef SUPPORT_PCRE32
1872    /*************************************************
1873    *           Print 32-bit character string        *
1874    *************************************************/
1875    
1876    /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
1877    If handed a NULL file, just counts chars without printing. */
1878    
1879    static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
1880    {
1881    int yield = 0;
1882    
1883    if (length < 0)
1884      length = strlen32(p);
1885    
1886    while (length-- > 0)
1887      {
1888      pcre_uint32 c = *p++;
1889      yield += pchar(c, f);
1890      }
1891    
1892    return yield;
1893    }
1894    #endif  /* SUPPORT_PCRE32 */
1895    
1896    
1897    
1898    #ifdef SUPPORT_PCRE8
1899    /*************************************************
1900    *     Read a capture name (8-bit) and check it   *
1901    *************************************************/
1902    
1903    static pcre_uint8 *
1904    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1905    {
1906    pcre_uint8 *npp = *pp;
1907    while (isalnum(*p)) *npp++ = *p++;
1908    *npp++ = 0;
1909    *npp = 0;
1910    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1911      {
1912      fprintf(outfile, "no parentheses with name \"");
1913      PCHARSV(*pp, 0, -1, outfile);
1914      fprintf(outfile, "\"\n");
1915      }
1916    
1917    *pp = npp;
1918    return p;
1919    }
1920    #endif  /* SUPPORT_PCRE8 */
1921    
1922    
1923    
1924    #ifdef SUPPORT_PCRE16
1925    /*************************************************
1926    *     Read a capture name (16-bit) and check it  *
1927    *************************************************/
1928    
1929    /* Note that the text being read is 8-bit. */
1930    
1931    static pcre_uint8 *
1932    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1933    {
1934    pcre_uint16 *npp = *pp;
1935    while (isalnum(*p)) *npp++ = *p++;
1936    *npp++ = 0;
1937    *npp = 0;
1938    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1939      {
1940      fprintf(outfile, "no parentheses with name \"");
1941      PCHARSV(*pp, 0, -1, outfile);
1942      fprintf(outfile, "\"\n");
1943      }
1944    *pp = npp;
1945    return p;
1946    }
1947    #endif  /* SUPPORT_PCRE16 */
1948    
1949    
1950    
1951    #ifdef SUPPORT_PCRE32
1952    /*************************************************
1953    *     Read a capture name (32-bit) and check it  *
1954    *************************************************/
1955    
1956    /* Note that the text being read is 8-bit. */
1957    
1958    static pcre_uint8 *
1959    read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
1960    {
1961    pcre_uint32 *npp = *pp;
1962    while (isalnum(*p)) *npp++ = *p++;
1963    *npp++ = 0;
1964    *npp = 0;
1965    if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
1966      {
1967      fprintf(outfile, "no parentheses with name \"");
1968      PCHARSV(*pp, 0, -1, outfile);
1969      fprintf(outfile, "\"\n");
1970      }
1971    *pp = npp;
1972    return p;
1973    }
1974    #endif  /* SUPPORT_PCRE32 */
1975    
1976    
1977    
1978    /*************************************************
1979    *              Callout function                  *
1980    *************************************************/
1981    
1982    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1983    the match. Yield zero unless more callouts than the fail count, or the callout
1984    data is not zero. */
1985    
1986    static int callout(pcre_callout_block *cb)
1987    {
1988    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1989    int i, pre_start, post_start, subject_length;
1990    
1991    if (callout_extra)
1992      {
1993      fprintf(f, "Callout %d: last capture = %d\n",
1994        cb->callout_number, cb->capture_last);
1995    
1996      for (i = 0; i < cb->capture_top * 2; i += 2)
1997        {
1998        if (cb->offset_vector[i] < 0)
1999          fprintf(f, "%2d: <unset>\n", i/2);
2000        else
2001          {
2002          fprintf(f, "%2d: ", i/2);
2003          PCHARSV(cb->subject, cb->offset_vector[i],
2004            cb->offset_vector[i+1] - cb->offset_vector[i], f);
2005          fprintf(f, "\n");
2006          }
2007        }
2008      }
2009    
2010    /* Re-print the subject in canonical form, the first time or if giving full
2011    datails. On subsequent calls in the same match, we use pchars just to find the
2012    printed lengths of the substrings. */
2013    
2014    if (f != NULL) fprintf(f, "--->");
2015    
2016    PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2017    PCHARS(post_start, cb->subject, cb->start_match,
2018      cb->current_position - cb->start_match, f);
2019    
2020    PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2021    
2022    PCHARSV(cb->subject, cb->current_position,
2023      cb->subject_length - cb->current_position, f);
2024    
2025    if (f != NULL) fprintf(f, "\n");
2026    
2027    /* Always print appropriate indicators, with callout number if not already
2028    shown. For automatic callouts, show the pattern offset. */
2029    
2030    if (cb->callout_number == 255)
2031      {
2032      fprintf(outfile, "%+3d ", cb->pattern_position);
2033      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
2034      }
2035    else
2036      {
2037      if (callout_extra) fprintf(outfile, "    ");
2038        else fprintf(outfile, "%3d ", cb->callout_number);
2039      }
2040    
2041    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2042    fprintf(outfile, "^");
2043    
2044    if (post_start > 0)
2045      {
2046      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2047      fprintf(outfile, "^");
2048      }
2049    
2050    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2051      fprintf(outfile, " ");
2052    
2053    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2054      pbuffer + cb->pattern_position);
2055    
2056    fprintf(outfile, "\n");
2057    first_callout = 0;
2058    
2059    if (cb->mark != last_callout_mark)
2060      {
2061      if (cb->mark == NULL)
2062        fprintf(outfile, "Latest Mark: <unset>\n");
2063      else
2064        {
2065        fprintf(outfile, "Latest Mark: ");
2066        PCHARSV(cb->mark, 0, -1, outfile);
2067        putc('\n', outfile);
2068        }
2069      last_callout_mark = cb->mark;
2070      }
2071    
2072    if (cb->callout_data != NULL)
2073      {
2074      int callout_data = *((int *)(cb->callout_data));
2075      if (callout_data != 0)
2076        {
2077        fprintf(outfile, "Callout data = %d\n", callout_data);
2078        return callout_data;
2079        }
2080      }
2081    
2082    return (cb->callout_number != callout_fail_id)? 0 :
2083           (++callout_count >= callout_fail_count)? 1 : 0;
2084    }
2085    
2086    
2087    /*************************************************
2088    *            Local malloc functions              *
2089    *************************************************/
2090    
2091    /* Alternative malloc function, to test functionality and save the size of a
2092    compiled re, which is the first store request that pcre_compile() makes. The
2093    show_malloc variable is set only during matching. */
2094    
2095    static void *new_malloc(size_t size)
2096    {
2097    void *block = malloc(size);
2098    gotten_store = size;
2099    if (first_gotten_store == 0) first_gotten_store = size;
2100    if (show_malloc)
2101      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
2102    return block;
2103    }
2104    
2105    static void new_free(void *block)
2106    {
2107    if (show_malloc)
2108      fprintf(outfile, "free             %p\n", block);
2109    free(block);
2110    }
2111    
2112    /* For recursion malloc/free, to test stacking calls */
2113    
2114    static void *stack_malloc(size_t size)
2115    {
2116    void *block = malloc(size);
2117    if (show_malloc)
2118      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2119    return block;
2120    }
2121    
2122    static void stack_free(void *block)
2123    {
2124    if (show_malloc)
2125      fprintf(outfile, "stack_free       %p\n", block);
2126    free(block);
2127    }
2128    
2129    
2130    /*************************************************
2131    *          Call pcre_fullinfo()                  *
2132    *************************************************/
2133    
2134    /* Get one piece of information from the pcre_fullinfo() function. When only
2135    one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2136    value, but the code is defensive.
2137    
2138    Arguments:
2139      re        compiled regex
2140      study     study data
2141      option    PCRE_INFO_xxx option
2142      ptr       where to put the data
2143    
2144    Returns:    0 when OK, < 0 on error
2145    */
2146    
2147    static int
2148    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2149    {
2150    int rc;
2151    
2152    if (pcre_mode == PCRE32_MODE)
2153    #ifdef SUPPORT_PCRE32
2154      rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2155    #else
2156      rc = PCRE_ERROR_BADMODE;
2157    #endif
2158    else if (pcre_mode == PCRE16_MODE)
2159    #ifdef SUPPORT_PCRE16
2160      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2161    #else
2162      rc = PCRE_ERROR_BADMODE;
2163    #endif
2164    else
2165    #ifdef SUPPORT_PCRE8
2166      rc = pcre_fullinfo(re, study, option, ptr);
2167    #else
2168      rc = PCRE_ERROR_BADMODE;
2169    #endif
2170    
2171    if (rc < 0)
2172      {
2173      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2174        pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2175      if (rc == PCRE_ERROR_BADMODE)
2176        fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2177          "%d-bit mode\n", 8 * CHAR_SIZE,
2178          8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2179      }
2180    
2181    return rc;
2182    }
2183    
2184    
2185    
2186    /*************************************************
2187    *             Swap byte functions                *
2188    *************************************************/
2189    
2190    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2191    value, respectively.
2192    
2193    Arguments:
2194      value        any number
2195    
2196    Returns:       the byte swapped value
2197    */
2198    
2199    static pcre_uint32
2200    swap_uint32(pcre_uint32 value)
2201    {
2202    return ((value & 0x000000ff) << 24) |
2203           ((value & 0x0000ff00) <<  8) |
2204           ((value & 0x00ff0000) >>  8) |
2205           (value >> 24);
2206    }
2207    
2208    static pcre_uint16
2209    swap_uint16(pcre_uint16 value)
2210    {
2211    return (value >> 8) | (value << 8);
2212    }
2213    
2214    
2215    
2216    /*************************************************
2217    *        Flip bytes in a compiled pattern        *
2218    *************************************************/
2219    
2220    /* This function is called if the 'F' option was present on a pattern that is
2221    to be written to a file. We flip the bytes of all the integer fields in the
2222    regex data block and the study block. In 16-bit mode this also flips relevant
2223    bytes in the pattern itself. This is to make it possible to test PCRE's
2224    ability to reload byte-flipped patterns, e.g. those compiled on a different
2225    architecture. */
2226    
2227    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2228    static void
2229    regexflip8_or_16(pcre *ere, pcre_extra *extra)
2230    {
2231    real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2232    #ifdef SUPPORT_PCRE16
2233    int op;
2234    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2235    int length = re->name_count * re->name_entry_size;
2236    #ifdef SUPPORT_UTF
2237    BOOL utf = (re->options & PCRE_UTF16) != 0;
2238    BOOL utf16_char = FALSE;
2239    #endif /* SUPPORT_UTF */
2240    #endif /* SUPPORT_PCRE16 */
2241    
2242    /* Always flip the bytes in the main data block and study blocks. */
2243    
2244    re->magic_number = REVERSED_MAGIC_NUMBER;
2245    re->size = swap_uint32(re->size);
2246    re->options = swap_uint32(re->options);
2247    re->flags = swap_uint16(re->flags);
2248    re->top_bracket = swap_uint16(re->top_bracket);
2249    re->top_backref = swap_uint16(re->top_backref);
2250    re->first_char = swap_uint16(re->first_char);
2251    re->req_char = swap_uint16(re->req_char);
2252    re->name_table_offset = swap_uint16(re->name_table_offset);
2253    re->name_entry_size = swap_uint16(re->name_entry_size);
2254    re->name_count = swap_uint16(re->name_count);
2255    
2256    if (extra != NULL)
2257      {
2258      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2259      rsd->size = swap_uint32(rsd->size);
2260      rsd->flags = swap_uint32(rsd->flags);
2261      rsd->minlength = swap_uint32(rsd->minlength);
2262      }
2263    
2264    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2265    in the name table, if present, and then in the pattern itself. */
2266    
2267    #ifdef SUPPORT_PCRE16
2268    if (pcre_mode != PCRE16_MODE) return;
2269    
2270    while(TRUE)
2271      {
2272      /* Swap previous characters. */
2273      while (length-- > 0)
2274        {
2275        *ptr = swap_uint16(*ptr);
2276        ptr++;
2277        }
2278    #ifdef SUPPORT_UTF
2279      if (utf16_char)
2280        {
2281        if ((ptr[-1] & 0xfc00) == 0xd800)
2282          {
2283          /* We know that there is only one extra character in UTF-16. */
2284          *ptr = swap_uint16(*ptr);
2285          ptr++;
2286          }
2287        }
2288      utf16_char = FALSE;
2289    #endif /* SUPPORT_UTF */
2290    
2291      /* Get next opcode. */
2292    
2293      length = 0;
2294      op = *ptr;
2295      *ptr++ = swap_uint16(op);
2296    
2297      switch (op)
2298        {
2299        case OP_END:
2300        return;
2301    
2302    #ifdef SUPPORT_UTF
2303        case OP_CHAR:
2304        case OP_CHARI:
2305        case OP_NOT:
2306        case OP_NOTI:
2307        case OP_STAR:
2308        case OP_MINSTAR:
2309        case OP_PLUS:
2310        case OP_MINPLUS:
2311        case OP_QUERY:
2312        case OP_MINQUERY:
2313        case OP_UPTO:
2314        case OP_MINUPTO:
2315        case OP_EXACT:
2316        case OP_POSSTAR:
2317        case OP_POSPLUS:
2318        case OP_POSQUERY:
2319        case OP_POSUPTO:
2320        case OP_STARI:
2321        case OP_MINSTARI:
2322        case OP_PLUSI:
2323        case OP_MINPLUSI:
2324        case OP_QUERYI:
2325        case OP_MINQUERYI:
2326        case OP_UPTOI:
2327        case OP_MINUPTOI:
2328        case OP_EXACTI:
2329        case OP_POSSTARI:
2330        case OP_POSPLUSI:
2331        case OP_POSQUERYI:
2332        case OP_POSUPTOI:
2333        case OP_NOTSTAR:
2334        case OP_NOTMINSTAR:
2335        case OP_NOTPLUS:
2336        case OP_NOTMINPLUS:
2337        case OP_NOTQUERY:
2338        case OP_NOTMINQUERY:
2339        case OP_NOTUPTO:
2340        case OP_NOTMINUPTO:
2341        case OP_NOTEXACT:
2342        case OP_NOTPOSSTAR:
2343        case OP_NOTPOSPLUS:
2344        case OP_NOTPOSQUERY:
2345        case OP_NOTPOSUPTO:
2346        case OP_NOTSTARI:
2347        case OP_NOTMINSTARI:
2348        case OP_NOTPLUSI:
2349        case OP_NOTMINPLUSI:
2350        case OP_NOTQUERYI:
2351        case OP_NOTMINQUERYI:
2352        case OP_NOTUPTOI:
2353        case OP_NOTMINUPTOI:
2354        case OP_NOTEXACTI:
2355        case OP_NOTPOSSTARI:
2356        case OP_NOTPOSPLUSI:
2357        case OP_NOTPOSQUERYI:
2358        case OP_NOTPOSUPTOI:
2359        if (utf) utf16_char = TRUE;
2360    #endif
2361        /* Fall through. */
2362    
2363        default:
2364        length = OP_lengths16[op] - 1;
2365        break;
2366    
2367        case OP_CLASS:
2368        case OP_NCLASS:
2369        /* Skip the character bit map. */
2370        ptr += 32/sizeof(pcre_uint16);
2371        length = 0;
2372        break;
2373    
2374        case OP_XCLASS:
2375        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2376        if (LINK_SIZE > 1)
2377          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2378            - (1 + LINK_SIZE + 1));
2379        else
2380          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2381    
2382        /* Reverse the size of the XCLASS instance. */
2383        *ptr = swap_uint16(*ptr);
2384        ptr++;
2385        if (LINK_SIZE > 1)
2386          {
2387          *ptr = swap_uint16(*ptr);
2388          ptr++;
2389          }
2390    
2391        op = *ptr;
2392        *ptr = swap_uint16(op);
2393        ptr++;
2394        if ((op & XCL_MAP) != 0)
2395          {
2396          /* Skip the character bit map. */
2397          ptr += 32/sizeof(pcre_uint16);
2398          length -= 32/sizeof(pcre_uint16);
2399          }
2400        break;
2401        }
2402      }
2403    /* Control should never reach here in 16 bit mode. */
2404    #endif /* SUPPORT_PCRE16 */
2405    }
2406    #endif /* SUPPORT_PCRE[8|16] */
2407    
2408    
2409    
2410    #if defined SUPPORT_PCRE32
2411    static void
2412    regexflip_32(pcre *ere, pcre_extra *extra)
2413    {
2414    real_pcre32 *re = (real_pcre32 *)ere;
2415    int op;
2416    pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2417    int length = re->name_count * re->name_entry_size;
2418    #ifdef SUPPORT_UTF
2419    BOOL utf = (re->options & PCRE_UTF32) != 0;
2420    #endif /* SUPPORT_UTF */
2421    
2422    /* Always flip the bytes in the main data block and study blocks. */
2423    
2424    re->magic_number = REVERSED_MAGIC_NUMBER;
2425    re->size = swap_uint32(re->size);
2426    re->options = swap_uint32(re->options);
2427    re->flags = swap_uint16(re->flags);
2428    re->top_bracket = swap_uint16(re->top_bracket);
2429    re->top_backref = swap_uint16(re->top_backref);
2430    re->first_char = swap_uint32(re->first_char);
2431    re->req_char = swap_uint32(re->req_char);
2432    re->name_table_offset = swap_uint16(re->name_table_offset);
2433    re->name_entry_size = swap_uint16(re->name_entry_size);
2434    re->name_count = swap_uint16(re->name_count);
2435    
2436    if (extra != NULL)
2437      {
2438      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2439      rsd->size = swap_uint32(rsd->size);
2440      rsd->flags = swap_uint32(rsd->flags);
2441      rsd->minlength = swap_uint32(rsd->minlength);
2442      }
2443    
2444    /* In 32-bit mode we must swap bytes
2445    in the name table, if present, and then in the pattern itself. */
2446    
2447    while(TRUE)
2448      {
2449      /* Swap previous characters. */
2450      while (length-- > 0)
2451        {
2452        *ptr = swap_uint32(*ptr);
2453        ptr++;
2454        }
2455    
2456      /* Get next opcode. */
2457    
2458      length = 0;
2459      op = *ptr;
2460      *ptr++ = swap_uint32(op);
2461    
2462      switch (op)
2463        {
2464        case OP_END:
2465        return;
2466    
2467      case OP_NOTEXACT:      default:
2468      case OP_NOTUPTO:      length = OP_lengths32[op] - 1;
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
2469      break;      break;
2470    
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
2471      case OP_CLASS:      case OP_CLASS:
2472        case OP_NCLASS:
2473        /* Skip the character bit map. */
2474        ptr += 32/sizeof(pcre_uint32);
2475        length = 0;
2476        break;
2477    
2478        case OP_XCLASS:
2479        /* LINK_SIZE can only be 1 in 32-bit mode. */
2480        length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2481    
2482        /* Reverse the size of the XCLASS instance. */
2483        *ptr = swap_uint32(*ptr);
2484        ptr++;
2485    
2486        op = *ptr;
2487        *ptr = swap_uint32(op);
2488        ptr++;
2489        if ((op & XCL_MAP) != 0)
2490        {        {
2491        int i, min, max;        /* Skip the character bit map. */
2492        code++;        ptr += 32/sizeof(pcre_uint32);
2493        fprintf(outfile, "    [");        length -= 32/sizeof(pcre_uint32);
2494          }
2495        break;
2496        }
2497      }
2498    /* Control should never reach here in 32 bit mode. */
2499    }
2500    
2501        for (i = 0; i < 256; i++)  #endif /* SUPPORT_PCRE32 */
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
2502    
       CLASS_REF_REPEAT:  
2503    
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
2504    
2505          case OP_CRRANGE:  static void
2506          case OP_CRMINRANGE:  regexflip(pcre *ere, pcre_extra *extra)
2507          min = (code[1] << 8) + code[2];  {
2508          max = (code[3] << 8) + code[4];  #if defined SUPPORT_PCRE32
2509          if (max == 0) fprintf(outfile, "{%d,}", min);    if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2510          else fprintf(outfile, "{%d,%d}", min, max);      regexflip_32(ere, extra);
2511          if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  #endif
2512          code += 4;  #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2513          break;    if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2514        regexflip8_or_16(ere, extra);
2515    #endif
2516    }
2517    
         default:  
         code--;  
         }  
       }  
     break;  
2518    
     /* Anything else is just a one-node item */  
2519    
2520      default:  /*************************************************
2521      fprintf(outfile, "    %s", OP_names[*code]);  *        Check match or recursion limit          *
2522      break;  *************************************************/
2523    
2524    static int
2525    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2526      int start_offset, int options, int *use_offsets, int use_size_offsets,
2527      int flag, unsigned long int *limit, int errnumber, const char *msg)
2528    {
2529    int count;
2530    int min = 0;
2531    int mid = 64;
2532    int max = -1;
2533    
2534    extra->flags |= flag;
2535    
2536    for (;;)
2537      {
2538      *limit = mid;
2539    
2540      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2541        use_offsets, use_size_offsets);
2542    
2543      if (count == errnumber)
2544        {
2545        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2546        min = mid;
2547        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2548      }      }
2549    
2550    code++;    else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2551    fprintf(outfile, "\n");                           count == PCRE_ERROR_PARTIAL)
2552        {
2553        if (mid == min + 1)
2554          {
2555          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2556          break;
2557          }
2558        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2559        max = mid;
2560        mid = (min + mid)/2;
2561        }
2562      else break;    /* Some other error */
2563    }    }
2564    
2565    extra->flags &= ~flag;
2566    return count;
2567  }  }
2568    
2569    
2570    
2571  /* Character string printing function. */  /*************************************************
2572    *         Case-independent strncmp() function    *
2573    *************************************************/
2574    
2575    /*
2576    Arguments:
2577      s         first string
2578      t         second string
2579      n         number of characters to compare
2580    
2581    Returns:    < 0, = 0, or > 0, according to the comparison
2582    */
2583    
2584  static void pchars(unsigned char *p, int length)  static int
2585    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2586  {  {
2587  int c;  while (n--)
2588  while (length-- > 0)    {
2589    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    int c = tolower(*s++) - tolower(*t++);
2590      else fprintf(outfile, "\\x%02x", c);    if (c) return c;
2591      }
2592    return 0;
2593  }  }
2594    
2595    
2596    
2597  /* Alternative malloc function, to test functionality and show the size of the  /*************************************************
2598  compiled re. */  *         Check newline indicator                *
2599    *************************************************/
2600    
2601    /* This is used both at compile and run-time to check for <xxx> escapes. Print
2602    a message and return 0 if there is no match.
2603    
2604  static void *new_malloc(size_t size)  Arguments:
2605      p           points after the leading '<'
2606      f           file for error message
2607    
2608    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2609    */
2610    
2611    static int
2612    check_newline(pcre_uint8 *p, FILE *f)
2613  {  {
2614  gotten_store = size;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2615  if (log_store)  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2616    fprintf(outfile, "Memory allocation (code space): %d\n",  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2617      (int)((int)size - offsetof(real_pcre, code[0])));  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2618  return malloc(size);  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2619    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2620    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2621    fprintf(f, "Unknown newline type at: <%s\n", p);
2622    return 0;
2623  }  }
2624    
2625    
2626    
2627    /*************************************************
2628    *             Usage function                     *
2629    *************************************************/
2630    
/* Print the command line summary on stdout. Which lines appear depends on
the SUPPORT_xxx and NOxxx build-time macros tested below. The list of -C
arguments includes "ebcdic" and "ebcdic-nl", which the option scanner in
main() also accepts. */

static void
usage(void)
{
printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE16
printf("  -16      use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf("  -32      use the 32-bit library\n");
#endif
printf("  -b       show compiled code\n");
printf("  -C       show PCRE compile-time options and exit\n");
printf("  -C arg   show a specific compile-time option\n");
printf("           and exit with its value. The arg can be:\n");
printf("     linksize     internal link size [2, 3, 4]\n");
printf("     pcre8        8 bit library support enabled [0, 1]\n");
printf("     pcre16       16 bit library support enabled [0, 1]\n");
printf("     pcre32       32 bit library support enabled [0, 1]\n");
printf("     utf          Unicode Transformation Format supported [0, 1]\n");
printf("     ucp          Unicode Properties supported [0, 1]\n");
printf("     jit          Just-in-time compiler supported [0, 1]\n");
printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
printf("     ebcdic       EBCDIC code support compiled in [0, 1]\n");
printf("     ebcdic-nl    LF code point when compiled for EBCDIC, else 0\n");
printf("  -d       debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf("  -dfa     force DFA matching for all subjects\n");
#endif
printf("  -help    show usage information\n");
printf("  -i       show information about compiled patterns\n"
       "  -M       find MATCH_LIMIT minimum for each subject\n"
       "  -m       output memory used information\n"
       "  -o <n>   set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf("  -p       use POSIX interface\n");
#endif
printf("  -q       quiet: do not output PCRE version number at start\n");
printf("  -S <n>   set stack size to <n> megabytes\n");
printf("  -s       force each pattern to be studied at basic level\n"
       "  -s+      force each pattern to be studied, using JIT if available\n"
       "  -s++     ditto, verifying when JIT was actually used\n"
       "  -s+n     force each pattern to be studied, using JIT if available,\n"
       "             where 1 <= n <= 7 selects JIT options\n"
       "  -s++n    ditto, verifying when JIT was actually used\n"
       "  -t       time compilation and execution\n");
printf("  -t <n>   time compilation and execution, repeating <n> times\n");
printf("  -tm      time execution (matching) only\n");
printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
}
2685    
2686    
2687    
2688    /*************************************************
2689    *                Main Program                    *
2690    *************************************************/
2691    
2692  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
2693  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 311  options, followed by a set of test data, Line 2696  options, followed by a set of test data,
2696  int main(int argc, char **argv)  int main(int argc, char **argv)
2697  {  {
2698  FILE *infile = stdin;  FILE *infile = stdin;
2699    const char *version;
2700  int options = 0;  int options = 0;
2701  int study_options = 0;  int study_options = 0;
2702    int default_find_match_limit = FALSE;
2703  int op = 1;  int op = 1;
2704  int timeit = 0;  int timeit = 0;
2705    int timeitm = 0;
2706  int showinfo = 0;  int showinfo = 0;
2707  int showstore = 0;  int showstore = 0;
2708  int posix = 0;  int force_study = -1;
2709    int force_study_options = 0;
2710    int quiet = 0;
2711    int size_offsets = 45;
2712    int size_offsets_max;
2713    int *offsets = NULL;
2714  int debug = 0;  int debug = 0;
2715  int done = 0;  int done = 0;
2716  unsigned char buffer[30000];  int all_use_dfa = 0;
2717  unsigned char dbuffer[1024];  int verify_jit = 0;
2718    int yield = 0;
2719    int stack_size;
2720    
2721    #if !defined NOPOSIX
2722    int posix = 0;
2723    #endif
2724    #if !defined NODFA
2725    int *dfa_workspace = NULL;
2726    #endif
2727    
2728    pcre_jit_stack *jit_stack = NULL;
2729    
2730    /* These vectors store, end-to-end, a list of zero-terminated captured
2731    substring names, each list itself being terminated by an empty name. Assume
2732    that 1024 is plenty long enough for the few names we'll be testing. It is
2733    easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2734    for the actual memory, to ensure alignment. */
2735    
2736    pcre_uint32 copynames[1024];
2737    pcre_uint32 getnames[1024];
2738    
2739    #ifdef SUPPORT_PCRE32
2740    pcre_uint32 *cn32ptr;
2741    pcre_uint32 *gn32ptr;
2742    #endif
2743    
2744  /* Static so that new_malloc can use it. */  #ifdef SUPPORT_PCRE16
2745    pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2746    pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2747    pcre_uint16 *cn16ptr;
2748    pcre_uint16 *gn16ptr;
2749    #endif
2750    
2751    #ifdef SUPPORT_PCRE8
2752    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2753    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2754    pcre_uint8 *cn8ptr;
2755    pcre_uint8 *gn8ptr;
2756    #endif
2757    
2758    /* Get buffers from malloc() so that valgrind will check their misuse when
2759    debugging. They grow automatically when very long lines are read. The 16-
2760    and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2761    
2762    buffer = (pcre_uint8 *)malloc(buffer_size);
2763    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2764    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2765    
2766    /* The outfile variable is static so that new_malloc can use it. */
2767    
2768  outfile = stdout;  outfile = stdout;
2769    
2770    /* The following  _setmode() stuff is some Windows magic that tells its runtime
2771    library to translate CRLF into a single LF character. At least, that's what
2772    I've been told: never having used Windows I take this all on trust. Originally
2773    it set 0x8000, but then I was advised that _O_BINARY was better. */
2774    
2775    #if defined(_WIN32) || defined(WIN32)
2776    _setmode( _fileno( stdout ), _O_BINARY );
2777    #endif
2778    
2779    /* Get the version number: both pcre_version() and pcre16_version() give the
2780    same answer. We just need to ensure that we call one that is available. */
2781    
2782    #if defined SUPPORT_PCRE8
2783    version = pcre_version();
2784    #elif defined SUPPORT_PCRE16
2785    version = pcre16_version();
2786    #elif defined SUPPORT_PCRE32
2787    version = pcre32_version();
2788    #endif
2789    
2790  /* Scan options */  /* Scan options */
2791    
2792  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2793    {    {
2794    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    pcre_uint8 *endptr;
2795      showstore = 1;    char *arg = argv[op];
2796    else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
2797    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    if (strcmp(arg, "-m") == 0) showstore = 1;
2798    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2799    else if (strcmp(argv[op], "-p") == 0) posix = 1;  
2800      else if (strncmp(arg, "-s+", 3) == 0)
2801        {
2802        arg += 3;
2803        if (*arg == '+') { arg++; verify_jit = TRUE; }
2804        force_study = 1;
2805        if (*arg == 0)
2806          force_study_options = jit_study_bits[6];
2807        else if (*arg >= '1' && *arg <= '7')
2808          force_study_options = jit_study_bits[*arg - '1'];
2809        else goto BAD_ARG;
2810        }
2811      else if (strcmp(arg, "-16") == 0)
2812        {
2813    #ifdef SUPPORT_PCRE16
2814        pcre_mode = PCRE16_MODE;
2815    #else
2816        printf("** This version of PCRE was built without 16-bit support\n");
2817        exit(1);
2818    #endif
2819        }
2820      else if (strcmp(arg, "-32") == 0)
2821        {
2822    #ifdef SUPPORT_PCRE32
2823        pcre_mode = PCRE32_MODE;
2824    #else
2825        printf("** This version of PCRE was built without 32-bit support\n");
2826        exit(1);
2827    #endif
2828        }
2829      else if (strcmp(arg, "-q") == 0) quiet = 1;
2830      else if (strcmp(arg, "-b") == 0) debug = 1;
2831      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2832      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2833      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2834    #if !defined NODFA
2835      else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2836    #endif
2837      else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2838          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2839            *endptr == 0))
2840        {
2841        op++;
2842        argc--;
2843        }
2844      else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2845        {
2846        int both = arg[2] == 0;
2847        int temp;
2848        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2849                         *endptr == 0))
2850          {
2851          timeitm = temp;
2852          op++;
2853          argc--;
2854          }
2855        else timeitm = LOOPREPEAT;
2856        if (both) timeit = timeitm;
2857        }
2858      else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2859          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2860            *endptr == 0))
2861        {
2862    #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2863        printf("PCRE: -S not supported on this OS\n");
2864        exit(1);
2865    #else
2866        int rc;
2867        struct rlimit rlim;
2868        getrlimit(RLIMIT_STACK, &rlim);
2869        rlim.rlim_cur = stack_size * 1024 * 1024;
2870        rc = setrlimit(RLIMIT_STACK, &rlim);
2871        if (rc != 0)
2872          {
2873        printf("PCRE: setrlimit() failed with error %d\n", rc);
2874        exit(1);
2875          }
2876        op++;
2877        argc--;
2878    #endif
2879        }
2880    #if !defined NOPOSIX
2881      else if (strcmp(arg, "-p") == 0) posix = 1;
2882    #endif
2883      else if (strcmp(arg, "-C") == 0)
2884        {
2885        int rc;
2886        unsigned long int lrc;
2887    
2888        if (argc > 2)
2889          {
2890          if (strcmp(argv[op + 1], "linksize") == 0)
2891            {
2892            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2893            printf("%d\n", rc);
2894            yield = rc;
2895            }
2896          else if (strcmp(argv[op + 1], "pcre8") == 0)
2897            {
2898    #ifdef SUPPORT_PCRE8
2899            printf("1\n");
2900            yield = 1;
2901    #else
2902            printf("0\n");
2903            yield = 0;
2904    #endif
2905            }
2906          else if (strcmp(argv[op + 1], "pcre16") == 0)
2907            {
2908    #ifdef SUPPORT_PCRE16
2909            printf("1\n");
2910            yield = 1;
2911    #else
2912            printf("0\n");
2913            yield = 0;
2914    #endif
2915            }
2916          else if (strcmp(argv[op + 1], "pcre32") == 0)
2917            {
2918    #ifdef SUPPORT_PCRE32
2919            printf("1\n");
2920            yield = 1;
2921    #else
2922            printf("0\n");
2923            yield = 0;
2924    #endif
2925            goto EXIT;
2926            }
2927          if (strcmp(argv[op + 1], "utf") == 0)
2928            {
2929    #ifdef SUPPORT_PCRE8
2930            if (pcre_mode == PCRE8_MODE)
2931              (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2932    #endif
2933    #ifdef SUPPORT_PCRE16
2934            if (pcre_mode == PCRE16_MODE)
2935              (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2936    #endif
2937    #ifdef SUPPORT_PCRE32
2938            if (pcre_mode == PCRE32_MODE)
2939              (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2940    #endif
2941            printf("%d\n", rc);
2942            yield = rc;
2943            goto EXIT;
2944            }
2945          else if (strcmp(argv[op + 1], "ucp") == 0)
2946            {
2947            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2948            printf("%d\n", rc);
2949            yield = rc;
2950            }
2951          else if (strcmp(argv[op + 1], "jit") == 0)
2952            {
2953            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2954            printf("%d\n", rc);
2955            yield = rc;
2956            }
2957          else if (strcmp(argv[op + 1], "newline") == 0)
2958            {
2959            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2960            print_newline_config(rc, TRUE);
2961            }
2962          else if (strcmp(argv[op + 1], "ebcdic") == 0)
2963            {
2964    #ifdef EBCDIC
2965            printf("1\n");
2966            yield = 1;
2967    #else
2968            printf("0\n");
2969    #endif
2970            }
2971          else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2972            {
2973    #ifdef EBCDIC
2974            printf("0x%02x\n", CHAR_LF);
2975    #else
2976            printf("0\n");
2977    #endif
2978            }
2979          else
2980            {
2981            printf("Unknown -C option: %s\n", argv[op + 1]);
2982            }
2983          goto EXIT;
2984          }
2985    
2986        /* No argument for -C: output all configuration information. */
2987    
2988        printf("PCRE version %s\n", version);
2989        printf("Compiled with\n");
2990    
2991    #ifdef EBCDIC
2992        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2993    #endif
2994    
2995    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2996    are set, either both UTFs are supported or both are not supported. */
2997    
2998    #ifdef SUPPORT_PCRE8
2999        printf("  8-bit support\n");
3000        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3001          printf ("  %sUTF-8 support\n", rc ? "" : "No ");
3002    #endif
3003    #ifdef SUPPORT_PCRE16
3004        printf("  16-bit support\n");
3005        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3006        printf ("  %sUTF-16 support\n", rc ? "" : "No ");
3007    #endif
3008    #ifdef SUPPORT_PCRE32
3009        printf("  32-bit support\n");
3010        (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3011        printf ("  %sUTF-32 support\n", rc ? "" : "No ");
3012    #endif
3013    
3014        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3015        printf("  %sUnicode properties support\n", rc? "" : "No ");
3016        (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3017        if (rc)
3018          {
3019          const char *arch;
3020          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3021          printf("  Just-in-time compiler support: %s\n", arch);
3022          }
3023        else
3024          printf("  No just-in-time compiler support\n");
3025        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3026        print_newline_config(rc, FALSE);
3027        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3028        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3029                                         "all Unicode newlines");
3030        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3031        printf("  Internal link size = %d\n", rc);
3032        (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3033        printf("  POSIX malloc threshold = %d\n", rc);
3034        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3035        printf("  Default match limit = %ld\n", lrc);
3036        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3037        printf("  Default recursion depth limit = %ld\n", lrc);
3038        (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3039        printf("  Match recursion uses %s", rc? "stack" : "heap");
3040        if (showstore)
3041          {
3042          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3043          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3044          }
3045        printf("\n");
3046        goto EXIT;
3047        }
3048      else if (strcmp(arg, "-help") == 0 ||
3049               strcmp(arg, "--help") == 0)
3050        {
3051        usage();
3052        goto EXIT;
3053        }
3054    else    else
3055      {      {
3056      printf("*** Unknown option %s\n", argv[op]);      BAD_ARG:
3057      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("** Unknown or malformed option %s\n", arg);
3058      printf("  -d   debug: show compiled code; implies -i\n"      usage();
3059             "  -i   show information about compiled pattern\n"      yield = 1;
3060             "  -p   use POSIX interface\n"      goto EXIT;
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
3061      }      }
3062    op++;    op++;
3063    argc--;    argc--;
3064    }    }
3065    
3066    /* Get the store for the offsets vector, and remember what it was */
3067    
3068    size_offsets_max = size_offsets;
3069    offsets = (int *)malloc(size_offsets_max * sizeof(int));
3070    if (offsets == NULL)
3071      {
3072      printf("** Failed to get %d bytes of memory for offsets vector\n",
3073        (int)(size_offsets_max * sizeof(int)));
3074      yield = 1;
3075      goto EXIT;
3076      }
3077    
3078  /* Sort out the input and output files */  /* Sort out the input and output files */
3079    
3080  if (argc > 1)  if (argc > 1)
3081    {    {
3082    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
3083    if (infile == NULL)    if (infile == NULL)
3084      {      {
3085      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
3086      return 1;      yield = 1;
3087        goto EXIT;
3088      }      }
3089    }    }
3090    
3091  if (argc > 2)  if (argc > 2)
3092    {    {
3093    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
3094    if (outfile == NULL)    if (outfile == NULL)
3095      {      {
3096      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
3097      return 1;      yield = 1;
3098        goto EXIT;
3099      }      }
3100    }    }
3101    
3102  /* Set alternative malloc function */  /* Set alternative malloc function */
3103    
3104    #ifdef SUPPORT_PCRE8
3105  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
3106    pcre_free = new_free;
3107    pcre_stack_malloc = stack_malloc;
3108    pcre_stack_free = stack_free;
3109    #endif
3110    
3111    #ifdef SUPPORT_PCRE16
3112    pcre16_malloc = new_malloc;
3113    pcre16_free = new_free;
3114    pcre16_stack_malloc = stack_malloc;
3115    pcre16_stack_free = stack_free;
3116    #endif
3117    
3118    #ifdef SUPPORT_PCRE32
3119    pcre32_malloc = new_malloc;
3120    pcre32_free = new_free;
3121    pcre32_stack_malloc = stack_malloc;
3122    pcre32_stack_free = stack_free;
3123    #endif
3124    
3125  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
3126    
3127  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3128    
3129  /* Main loop */  /* Main loop */
3130    
# Line 391  while (!done) Line 3135  while (!done)
3135    
3136  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
3137    regex_t preg;    regex_t preg;
3138      int do_posix = 0;
3139  #endif  #endif
3140    
3141    const char *error;    const char *error;
3142    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
3143    unsigned const char *tables = NULL;    pcre_uint8 *p, *pp, *ppp;
3144      pcre_uint8 *to_file = NULL;
3145      const pcre_uint8 *tables = NULL;
3146      unsigned long int get_options;
3147      unsigned long int true_size, true_study_size = 0;
3148      size_t size, regex_gotten_store;
3149      int do_allcaps = 0;
3150      int do_mark = 0;
3151    int do_study = 0;    int do_study = 0;
3152      int no_force_study = 0;
3153    int do_debug = debug;    int do_debug = debug;
3154    int do_G = 0;    int do_G = 0;
3155    int do_g = 0;    int do_g = 0;
3156    int do_showinfo = showinfo;    int do_showinfo = showinfo;
3157    int do_showrest = 0;    int do_showrest = 0;
3158    int do_posix = 0;    int do_showcaprest = 0;
3159    int erroroffset, len, delimiter;    int do_flip = 0;
3160      int erroroffset, len, delimiter, poffset;
3161    
3162    #if !defined NODFA
3163      int dfa_matched = 0;
3164    #endif
3165    
3166      use_utf = 0;
3167      debug_lengths = 1;
3168    
3169    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
3170    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3171      fflush(outfile);
3172    
3173    p = buffer;    p = buffer;
3174    while (isspace(*p)) p++;    while (isspace(*p)) p++;
3175    if (*p == 0) continue;    if (*p == 0) continue;
3176    
3177    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
3178    complete, read more. */  
3179      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3180        {
3181        pcre_uint32 magic;
3182        pcre_uint8 sbuf[8];
3183        FILE *f;
3184    
3185        p++;
3186        if (*p == '!')
3187          {
3188          do_debug = TRUE;
3189          do_showinfo = TRUE;
3190          p++;
3191          }
3192    
3193        pp = p + (int)strlen((char *)p);
3194        while (isspace(pp[-1])) pp--;
3195        *pp = 0;
3196    
3197        f = fopen((char *)p, "rb");
3198        if (f == NULL)
3199          {
3200          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3201          continue;
3202          }
3203    
3204        first_gotten_store = 0;
3205        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3206    
3207        true_size =
3208          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3209        true_study_size =
3210          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3211    
3212        re = (pcre *)new_malloc(true_size);
3213        if (re == NULL)
3214          {
3215          printf("** Failed to get %d bytes of memory for pcre object\n",
3216            (int)true_size);
3217          yield = 1;
3218          goto EXIT;
3219          }
3220        regex_gotten_store = first_gotten_store;
3221    
3222        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3223    
3224        magic = REAL_PCRE_MAGIC(re);
3225        if (magic != MAGIC_NUMBER)
3226          {
3227          if (swap_uint32(magic) == MAGIC_NUMBER)
3228            {
3229            do_flip = 1;
3230            }
3231          else
3232            {
3233            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3234            new_free(re);
3235            fclose(f);
3236            continue;
3237            }
3238          }
3239    
3240        /* We hide the byte-invert info for little and big endian tests. */
3241        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3242          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3243    
3244        /* Now see if there is any following study data. */
3245    
3246        if (true_study_size != 0)
3247          {
3248          pcre_study_data *psd;
3249    
3250          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3251          extra->flags = PCRE_EXTRA_STUDY_DATA;
3252    
3253          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3254          extra->study_data = psd;
3255    
3256          if (fread(psd, 1, true_study_size, f) != true_study_size)
3257            {
3258            FAIL_READ:
3259            fprintf(outfile, "Failed to read data from %s\n", p);
3260            if (extra != NULL)
3261              {
3262              PCRE_FREE_STUDY(extra);
3263              }
3264            new_free(re);
3265            fclose(f);
3266            continue;
3267            }
3268          fprintf(outfile, "Study data loaded from %s\n", p);
3269          do_study = 1;     /* To get the data output if requested */
3270          }
3271        else fprintf(outfile, "No study data\n");
3272    
3273        /* Flip the necessary bytes. */
3274        if (do_flip)
3275          {
3276          int rc;
3277          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3278          if (rc == PCRE_ERROR_BADMODE)
3279            {
3280            /* Simulate the result of the function call below. */
3281            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3282              pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3283              PCRE_INFO_OPTIONS);
3284            fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3285              "%d-bit mode\n", 8 * CHAR_SIZE,
3286              8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3287            new_free(re);
3288            fclose(f);
3289            continue;
3290            }
3291          }
3292    
3293        /* Need to know if UTF-8 for printing data strings. */
3294    
3295        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3296          {
3297          new_free(re);
3298          fclose(f);
3299          continue;
3300          }
3301        use_utf = (get_options & PCRE_UTF8) != 0;
3302    
3303        fclose(f);
3304        goto SHOW_INFO;
3305        }
3306    
3307      /* In-line pattern (the usual case). Get the delimiter and seek the end of
3308      the pattern; if it isn't complete, read more. */
3309    
3310    delimiter = *p++;    delimiter = *p++;
3311    
3312    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
3313      {      {
3314      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3315      goto SKIP_DATA;      goto SKIP_DATA;
3316      }      }
3317    
3318    pp = p;    pp = p;
3319      poffset = (int)(p - buffer);
3320    
3321    for(;;)    for(;;)
3322      {      {
# Line 435  while (!done) Line 3327  while (!done)
3327        pp++;        pp++;
3328        }        }
3329      if (*pp != 0) break;      if (*pp != 0) break;
3330        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
3331        {        {
3332        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
3333        done = 1;        done = 1;
# Line 453  while (!done) Line 3336  while (!done)
3336      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3337      }      }
3338    
3339      /* The buffer may have moved while being extended; reset the start of data
3340      pointer to the correct relative point in the buffer. */
3341    
3342      p = buffer + poffset;
3343    
3344    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
3345    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
3346    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
3347    
3348    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
3349    
3350    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
3351      for callouts. */
3352    
3353    *pp++ = 0;    *pp++ = 0;
3354      strcpy((char *)pbuffer, (char *)p);
3355    
3356    /* Look for options after final delimiter */    /* Look for options after final delimiter */
3357    
3358    options = 0;    options = 0;
3359    study_options = 0;    study_options = force_study_options;
3360    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
3361    
3362    while (*pp != 0)    while (*pp != 0)
3363      {      {
3364      switch (*pp++)      switch (*pp++)
3365        {        {
3366          case 'f': options |= PCRE_FIRSTLINE; break;
3367        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
3368        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
3369        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
3370        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
3371        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
3372    
3373        case '+': do_showrest = 1; break;        case '+':
3374          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3375          break;
3376    
3377          case '=': do_allcaps = 1; break;
3378        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
3379          case 'B': do_debug = 1; break;
3380          case 'C': options |= PCRE_AUTO_CALLOUT; break;
3381        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
3382        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3383          case 'F': do_flip = 1; break;
3384        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
3385        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
3386          case 'J': options |= PCRE_DUPNAMES; break;
3387          case 'K': do_mark = 1; break;
3388        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
3389          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3390    
3391    #if !defined NOPOSIX
3392          case 'P': do_posix = 1; break;
3393    #endif
3394    
3395          case 'S':
3396          do_study = 1;
3397          for (;;)
3398            {
3399            switch (*pp++)
3400              {
3401              case 'S':
3402              do_study = 0;
3403              no_force_study = 1;
3404              break;
3405    
3406              case '!':
3407              study_options |= PCRE_STUDY_EXTRA_NEEDED;
3408              break;
3409    
3410              case '+':
3411              if (*pp == '+')
3412                {
3413                verify_jit = TRUE;
3414                pp++;
3415                }
3416              if (*pp >= '1' && *pp <= '7')
3417                study_options |= jit_study_bits[*pp++ - '1'];
3418              else
3419                study_options |= jit_study_bits[6];
3420              break;
3421    
3422  #if !defined NOPOSIX            case '-':
3423        case 'P': do_posix = 1; break;            study_options &= ~PCRE_STUDY_ALLJIT;
3424  #endif            break;
3425    
3426              default:
3427              pp--;
3428              goto ENDLOOP;
3429              }
3430            }
3431          ENDLOOP:
3432          break;
3433    
       case 'S': do_study = 1; break;  
3434        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
3435          case 'W': options |= PCRE_UCP; break;
3436        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
3437          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3438          case 'Z': debug_lengths = 0; break;
3439          case '8': options |= PCRE_UTF8; use_utf = 1; break;
3440          case '?': options |= PCRE_NO_UTF8_CHECK; break;
3441    
3442          case 'T':
3443          switch (*pp++)
3444            {
3445            case '0': tables = tables0; break;
3446            case '1': tables = tables1; break;
3447    
3448            case '\r':
3449            case '\n':
3450            case ' ':
3451            case 0:
3452            fprintf(outfile, "** Missing table number after /T\n");
3453            goto SKIP_DATA;
3454    
3455            default:
3456            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3457            goto SKIP_DATA;
3458            }
3459          break;
3460    
3461        case 'L':        case 'L':
3462        ppp = pp;        ppp = pp;
3463        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
3464          /* The '0' test is just in case this is an unterminated line. */
3465          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3466        *ppp = 0;        *ppp = 0;
3467        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3468          {          {
3469          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3470          goto SKIP_DATA;          goto SKIP_DATA;
3471          }          }
3472        tables = pcre_maketables();        locale_set = 1;
3473          tables = PCRE_MAKETABLES;
3474        pp = ppp;        pp = ppp;
3475        break;        break;
3476    
3477        case '\n': case ' ': break;        case '>':
3478          to_file = pp;
3479          while (*pp != 0) pp++;
3480          while (isspace(pp[-1])) pp--;
3481          *pp = 0;
3482          break;
3483    
3484          case '<':
3485            {
3486            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3487              {
3488              options |= PCRE_JAVASCRIPT_COMPAT;
3489              pp += 3;
3490              }
3491            else
3492              {
3493              int x = check_newline(pp, outfile);
3494              if (x == 0) goto SKIP_DATA;
3495              options |= x;
3496              while (*pp++ != '>');
3497              }
3498            }
3499          break;
3500    
3501          case '\r':                      /* So that it works in Windows */
3502          case '\n':
3503          case ' ':
3504          break;
3505    
3506        default:        default:
3507        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3508        goto SKIP_DATA;        goto SKIP_DATA;
# Line 517  while (!done) Line 3511  while (!done)
3511    
3512    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
3513    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
3514    local character tables. */    local character tables. Neither does it have 16-bit support. */
3515    
3516  #if !defined NOPOSIX  #if !defined NOPOSIX
3517    if (posix || do_posix)    if (posix || do_posix)
3518      {      {
3519      int rc;      int rc;
3520      int cflags = 0;      int cflags = 0;
3521    
3522      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3523      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;