/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC revision 1090 by chpe, Tue Oct 16 15:55:48 2012 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    /* This program now supports the testing of the 8-bit, 16-bit and 32-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If two modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8, COMPILE_PCRE16 or COMPILE_PCRE32.
46    It does, however, make use of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32
47    to ensure that it calls only supported library functions. */
48    
49    #ifdef HAVE_CONFIG_H
50    #include "config.h"
51    #endif
52    
53  #include <ctype.h>  #include <ctype.h>
54  #include <stdio.h>  #include <stdio.h>
55  #include <string.h>  #include <string.h>
56  #include <stdlib.h>  #include <stdlib.h>
57  #include <time.h>  #include <time.h>
58  #include <locale.h>  #include <locale.h>
59    #include <errno.h>
60    
61    /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67    #ifdef HAVE_UNISTD_H
68    #include <unistd.h>
69    #endif
70    #if defined(SUPPORT_LIBREADLINE)
71    #include <readline/readline.h>
72    #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80    #endif
81    
82    /* A number of things vary for Windows builds. Originally, pcretest opened its
83    input and output without "b"; then I was told that "b" was needed in some
84    environments, so it was added for release 5.0 to both the input and output. (It
85    makes no difference on Unix-like systems.) Later I was told that it is wrong
86    for the input on Windows. I've now abstracted the modes into two macros that
87    are set here, to make it easier to fiddle with them, and removed "b" from the
88    input mode under Windows. */
89    
90    #if defined(_WIN32) || defined(WIN32)
91    #include <io.h>                /* For _setmode() */
92    #include <fcntl.h>             /* For _O_BINARY */
93    #define INPUT_MODE   "r"
94    #define OUTPUT_MODE  "wb"
95    
96    #ifndef isatty
97    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
98    #endif                         /* though in some environments they seem to   */
99                                   /* be already defined, hence the #ifndefs.    */
100    #ifndef fileno
101    #define fileno _fileno
102    #endif
103    
104    /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106    #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
112    #else
113    #include <sys/time.h>          /* These two includes are needed */
114    #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119    #define INPUT_MODE   "rb"
120    #define OUTPUT_MODE  "wb"
121    #endif
122    #endif
123    
124    #define PRIV(name) name
125    
126    /* We have to include pcre_internal.h because we need the internal info for
127    displaying the results of pcre_study() and we also need to know about the
128    internal macros, structures, and other internal data values; pcretest has
129    "inside information" compared to a program that strictly follows the PCRE API.
130    
131    Although pcre_internal.h does itself include pcre.h, we explicitly include it
132    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133    appropriately for an application, not for building PCRE. */
134    
135    #include "pcre.h"
136    
137    #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138    /* Configure internal macros to 32 bit mode. */
139    #define COMPILE_PCRE32
140    #endif
141    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142    /* Configure internal macros to 16 bit mode. */
143    #define COMPILE_PCRE16
144    #endif
145    #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146    /* Configure internal macros to 8 bit mode. */
147    #define COMPILE_PCRE8
148    #endif
149    
150    #include "pcre_internal.h"
151    
152    /* The pcre_printint() function, which prints the internal form of a compiled
153    regex, is held in a separate file so that (a) it can be compiled in either
154    8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155    when that is compiled in debug mode. */
156    
157    #ifdef SUPPORT_PCRE8
158    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159    #endif
160    #ifdef SUPPORT_PCRE16
161    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162    #endif
163    #ifdef SUPPORT_PCRE32
164    void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165    #endif
166    
167  /* Use the internal info for displaying the results of pcre_study(). */  /* We need access to some of the data tables that PCRE uses. So as not to have
168    to keep two copies, we include the source files here, changing the names of the
169    external symbols to prevent clashes. */
170    
171    #define PCRE_INCLUDED
172    
173    #include "pcre_tables.c"
174    #include "pcre_ucd.c"
175    
176    /* The definition of the macro PRINTABLE, which determines whether to print an
177    output character as-is or as a hex value when showing compiled patterns, is
178    the same as in the printint.src file. We use it here in cases when the locale
179    has not been explicitly changed, so as to get consistent output from systems
180    that differ in their output from isprint() even in the "C" locale. */
181    
182  #include "internal.h"  #ifdef EBCDIC
183    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
184    #else
185    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
186    #endif
187    
188    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189    
190    /* Posix support is disabled in 16 or 32 bit only mode. */
191    #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192    #define NOPOSIX
193    #endif
194    
195  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
196  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 200  Makefile. */
200  #include "pcreposix.h"  #include "pcreposix.h"
201  #endif  #endif
202    
203    /* It is also possible, originally for the benefit of a version that was
204    imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205    NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206    automatically cut out the UTF support if PCRE is built without it. */
207    
208    #ifndef SUPPORT_UTF
209    #ifndef NOUTF
210    #define NOUTF
211    #endif
212    #endif
213    
214    /* To make the code a bit tidier for 8/16/32-bit support, we define macros
215    for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
216    only from one place and is handled differently). I couldn't dream up any way of
217    using a single macro to do this in a generic way, because of the many different
218    argument requirements. We know that at least one of SUPPORT_PCRE8,
219    SUPPORT_PCRE16 and SUPPORT_PCRE32 must be set. First define macros for each
220    individual mode; then use these in the definitions of generic macros.
221    
222    **** Special note about the PCHARSxxx macros: the address of the string to be
223    printed is always given as two arguments: a base address followed by an offset.
224    The base address is cast to the correct data size for 8, 16 or 32 bit data; the
225    offset is in units of this size. If the string were given as base+offset in one
226    argument, the casting might be incorrectly applied. */
227    
228    #ifdef SUPPORT_PCRE8
229    
230    #define PCHARS8(lv, p, offset, len, f) \
231      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
232    
233    #define PCHARSV8(p, offset, len, f) \
234      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235    
236    #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237      p = read_capture_name8(p, cn8, re)
238    
239    #define STRLEN8(p) ((int)strlen((char *)p))
240    
241    #define SET_PCRE_CALLOUT8(callout) \
242      pcre_callout = callout
243    
244    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245       pcre_assign_jit_stack(extra, callback, userdata)
246    
247    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248      re = pcre_compile((char *)pat, options, error, erroffset, tables)
249    
250    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251        namesptr, cbuffer, size) \
252      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253        (char *)namesptr, cbuffer, size)
254    
255    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257    
258    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259        offsets, size_offsets, workspace, size_workspace) \
260      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261        offsets, size_offsets, workspace, size_workspace)
262    
263    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264        offsets, size_offsets) \
265      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266        offsets, size_offsets)
267    
268    #define PCRE_FREE_STUDY8(extra) \
269      pcre_free_study(extra)
270    
271    #define PCRE_FREE_SUBSTRING8(substring) \
272      pcre_free_substring(substring)
273    
274    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275      pcre_free_substring_list(listptr)
276    
277    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278        getnamesptr, subsptr) \
279      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280        (char *)getnamesptr, subsptr)
281    
282    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
283      n = pcre_get_stringnumber(re, (char *)ptr)
284    
285    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287    
288    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290    
291    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293    
294    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295      pcre_printint(re, outfile, debug_lengths)
296    
297    #define PCRE_STUDY8(extra, re, options, error) \
298      extra = pcre_study(re, options, error)
299    
300    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301      pcre_jit_stack_alloc(startsize, maxsize)
302    
303    #define PCRE_JIT_STACK_FREE8(stack) \
304      pcre_jit_stack_free(stack)
305    
306    #endif /* SUPPORT_PCRE8 */
307    
308    /* -----------------------------------------------------------*/
309    
310    #ifdef SUPPORT_PCRE16
311    
312    #define PCHARS16(lv, p, offset, len, f) \
313      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
314    
315    #define PCHARSV16(p, offset, len, f) \
316      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
317    
318    #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319      p = read_capture_name16(p, cn16, re)
320    
321    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322    
323    #define SET_PCRE_CALLOUT16(callout) \
324      pcre16_callout = (int (*)(pcre16_callout_block *))callout
325    
326    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327      pcre16_assign_jit_stack((pcre16_extra *)extra, \
328        (pcre16_jit_callback)callback, userdata)
329    
330    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332        tables)
333    
334    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335        namesptr, cbuffer, size) \
336      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338    
339    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341        (PCRE_UCHAR16 *)cbuffer, size/2)
342    
343    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344        offsets, size_offsets, workspace, size_workspace) \
345      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347        workspace, size_workspace)
348    
349    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350        offsets, size_offsets) \
351      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352        len, start_offset, options, offsets, size_offsets)
353    
354    #define PCRE_FREE_STUDY16(extra) \
355      pcre16_free_study((pcre16_extra *)extra)
356    
357    #define PCRE_FREE_SUBSTRING16(substring) \
358      pcre16_free_substring((PCRE_SPTR16)substring)
359    
360    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362    
363    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364        getnamesptr, subsptr) \
365      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367    
368    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
369      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
370    
371    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373        (PCRE_SPTR16 *)(void*)subsptr)
374    
375    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377        (PCRE_SPTR16 **)(void*)listptr)
378    
379    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381        tables)
382    
383    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384      pcre16_printint(re, outfile, debug_lengths)
385    
386    #define PCRE_STUDY16(extra, re, options, error) \
387      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388    
389    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391    
392    #define PCRE_JIT_STACK_FREE16(stack) \
393      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394    
395    #endif /* SUPPORT_PCRE16 */
396    
397    /* -----------------------------------------------------------*/
398    
399    #ifdef SUPPORT_PCRE32
400    
401    #define PCHARS32(lv, p, offset, len, f) \
402      lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)
403    
404    #define PCHARSV32(p, offset, len, f) \
405      (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)
406    
407    #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408      p = read_capture_name32(p, cn32, re)
409    
410    #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411    
412    #define SET_PCRE_CALLOUT32(callout) \
413      pcre32_callout = (int (*)(pcre32_callout_block *))callout
414    
415    #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416      pcre32_assign_jit_stack((pcre32_extra *)extra, \
417        (pcre32_jit_callback)callback, userdata)
418    
419    #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420      re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421        tables)
422    
423    #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
424        namesptr, cbuffer, size) \
425      rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
426        count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/2)
427    
428    #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
429      rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
430        (PCRE_UCHAR32 *)cbuffer, size/2)
431    
432    #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433        offsets, size_offsets, workspace, size_workspace) \
434      count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435        (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436        workspace, size_workspace)
437    
438    #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439        offsets, size_offsets) \
440      count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441        len, start_offset, options, offsets, size_offsets)
442    
443    #define PCRE_FREE_STUDY32(extra) \
444      pcre32_free_study((pcre32_extra *)extra)
445    
446    #define PCRE_FREE_SUBSTRING32(substring) \
447      pcre32_free_substring((PCRE_SPTR32)substring)
448    
449    #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450      pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451    
452    #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453        getnamesptr, subsptr) \
454      rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455        count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456    
457    #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
458      n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
459    
460    #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461      rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462        (PCRE_SPTR32 *)(void*)subsptr)
463    
464    #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465      rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466        (PCRE_SPTR32 **)(void*)listptr)
467    
468    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469      rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470        tables)
471    
472    #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473      pcre32_printint(re, outfile, debug_lengths)
474    
475    #define PCRE_STUDY32(extra, re, options, error) \
476      extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477    
478    #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479      (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480    
481    #define PCRE_JIT_STACK_FREE32(stack) \
482      pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483    
484    #endif /* SUPPORT_PCRE32 */
485    
486    
487    /* ----- Two or more modes are supported; a runtime test is needed, except
488    for pcre_config(), when it doesn't matter which version is called. The JIT
489    stack macros below also dispatch on pcre_mode. ----- */
490    
491    enum {
492      PCRE8_MODE,
493      PCRE16_MODE,
494      PCRE32_MODE
495    };
496    
497    #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498    
499    #define CHAR_SIZE (1 << pcre_mode)
500    
501    #define PCHARS(lv, p, offset, len, f) \
502      if (pcre_mode == PCRE32_MODE) \
503        PCHARS32(lv, p, offset, len, f); \
504      else if (pcre_mode == PCRE16_MODE) \
505        PCHARS16(lv, p, offset, len, f); \
506      else \
507        PCHARS8(lv, p, offset, len, f)
508    
509    #define PCHARSV(p, offset, len, f) \
510      if (pcre_mode == PCRE32_MODE) \
511        PCHARSV32(p, offset, len, f); \
512      else if (pcre_mode == PCRE16_MODE) \
513        PCHARSV16(p, offset, len, f); \
514      else \
515        PCHARSV8(p, offset, len, f)
516    
517    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518      if (pcre_mode == PCRE32_MODE) \
519        READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520      else if (pcre_mode == PCRE16_MODE) \
521        READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522      else \
523        READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524    
525    #define SET_PCRE_CALLOUT(callout) \
526      if (pcre_mode == PCRE32_MODE) \
527        SET_PCRE_CALLOUT32(callout); \
528      else if (pcre_mode == PCRE16_MODE) \
529        SET_PCRE_CALLOUT16(callout); \
530      else \
531        SET_PCRE_CALLOUT8(callout)
532    
533    #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534    
535    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536      if (pcre_mode == PCRE32_MODE) \
537        PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538      else if (pcre_mode == PCRE16_MODE) \
539        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540      else \
541        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542    
543    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544      if (pcre_mode == PCRE32_MODE) \
545        PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546      else if (pcre_mode == PCRE16_MODE) \
547        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548      else \
549        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
550    
551    #define PCRE_CONFIG pcre_config
552    
553    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554        namesptr, cbuffer, size) \
555      if (pcre_mode == PCRE32_MODE) \
556        PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557          namesptr, cbuffer, size); \
558      else if (pcre_mode == PCRE16_MODE) \
559        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560          namesptr, cbuffer, size); \
561      else \
562        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
563          namesptr, cbuffer, size)
564    
565    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566      if (pcre_mode == PCRE32_MODE) \
567        PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568      else if (pcre_mode == PCRE16_MODE) \
569        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570      else \
571        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572    
573    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574        offsets, size_offsets, workspace, size_workspace) \
575      if (pcre_mode == PCRE32_MODE) \
576        PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577          offsets, size_offsets, workspace, size_workspace); \
578      else if (pcre_mode == PCRE16_MODE) \
579        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580          offsets, size_offsets, workspace, size_workspace); \
581      else \
582        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
583          offsets, size_offsets, workspace, size_workspace)
584    
585    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586        offsets, size_offsets) \
587      if (pcre_mode == PCRE32_MODE) \
588        PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589          offsets, size_offsets); \
590      else if (pcre_mode == PCRE16_MODE) \
591        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592          offsets, size_offsets); \
593      else \
594        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
595          offsets, size_offsets)
596    
597    #define PCRE_FREE_STUDY(extra) \
598      if (pcre_mode == PCRE32_MODE) \
599        PCRE_FREE_STUDY32(extra); \
600      else if (pcre_mode == PCRE16_MODE) \
601        PCRE_FREE_STUDY16(extra); \
602      else \
603        PCRE_FREE_STUDY8(extra)
604    
605    #define PCRE_FREE_SUBSTRING(substring) \
606      if (pcre_mode == PCRE32_MODE) \
607        PCRE_FREE_SUBSTRING32(substring); \
608      else if (pcre_mode == PCRE16_MODE) \
609        PCRE_FREE_SUBSTRING16(substring); \
610      else \
611        PCRE_FREE_SUBSTRING8(substring)
612    
613    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614      if (pcre_mode == PCRE32_MODE) \
615        PCRE_FREE_SUBSTRING_LIST32(listptr); \
616      else if (pcre_mode == PCRE16_MODE) \
617        PCRE_FREE_SUBSTRING_LIST16(listptr); \
618      else \
619        PCRE_FREE_SUBSTRING_LIST8(listptr)
620    
621    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622        getnamesptr, subsptr) \
623      if (pcre_mode == PCRE32_MODE) \
624        PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625          getnamesptr, subsptr); \
626      else if (pcre_mode == PCRE16_MODE) \
627        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628          getnamesptr, subsptr); \
629      else \
630        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
631          getnamesptr, subsptr)
632    
633    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634      if (pcre_mode == PCRE32_MODE) \
635        PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636      else if (pcre_mode == PCRE16_MODE) \
637        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638      else \
639        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640    
641    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642      if (pcre_mode == PCRE32_MODE) \
643        PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644      else if (pcre_mode == PCRE16_MODE) \
645        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646      else \
647        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648    
649    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650      if (pcre_mode == PCRE32_MODE) \
651        PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652      else if (pcre_mode == PCRE16_MODE) \
653        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654      else \
655        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656    
657    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658      (pcre_mode == PCRE32_MODE ? \
659         PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660        : pcre_mode == PCRE16_MODE ? \
661          PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662          : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663    
664    #define PCRE_JIT_STACK_FREE(stack) \
665      if (pcre_mode == PCRE32_MODE) \
666        PCRE_JIT_STACK_FREE32(stack); \
667      else if (pcre_mode == PCRE16_MODE) \
668        PCRE_JIT_STACK_FREE16(stack); \
669      else \
670        PCRE_JIT_STACK_FREE8(stack)
671    
672    #define PCRE_MAKETABLES \
673      (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674    
675    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676      if (pcre_mode == PCRE32_MODE) \
677        PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678      else if (pcre_mode == PCRE16_MODE) \
679        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680      else \
681        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682    
683    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684      if (pcre_mode == PCRE32_MODE) \
685        PCRE_PRINTINT32(re, outfile, debug_lengths); \
686      else if (pcre_mode == PCRE16_MODE) \
687        PCRE_PRINTINT16(re, outfile, debug_lengths); \
688      else \
689        PCRE_PRINTINT8(re, outfile, debug_lengths)
690    
691    #define PCRE_STUDY(extra, re, options, error) \
692      if (pcre_mode == PCRE32_MODE) \
693        PCRE_STUDY32(extra, re, options, error); \
694      else if (pcre_mode == PCRE16_MODE) \
695        PCRE_STUDY16(extra, re, options, error); \
696      else \
697        PCRE_STUDY8(extra, re, options, error)
698    
699    /* ----- Only 8-bit mode is supported ----- */
700    
701    #elif defined SUPPORT_PCRE8
702    #define CHAR_SIZE                 1  /* presumably bytes per character unit in this mode -- confirm at use sites */
703    #define PCHARS                    PCHARS8
704    #define PCHARSV                   PCHARSV8
705    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
706    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
707    #define STRLEN                    STRLEN8
708    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
709    #define PCRE_COMPILE              PCRE_COMPILE8
710    #define PCRE_CONFIG               pcre_config  /* maps straight to the lower-case pcre_config name, not to a ...8 wrapper macro */
711    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
712    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
713    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
714    #define PCRE_EXEC                 PCRE_EXEC8
715    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
716    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
717    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
718    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
719    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
720    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
721    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
722    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
723    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
724    #define PCRE_MAKETABLES           pcre_maketables()  /* the call parentheses are part of the expansion */
725    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
726    #define PCRE_PRINTINT             PCRE_PRINTINT8
727    #define PCRE_STUDY                PCRE_STUDY8
728    
729    /* ----- Only 16-bit mode is supported ----- */
730    
731    #elif defined SUPPORT_PCRE16
732    #define CHAR_SIZE                 2  /* presumably bytes per character unit in this mode -- confirm at use sites */
733    #define PCHARS                    PCHARS16
734    #define PCHARSV                   PCHARSV16
735    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
736    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
737    #define STRLEN                    STRLEN16
738    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
739    #define PCRE_COMPILE              PCRE_COMPILE16
740    #define PCRE_CONFIG               pcre16_config  /* maps straight to the lower-case pcre16_config name, not to a ...16 wrapper macro */
741    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
742    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
743    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
744    #define PCRE_EXEC                 PCRE_EXEC16
745    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
746    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
747    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
748    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
749    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
750    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
751    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
752    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
753    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
754    #define PCRE_MAKETABLES           pcre16_maketables()  /* the call parentheses are part of the expansion */
755    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756    #define PCRE_PRINTINT             PCRE_PRINTINT16
757    #define PCRE_STUDY                PCRE_STUDY16
758    
759    /* ----- Only 32-bit mode is supported ----- */
760    
761    #elif defined SUPPORT_PCRE32
762    #define CHAR_SIZE                 4  /* presumably bytes per character unit in this mode -- confirm at use sites */
763    #define PCHARS                    PCHARS32
764    #define PCHARSV                   PCHARSV32
765    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME32
766    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT32
767    #define STRLEN                    STRLEN32
768    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK32
769    #define PCRE_COMPILE              PCRE_COMPILE32
770    #define PCRE_CONFIG               pcre32_config  /* maps straight to the lower-case pcre32_config name, not to a ...32 wrapper macro */
771    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING32
773    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC32
774    #define PCRE_EXEC                 PCRE_EXEC32
775    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY32
776    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING32
777    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST32
778    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING32
779    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER32
780    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING32
781    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST32
782    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC32
783    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE32
784    #define PCRE_MAKETABLES           pcre32_maketables()  /* the call parentheses are part of the expansion */
785    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786    #define PCRE_PRINTINT             PCRE_PRINTINT32
787    #define PCRE_STUDY                PCRE_STUDY32
788    
789    #endif
790    
791    /* ----- End of mode-specific function call macros ----- */
792    
793    
794    /* Other parameters */
795    
796  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
797  #ifdef CLK_TCK  #ifdef CLK_TCK
798  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 801  Makefile. */
801  #endif  #endif
802  #endif  #endif
803    
804  #define LOOPREPEAT 20000  #if !defined NODFA
805    #define DFA_WS_DIMENSION 1000
806    #endif
807    
808    /* This is the default loop count for timing. */
809    
810    #define LOOPREPEAT 500000
811    
812    /* Static variables */
813    
814  static FILE *outfile;  static FILE *outfile;
815  static int log_store = 0;  static int log_store = 0;
816    static int callout_count;
817    static int callout_extra;
818    static int callout_fail_count;
819    static int callout_fail_id;
820    static int debug_lengths;
821    static int first_callout;
822    static int jit_was_used;
823    static int locale_set = 0;
824    static int show_malloc;
825    static int use_utf;
826  static size_t gotten_store;  static size_t gotten_store;
827    static size_t first_gotten_store = 0;
828    static const unsigned char *last_callout_mark = NULL;
829    
830    /* The buffers grow automatically if very long input lines are encountered. */
831    
832    static int buffer_size = 50000;  /* starting size only; the comment above says the buffers grow on long input lines */
833    static pcre_uint8 *buffer = NULL;  /* NULL at program start, i.e. nothing is allocated statically */
834    static pcre_uint8 *pbuffer = NULL;  /* second byte buffer, likewise NULL at start; NOTE(review): exact role not visible here */
835    
836    /* Another buffer is needed for translation to 16/32-bit character strings. It
837    will be obtained and extended as required. */
838    
839    #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
840    
841    /* We need the table of operator lengths that is used for 16/32-bit compiling, in
842    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
843    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
844    appropriately for the 16/32-bit world. Just as a safety check, make sure that
845    COMPILE_PCRE[16|32] is *not* set. */
846    
847    #ifdef COMPILE_PCRE16  /* build-configuration guard: stop the compile rather than use a mis-sized table */
848    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
849    #endif
850    
851    #ifdef COMPILE_PCRE32  /* same guard for the 32-bit library's build macro */
852    #error COMPILE_PCRE32 must not be set when compiling pcretest.c
853    #endif
854    
855    #if LINK_SIZE == 2  /* remap the configured LINK_SIZE: 2 becomes 1 */
856    #undef LINK_SIZE
857    #define LINK_SIZE 1
858    #elif LINK_SIZE == 3 || LINK_SIZE == 4  /* 3 and 4 both become 2 */
859    #undef LINK_SIZE
860    #define LINK_SIZE 2
861    #else  /* any other configured value is rejected at compile time */
862    #error LINK_SIZE must be either 2, 3, or 4
863    #endif
864    
865    #undef IMM2_SIZE  /* IMM2_SIZE is forced to 1 regardless of its previous value */
866    #define IMM2_SIZE 1
867    
868    #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
869    
870    #ifdef SUPPORT_PCRE16
871    static int buffer16_size = 0;  /* zero until the translation buffer is first obtained */
872    static pcre_uint16 *buffer16 = NULL;  /* no 16-bit translation buffer exists at start-up */
873    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };  /* OP_LENGTHS expands here, so it picks up any LINK_SIZE/IMM2_SIZE redefinition made earlier in the file */
874    #endif  /* SUPPORT_PCRE16 */
875    
876    #ifdef SUPPORT_PCRE32
877    static int buffer32_size = 0;  /* zero until the translation buffer is first obtained */
878    static pcre_uint32 *buffer32 = NULL;  /* no 32-bit translation buffer exists at start-up */
879    static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };  /* built from the same OP_LENGTHS macro as the 16-bit table, with whatever LINK_SIZE/IMM2_SIZE are in force here */
880    #endif  /* SUPPORT_PCRE32 */
881    
882    /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
883    support, it can be changed by an option. If there is no 8-bit support, there
884    must be 16- or 32-bit support, so default to 16-bit if present, else 32-bit. */
885    
886    #if defined SUPPORT_PCRE8  /* 8-bit is the default whenever it is compiled in */
887    static int pcre_mode = PCRE8_MODE;
888    #elif defined SUPPORT_PCRE16  /* no 8-bit support: 16-bit is preferred over 32-bit */
889    static int pcre_mode = PCRE16_MODE;
890    #elif defined SUPPORT_PCRE32  /* only 32-bit support is available */
891    static int pcre_mode = PCRE32_MODE;
892    #endif  /* there is no #else: with none of the three defined, pcre_mode is not declared */
893    
894    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
895    
896    static int jit_study_bits[] =  /* seven entries for n = 1..7; presumably looked up as [n - 1] -- confirm at the use site */
897      {
898      PCRE_STUDY_JIT_COMPILE,  /* 1: normal */
899      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,  /* 2: soft partial */
900      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,  /* 3: normal + soft */
901      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,  /* 4: hard partial */
902      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,  /* 5: normal + hard */
903      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,  /* 6: soft + hard */
904      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +  /* 7: all three; '+' is used as a flag union, which assumes the options are distinct bits */
905        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
906    };
907    
908    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | /* bitwise OR of the three JIT study options, parenthesised for safe use in expressions */ \
909      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
910    
911    /* Textual explanations for runtime error codes */
912    
913    static const char *errtexts[] = {  /* appears to be indexed by the negated return code (entry 1 is NOMATCH) -- confirm at the lookup site */
914      NULL,  /* 0 is no error */
915      NULL,  /* NOMATCH is handled specially */
916      "NULL argument passed",
917      "bad option value",
918      "magic number missing",
919      "unknown opcode - pattern overwritten?",
920      "no more memory",
921      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
922      "match limit exceeded",
923      "callout error code",
924      NULL,  /* BADUTF8/16 is handled specially */
925      NULL,  /* BADUTF8/16 offset is handled specially */
926      NULL,  /* PARTIAL is handled specially */
927      "not used - internal error",
928      "internal error - pattern overwritten?",
929      "bad count value",
930      "item unsupported for DFA matching",
931      "backreference condition or recursion test not supported for DFA matching",
932      "match limit not supported for DFA matching",
933      "workspace size exceeded in DFA matching",
934      "too much recursion for DFA matching",
935      "recursion limit exceeded",
936      "not used - internal error",
937      "invalid combination of newline options",
938      "bad offset value",
939      NULL,  /* SHORTUTF8/16 is handled specially */
940      "nested recursion at the same subject position",
941      "JIT stack limit reached",
942      "pattern compiled in wrong mode: 8-bit/16-bit error",
943      "pattern compiled with other endianness",
944      "invalid data in workspace for DFA restart"  /* last entry: the table has 31 slots, indices 0..30; NULL slots have no text to print */
945    };
946    
947    
948  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
949  code as contained in pcre.c under the DEBUG macro. */  *         Alternate character tables             *
950    *************************************************/
951    
952  static const char *OP_names[] = {  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
953    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  using the default tables of the library. However, the T option can be used to
954    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  select alternate sets of tables, for different kinds of testing. Note also that
955    "Opt", "^", "$", "Any", "chars", "not",  the L (locale) option also adjusts the tables. */
956    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
957    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  /* This is the set of tables distributed as default with PCRE. It recognizes
958    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  only ASCII characters. */
959    "*", "*?", "+", "+?", "?", "??", "{", "{",  
960    "class", "Ref", "Recurse",  static const pcre_uint8 tables0[] = {
961    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
962    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  /* This table is a lower casing table. */
963    "Brazero", "Braminzero", "Bra"  
964        0,  1,  2,  3,  4,  5,  6,  7,
965        8,  9, 10, 11, 12, 13, 14, 15,
966       16, 17, 18, 19, 20, 21, 22, 23,
967       24, 25, 26, 27, 28, 29, 30, 31,
968       32, 33, 34, 35, 36, 37, 38, 39,
969       40, 41, 42, 43, 44, 45, 46, 47,
970       48, 49, 50, 51, 52, 53, 54, 55,
971       56, 57, 58, 59, 60, 61, 62, 63,
972       64, 97, 98, 99,100,101,102,103,
973      104,105,106,107,108,109,110,111,
974      112,113,114,115,116,117,118,119,
975      120,121,122, 91, 92, 93, 94, 95,
976       96, 97, 98, 99,100,101,102,103,
977      104,105,106,107,108,109,110,111,
978      112,113,114,115,116,117,118,119,
979      120,121,122,123,124,125,126,127,
980      128,129,130,131,132,133,134,135,
981      136,137,138,139,140,141,142,143,
982      144,145,146,147,148,149,150,151,
983      152,153,154,155,156,157,158,159,
984      160,161,162,163,164,165,166,167,
985      168,169,170,171,172,173,174,175,
986      176,177,178,179,180,181,182,183,
987      184,185,186,187,188,189,190,191,
988      192,193,194,195,196,197,198,199,
989      200,201,202,203,204,205,206,207,
990      208,209,210,211,212,213,214,215,
991      216,217,218,219,220,221,222,223,
992      224,225,226,227,228,229,230,231,
993      232,233,234,235,236,237,238,239,
994      240,241,242,243,244,245,246,247,
995      248,249,250,251,252,253,254,255,
996    
997    /* This table is a case flipping table. */
998    
999        0,  1,  2,  3,  4,  5,  6,  7,
1000        8,  9, 10, 11, 12, 13, 14, 15,
1001       16, 17, 18, 19, 20, 21, 22, 23,
1002       24, 25, 26, 27, 28, 29, 30, 31,
1003       32, 33, 34, 35, 36, 37, 38, 39,
1004       40, 41, 42, 43, 44, 45, 46, 47,
1005       48, 49, 50, 51, 52, 53, 54, 55,
1006       56, 57, 58, 59, 60, 61, 62, 63,
1007       64, 97, 98, 99,100,101,102,103,
1008      104,105,106,107,108,109,110,111,
1009      112,113,114,115,116,117,118,119,
1010      120,121,122, 91, 92, 93, 94, 95,
1011       96, 65, 66, 67, 68, 69, 70, 71,
1012       72, 73, 74, 75, 76, 77, 78, 79,
1013       80, 81, 82, 83, 84, 85, 86, 87,
1014       88, 89, 90,123,124,125,126,127,
1015      128,129,130,131,132,133,134,135,
1016      136,137,138,139,140,141,142,143,
1017      144,145,146,147,148,149,150,151,
1018      152,153,154,155,156,157,158,159,
1019      160,161,162,163,164,165,166,167,
1020      168,169,170,171,172,173,174,175,
1021      176,177,178,179,180,181,182,183,
1022      184,185,186,187,188,189,190,191,
1023      192,193,194,195,196,197,198,199,
1024      200,201,202,203,204,205,206,207,
1025      208,209,210,211,212,213,214,215,
1026      216,217,218,219,220,221,222,223,
1027      224,225,226,227,228,229,230,231,
1028      232,233,234,235,236,237,238,239,
1029      240,241,242,243,244,245,246,247,
1030      248,249,250,251,252,253,254,255,
1031    
1032    /* This table contains bit maps for various character classes. Each map is 32
1033    bytes long and the bits run from the least significant end of each byte. The
1034    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1035    graph, print, punct, and cntrl. Other classes are built from combinations. */
1036    
1037      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1038      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1039      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1040      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1041    
1042      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1043      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1044      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1045      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1046    
1047      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1048      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1049      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1050      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1051    
1052      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1053      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1054      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1055      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1056    
1057      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1058      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1059      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1060      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1061    
1062      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1063      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1064      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1065      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1066    
1067      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1068      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1069      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1070      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1071    
1072      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1073      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1074      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1075      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1076    
1077      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1078      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1079      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1080      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1081    
1082      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1083      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1084      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1085      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1086    
1087    /* This table identifies various classes of character by individual bits:
1088      0x01   white space character
1089      0x02   letter
1090      0x04   decimal digit
1091      0x08   hexadecimal digit
1092      0x10   alphanumeric or '_'
1093      0x80   regular expression metacharacter or binary zero
1094    */
1095    
1096      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
1097      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
1098      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
1099      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
1100      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
1101      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
1102      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
1103      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
1104      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
1105      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
1106      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
1107      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
1108      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
1109      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
1110      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
1111      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
1112      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1113      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1114      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1115      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1116      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1117      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1118      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1119      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1120      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1121      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1122      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1123      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1124      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1125      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1126      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1127      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1128    
1129    /* This is a set of tables that came originally from a Windows user. It seems to
1130    be at least an approximation of ISO 8859. In particular, there are characters
1131    greater than 128 that are marked as spaces, letters, etc. */
1132    
1133    static const pcre_uint8 tables1[] = {
1134    0,1,2,3,4,5,6,7,
1135    8,9,10,11,12,13,14,15,
1136    16,17,18,19,20,21,22,23,
1137    24,25,26,27,28,29,30,31,
1138    32,33,34,35,36,37,38,39,
1139    40,41,42,43,44,45,46,47,
1140    48,49,50,51,52,53,54,55,
1141    56,57,58,59,60,61,62,63,
1142    64,97,98,99,100,101,102,103,
1143    104,105,106,107,108,109,110,111,
1144    112,113,114,115,116,117,118,119,
1145    120,121,122,91,92,93,94,95,
1146    96,97,98,99,100,101,102,103,
1147    104,105,106,107,108,109,110,111,
1148    112,113,114,115,116,117,118,119,
1149    120,121,122,123,124,125,126,127,
1150    128,129,130,131,132,133,134,135,
1151    136,137,138,139,140,141,142,143,
1152    144,145,146,147,148,149,150,151,
1153    152,153,154,155,156,157,158,159,
1154    160,161,162,163,164,165,166,167,
1155    168,169,170,171,172,173,174,175,
1156    176,177,178,179,180,181,182,183,
1157    184,185,186,187,188,189,190,191,
1158    224,225,226,227,228,229,230,231,
1159    232,233,234,235,236,237,238,239,
1160    240,241,242,243,244,245,246,215,
1161    248,249,250,251,252,253,254,223,
1162    224,225,226,227,228,229,230,231,
1163    232,233,234,235,236,237,238,239,
1164    240,241,242,243,244,245,246,247,
1165    248,249,250,251,252,253,254,255,
1166    0,1,2,3,4,5,6,7,
1167    8,9,10,11,12,13,14,15,
1168    16,17,18,19,20,21,22,23,
1169    24,25,26,27,28,29,30,31,
1170    32,33,34,35,36,37,38,39,
1171    40,41,42,43,44,45,46,47,
1172    48,49,50,51,52,53,54,55,
1173    56,57,58,59,60,61,62,63,
1174    64,97,98,99,100,101,102,103,
1175    104,105,106,107,108,109,110,111,
1176    112,113,114,115,116,117,118,119,
1177    120,121,122,91,92,93,94,95,
1178    96,65,66,67,68,69,70,71,
1179    72,73,74,75,76,77,78,79,
1180    80,81,82,83,84,85,86,87,
1181    88,89,90,123,124,125,126,127,
1182    128,129,130,131,132,133,134,135,
1183    136,137,138,139,140,141,142,143,
1184    144,145,146,147,148,149,150,151,
1185    152,153,154,155,156,157,158,159,
1186    160,161,162,163,164,165,166,167,
1187    168,169,170,171,172,173,174,175,
1188    176,177,178,179,180,181,182,183,
1189    184,185,186,187,188,189,190,191,
1190    224,225,226,227,228,229,230,231,
1191    232,233,234,235,236,237,238,239,
1192    240,241,242,243,244,245,246,215,
1193    248,249,250,251,252,253,254,223,
1194    192,193,194,195,196,197,198,199,
1195    200,201,202,203,204,205,206,207,
1196    208,209,210,211,212,213,214,247,
1197    216,217,218,219,220,221,222,255,
1198    0,62,0,0,1,0,0,0,
1199    0,0,0,0,0,0,0,0,
1200    32,0,0,0,1,0,0,0,
1201    0,0,0,0,0,0,0,0,
1202    0,0,0,0,0,0,255,3,
1203    126,0,0,0,126,0,0,0,
1204    0,0,0,0,0,0,0,0,
1205    0,0,0,0,0,0,0,0,
1206    0,0,0,0,0,0,255,3,
1207    0,0,0,0,0,0,0,0,
1208    0,0,0,0,0,0,12,2,
1209    0,0,0,0,0,0,0,0,
1210    0,0,0,0,0,0,0,0,
1211    254,255,255,7,0,0,0,0,
1212    0,0,0,0,0,0,0,0,
1213    255,255,127,127,0,0,0,0,
1214    0,0,0,0,0,0,0,0,
1215    0,0,0,0,254,255,255,7,
1216    0,0,0,0,0,4,32,4,
1217    0,0,0,128,255,255,127,255,
1218    0,0,0,0,0,0,255,3,
1219    254,255,255,135,254,255,255,7,
1220    0,0,0,0,0,4,44,6,
1221    255,255,127,255,255,255,127,255,
1222    0,0,0,0,254,255,255,255,
1223    255,255,255,255,255,255,255,127,
1224    0,0,0,0,254,255,255,255,
1225    255,255,255,255,255,255,255,255,
1226    0,2,0,0,255,255,255,255,
1227    255,255,255,255,255,255,255,127,
1228    0,0,0,0,255,255,255,255,
1229    255,255,255,255,255,255,255,255,
1230    0,0,0,0,254,255,0,252,
1231    1,0,0,248,1,0,0,120,
1232    0,0,0,0,254,255,255,255,
1233    0,0,128,0,0,0,128,0,
1234    255,255,255,255,0,0,0,0,
1235    0,0,0,0,0,0,0,128,
1236    255,255,255,255,0,0,0,0,
1237    0,0,0,0,0,0,0,0,
1238    128,0,0,0,0,0,0,0,
1239    0,1,1,0,1,1,0,0,
1240    0,0,0,0,0,0,0,0,
1241    0,0,0,0,0,0,0,0,
1242    1,0,0,0,128,0,0,0,
1243    128,128,128,128,0,0,128,0,
1244    28,28,28,28,28,28,28,28,
1245    28,28,0,0,0,0,0,128,
1246    0,26,26,26,26,26,26,18,
1247    18,18,18,18,18,18,18,18,
1248    18,18,18,18,18,18,18,18,
1249    18,18,18,128,128,0,128,16,
1250    0,26,26,26,26,26,26,18,
1251    18,18,18,18,18,18,18,18,
1252    18,18,18,18,18,18,18,18,
1253    18,18,18,128,128,0,0,0,
1254    0,0,0,0,0,1,0,0,
1255    0,0,0,0,0,0,0,0,
1256    0,0,0,0,0,0,0,0,
1257    0,0,0,0,0,0,0,0,
1258    1,0,0,0,0,0,0,0,
1259    0,0,18,0,0,0,0,0,
1260    0,0,20,20,0,18,0,0,
1261    0,20,18,0,0,0,0,0,
1262    18,18,18,18,18,18,18,18,
1263    18,18,18,18,18,18,18,18,
1264    18,18,18,18,18,18,18,0,
1265    18,18,18,18,18,18,18,18,
1266    18,18,18,18,18,18,18,18,
1267    18,18,18,18,18,18,18,18,
1268    18,18,18,18,18,18,18,0,
1269    18,18,18,18,18,18,18,18
1270  };  };
1271    
1272    
1273  static void print_internals(pcre *re)  
1274    
1275    #ifndef HAVE_STRERROR
1276    /*************************************************
1277    *     Provide strerror() for non-ANSI libraries  *
1278    *************************************************/
1279    
1280    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1281    in their libraries, but can provide the same facility by this simple
1282    alternative function. */
1283    
1284    extern int   sys_nerr;
1285    extern char *sys_errlist[];
1286    
1287    char *
1288    strerror(int n)
1289  {  {
1290  unsigned char *code = ((real_pcre *)re)->code;  if (n < 0 || n >= sys_nerr) return "unknown error number";
1291    return sys_errlist[n];
1292    }
1293    #endif /* HAVE_STRERROR */
1294    
1295    
1296    
1297    /*************************************************
1298    *       Print newline configuration              *
1299    *************************************************/
1300    
1301  fprintf(outfile, "------------------------------------------------------------------\n");  /*
1302    Arguments:
1303      rc         the return code from PCRE_CONFIG_NEWLINE
1304      isc        TRUE if called from "-C newline"
1305    Returns:     nothing
1306    */
1307    
1308  for(;;)  static void
1309    print_newline_config(int rc, BOOL isc)
1310    {
1311    const char *s = NULL;
1312    if (!isc) printf("  Newline sequence is ");
1313    switch(rc)
1314    {    {
1315    int c;    case CHAR_CR: s = "CR"; break;
1316    int charlength;    case CHAR_LF: s = "LF"; break;
1317      case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1318      case -1: s = "ANY"; break;
1319      case -2: s = "ANYCRLF"; break;
1320    
1321      default:
1322      printf("a non-standard value: 0x%04x\n", rc);
1323      return;
1324      }
1325    
1326    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  printf("%s\n", s);
1327    }
1328    
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
1329    
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
1330    
1331      case OP_OPT:  /*************************************************
1332      fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  *         JIT memory callback                    *
1333      code++;  *************************************************/
     break;  
1334    
1335      case OP_COND:  static pcre_jit_stack* jit_callback(void *arg)
1336      fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  {
1337      code += 2;  jit_was_used = TRUE;
1338      break;  return (pcre_jit_stack *)arg;
1339    }
1340    
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
1341    
1342      case OP_CHARS:  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1343      charlength = *(++code);  /*************************************************
1344      fprintf(outfile, "%3d ", charlength);  *            Convert UTF-8 string to value       *
1345      while (charlength-- > 0)  *************************************************/
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
1346    
1347      case OP_KETRMAX:  /* This function takes one or more bytes that represent a UTF-8 character,
1348      case OP_KETRMIN:  and returns the value of the character.
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
1349    
1350      case OP_REVERSE:  Argument:
1351      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);    utf8bytes   a pointer to the byte vector
1352      code += 2;    vptr        a pointer to an int to receive the value
1353      break;  
1354    Returns:      >  0 => the number of bytes consumed
1355                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1356    */
1357    
1358      case OP_STAR:  static int
1359      case OP_MINSTAR:  utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1360      case OP_PLUS:  {
1361      case OP_MINPLUS:  pcre_uint32 c = *utf8bytes++;
1362      case OP_QUERY:  pcre_uint32 d = c;
1363      case OP_MINQUERY:  int i, j, s;
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
1364    
1365      case OP_EXACT:  for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1366      case OP_UPTO:    {
1367      case OP_MINUPTO:    if ((d & 0x80) == 0) break;
1368      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);    d <<= 1;
1369        else fprintf(outfile, "    \\x%02x{", c);    }
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
1370    
1371      case OP_TYPEEXACT:  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1372      case OP_TYPEUPTO:  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
1373    
1374      case OP_NOT:  /* i now has a value in the range 1-5 */
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
1375    
1376      case OP_NOTSTAR:  s = 6*i;
1377      case OP_NOTMINSTAR:  d = (c & utf8_table3[i]) << s;
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
1378    
1379      case OP_NOTEXACT:  for (j = 0; j < i; j++)
1380      case OP_NOTUPTO:    {
1381      case OP_NOTMINUPTO:    c = *utf8bytes++;
1382      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);    if ((c & 0xc0) != 0x80) return -(j+1);
1383        else fprintf(outfile, "    [^\\x%02x]{", c);    s -= 6;
1384      if (*code != OP_NOTEXACT) fprintf(outfile, ",");    d |= (c & 0x3f) << s;
1385      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);    }
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
1386    
1387      case OP_REF:  /* Check that encoding was the correct unique one */
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
1388    
1389      case OP_CLASS:  for (j = 0; j < utf8_table1_size; j++)
1390        {    if (d <= utf8_table1[j]) break;
1391        int i, min, max;  if (j != i) return -(i+1);
       code++;  
       fprintf(outfile, "    [");  
1392    
1393        for (i = 0; i < 256; i++)  /* Valid value */
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
1394    
1395        CLASS_REF_REPEAT:  *vptr = d;
1396    return i+1;
1397    }
1398    #endif /* NOUTF || SUPPORT_PCRE16 */
1399    
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
1400    
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
1401    
1402          default:  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1403          code--;  /*************************************************
1404          }  *       Convert character value to UTF-8         *
1405        }  *************************************************/
     break;  
1406    
1407      /* Anything else is just a one-node item */  /* This function takes an integer value in the range 0 - 0x7fffffff
1408    and encodes it as a UTF-8 character in 0 to 6 bytes.
1409    
1410      default:  Arguments:
1411      fprintf(outfile, "    %s", OP_names[*code]);    cvalue     the character value
1412      break;    utf8bytes  pointer to buffer for result - at least 6 bytes long
1413    
1414    Returns:     number of bytes placed in the buffer, or -1 if cvalue > 0x7fffffff
1415    */
1416    
1417    static int
1418    ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1419    {
1420    register int i, j;
1421    if (cvalue > 0x7fffffffu)
1422      return -1;
1423    for (i = 0; i < utf8_table1_size; i++)
1424      if (cvalue <= utf8_table1[i]) break;
1425    utf8bytes += i;
1426    for (j = i; j > 0; j--)
1427     {
1428     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1429     cvalue >>= 6;
1430     }
1431    *utf8bytes = utf8_table2[i] | cvalue;
1432    return i + 1;
1433    }
1434    #endif
1435    
1436    
1437    #ifdef SUPPORT_PCRE16
1438    /*************************************************
1439    *         Convert a string to 16-bit             *
1440    *************************************************/
1441    
1442    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1443    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1444    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1445    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1446    result is always left in buffer16.
1447    
1448    Note that this function does not object to surrogate values. This is
1449    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1450    for the purpose of testing that they are correctly faulted.
1451    
1452    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1453    in UTF-8 so that values greater than 255 can be handled.
1454    
1455    Arguments:
1456      data       TRUE if converting a data line; FALSE for a regex
1457      p          points to a byte string
1458      utf        true if UTF-8 (to be converted to UTF-16)
1459      len        number of bytes in the string (excluding trailing zero)
1460    
1461    Returns:     number of 16-bit data items used (excluding trailing zero)
1462                 OR -1 if a UTF-8 string is malformed
1463                 OR -2 if a value > 0x10ffff is encountered
1464                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1465    */
1466    
1467    static int
1468    to16(int data, pcre_uint8 *p, int utf, int len)
1469    {
1470    pcre_uint16 *pp;
1471    
1472    if (buffer16_size < 2*len + 2)
1473      {
1474      if (buffer16 != NULL) free(buffer16);
1475      buffer16_size = 2*len + 2;
1476      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1477      if (buffer16 == NULL)
1478        {
1479        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1480        exit(1);
1481      }      }
1482      }
1483    
1484    code++;  pp = buffer16;
1485    fprintf(outfile, "\n");  
1486    if (!utf && !data)
1487      {
1488      while (len-- > 0) *pp++ = *p++;
1489    }    }
1490    
1491    else
1492      {
1493      pcre_uint32 c = 0;
1494      while (len > 0)
1495        {
1496        int chlen = utf82ord(p, &c);
1497        if (chlen <= 0) return -1;
1498        if (c > 0x10ffff) return -2;
1499        p += chlen;
1500        len -= chlen;
1501        if (c < 0x10000) *pp++ = c; else
1502          {
1503          if (!utf) return -3;
1504          c -= 0x10000;
1505          *pp++ = 0xD800 | (c >> 10);
1506          *pp++ = 0xDC00 | (c & 0x3ff);
1507          }
1508        }
1509      }
1510    
1511    *pp = 0;
1512    return pp - buffer16;
1513    }
1514    #endif
1515    
1516    #ifdef SUPPORT_PCRE32
1517    /*************************************************
1518    *         Convert a string to 32-bit             *
1519    *************************************************/
1520    
1521    /* The space needed for the 32-bit string is never more than four times the
1522    8-bit size: in non-UTF mode each byte becomes exactly one 32-bit unit, and
1523    when the input is UTF-8 each character occupies at least one byte but
1524    yields only a single 32-bit unit. Room is also left for a trailing zero. The
1525    result is always left in buffer32.
1526    
1527    Note that this function does not object to surrogate values in data lines.
1528    This is deliberate; it makes it possible to construct UTF-32 strings that are
1529    invalid, for the purpose of testing that they are correctly faulted. In UTF mode, a surrogate in a pattern is rejected.
1530    
1531    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1532    in UTF-8 so that values greater than 255 can be handled.
1533    
1534    Arguments:
1535      data       TRUE if converting a data line; FALSE for a regex
1536      p          points to a byte string
1537      utf        true if UTF-8 (to be converted to UTF-32)
1538      len        number of bytes in the string (excluding trailing zero)
1539    
1540    Returns:     number of 32-bit data items used (excluding trailing zero)
1541                 OR -1 if a UTF-8 string is malformed
1542                 OR -2 if a value > 0x10ffff is encountered
1543                 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1544    */
1545    
1546    static int
1547    to32(int data, pcre_uint8 *p, int utf, int len)
1548    {
1549    pcre_uint32 *pp;
1550    
1551    if (buffer32_size < 4*len + 4)
1552      {
1553      if (buffer32 != NULL) free(buffer32);
1554      buffer32_size = 4*len + 4;
1555      buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1556      if (buffer32 == NULL)
1557        {
1558        fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1559        exit(1);
1560        }
1561      }
1562    
1563    pp = buffer32;
1564    
1565    if (!utf && !data)
1566      {
1567      while (len-- > 0) *pp++ = *p++;
1568      }
1569    
1570    else
1571      {
1572      pcre_uint32 c = 0;
1573      while (len > 0)
1574        {
1575        int chlen = utf82ord(p, &c);
1576        if (chlen <= 0) return -1;
1577        if (utf)
1578          {
1579          if (c > 0x10ffff) return -2;
1580          if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1581          }
1582    
1583        p += chlen;
1584        len -= chlen;
1585        *pp++ = c;
1586        }
1587      }
1588    
1589    *pp = 0;
1590    return pp - buffer32;
1591    }
1592    #endif
1593    
1594    /*************************************************
1595    *        Read or extend an input line            *
1596    *************************************************/
1597    
1598    /* Input lines are read into buffer, but both patterns and data lines can be
1599    continued over multiple input lines. In addition, if the buffer fills up, we
1600    want to automatically expand it so as to be able to handle extremely large
1601    lines that are needed for certain stress tests. When the input buffer is
1602    expanded, the other two buffers must also be expanded likewise, and the
1603    contents of pbuffer, which are a copy of the input for callouts, must be
1604    preserved (for when expansion happens for a data line). This is not the most
1605    optimal way of handling this, but hey, this is just a test program!
1606    
1607    Arguments:
1608      f            the file to read
1609      start        where in buffer to start (this *must* be within buffer)
1610      prompt       for stdin or readline()
1611    
1612    Returns:       pointer to the start of new data
1613                   could be a copy of start, or could be moved
1614                   NULL if no data read and EOF reached
1615    */
1616    
1617    static pcre_uint8 *
1618    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1619    {
1620    pcre_uint8 *here = start;
1621    
1622    for (;;)
1623      {
1624      size_t rlen = (size_t)(buffer_size - (here - buffer));
1625    
1626      if (rlen > 1000)
1627        {
1628        int dlen;
1629    
1630        /* If libreadline or libedit support is required, use readline() to read a
1631        line if the input is a terminal. Note that readline() removes the trailing
1632        newline, so we must put it back again, to be compatible with fgets(). */
1633    
1634    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1635        if (isatty(fileno(f)))
1636          {
1637          size_t len;
1638          char *s = readline(prompt);
1639          if (s == NULL) return (here == start)? NULL : start;
1640          len = strlen(s);
1641          if (len > 0) add_history(s);
1642          if (len > rlen - 1) len = rlen - 1;
1643          memcpy(here, s, len);
1644          here[len] = '\n';
1645          here[len+1] = 0;
1646          free(s);
1647          }
1648        else
1649    #endif
1650    
1651        /* Read the next line by normal means, prompting if the file is stdin. */
1652    
1653          {
1654          if (f == stdin) printf("%s", prompt);
1655          if (fgets((char *)here, rlen,  f) == NULL)
1656            return (here == start)? NULL : start;
1657          }
1658    
1659        dlen = (int)strlen((char *)here);
1660        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1661        here += dlen;
1662        }
1663    
1664      else
1665        {
1666        int new_buffer_size = 2*buffer_size;
1667        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1668        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1669    
1670        if (new_buffer == NULL || new_pbuffer == NULL)
1671          {
1672          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1673          exit(1);
1674          }
1675    
1676        memcpy(new_buffer, buffer, buffer_size);
1677        memcpy(new_pbuffer, pbuffer, buffer_size);
1678    
1679        buffer_size = new_buffer_size;
1680    
1681        start = new_buffer + (start - buffer);
1682        here = new_buffer + (here - buffer);
1683    
1684        free(buffer);
1685        free(pbuffer);
1686    
1687        buffer = new_buffer;
1688        pbuffer = new_pbuffer;
1689        }
1690      }
1691    
1692    return NULL;  /* Control never gets here */
1693    }
1694    
1695    
1696    
1697    /*************************************************
1698    *          Read number from string               *
1699    *************************************************/
1700    
1701    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1702    around with conditional compilation, just do the job by hand. It is only used
1703    for unpicking arguments, so just keep it simple.
1704    
1705    Arguments:
1706      str           string to be converted
1707      endptr        where to put the end pointer
1708    
1709    Returns:        the value as an int (0 if no digits are present)
1710    */
1711    
1712    static int
1713    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1714    {
1715    int result = 0;
1716    while(*str != 0 && isspace(*str)) str++;
1717    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1718    *endptr = str;
1719    return(result);
1720    }
1721    
1722    
1723    
1724    /*************************************************
1725    *             Print one character                *
1726    *************************************************/
1727    
1728    /* Print a single character either literally, or as a hex escape. */
1729    
1730    static int pchar(pcre_uint32 c, FILE *f)
1731    {
1732    int n;
1733    if (PRINTOK(c))
1734      {
1735      if (f != NULL) fprintf(f, "%c", c);
1736      return 1;
1737      }
1738    
1739    if (c < 0x100)
1740      {
1741      if (use_utf)
1742        {
1743        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1744        return 6;
1745        }
1746      else
1747        {
1748        if (f != NULL) fprintf(f, "\\x%02x", c);
1749        return 4;
1750        }
1751      }
1752    
1753    if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1754    return n >= 0 ? n : 0;
1755    }
1756    
1757    
1758    
1759    #ifdef SUPPORT_PCRE8
1760    /*************************************************
1761    *         Print 8-bit character string           *
1762    *************************************************/
1763    
1764    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1765    If handed a NULL file, just counts chars without printing. */
1766    
1767    static int pchars(pcre_uint8 *p, int length, FILE *f)
1768    {
1769    pcre_uint32 c = 0;
1770    int yield = 0;
1771    
1772    if (length < 0)
1773      length = strlen((char *)p);
1774    
1775    while (length-- > 0)
1776      {
1777    #if !defined NOUTF
1778      if (use_utf)
1779        {
1780        int rc = utf82ord(p, &c);
1781        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1782          {
1783          length -= rc - 1;
1784          p += rc;
1785          yield += pchar(c, f);
1786          continue;
1787          }
1788        }
1789    #endif
1790      c = *p++;
1791      yield += pchar(c, f);
1792      }
1793    
1794    return yield;
1795    }
1796    #endif
1797    
1798    
1799    
1800    #ifdef SUPPORT_PCRE16
1801    /*************************************************
1802    *    Find length of 0-terminated 16-bit string   *
1803    *************************************************/
1804    
1805    static int strlen16(PCRE_SPTR16 p)
1806    {
1807    int len = 0;
1808    while (*p++ != 0) len++;
1809    return len;
1810    }
1811    #endif  /* SUPPORT_PCRE16 */
1812    
1813    
1814    
1815    #ifdef SUPPORT_PCRE32
1816    /*************************************************
1817    *    Find length of 0-terminated 32-bit string   *
1818    *************************************************/
1819    
1820    static int strlen32(PCRE_SPTR32 p)
1821    {
1822    int len = 0;
1823    while (*p++ != 0) len++;
1824    return len;
1825    }
1826    #endif  /* SUPPORT_PCRE32 */
1827    
1828    
1829    
1830    #ifdef SUPPORT_PCRE16
1831    /*************************************************
1832    *           Print 16-bit character string        *
1833    *************************************************/
1834    
1835    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1836    If handed a NULL file, just counts chars without printing. */
1837    
1838    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1839    {
1840    int yield = 0;
1841    
1842    if (length < 0)
1843      length = strlen16(p);
1844    
1845    while (length-- > 0)
1846      {
1847      pcre_uint32 c = *p++ & 0xffff;
1848    #if !defined NOUTF
1849      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1850        {
1851        int d = *p & 0xffff;
1852        if (d >= 0xDC00 && d < 0xDFFF)
1853          {
1854          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1855          length--;
1856          p++;
1857          }
1858        }
1859    #endif
1860      yield += pchar(c, f);
1861      }
1862    
1863    return yield;
1864    }
1865    #endif  /* SUPPORT_PCRE16 */
1866    
1867    
1868    
1869    #ifdef SUPPORT_PCRE32
1870    /*************************************************
1871    *           Print 32-bit character string        *
1872    *************************************************/
1873    
1874    /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
1875    If handed a NULL file, just counts chars without printing. */
1876    
1877    static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
1878    {
1879    int yield = 0;
1880    
1881    if (length < 0)
1882      length = strlen32(p);
1883    
1884    while (length-- > 0)
1885      {
1886      pcre_uint32 c = *p++;
1887      yield += pchar(c, f);
1888      }
1889    
1890    return yield;
1891    }
1892    #endif  /* SUPPORT_PCRE32 */
1893    
1894    
1895    
1896    #ifdef SUPPORT_PCRE8
1897    /*************************************************
1898    *     Read a capture name (8-bit) and check it   *
1899    *************************************************/
1900    
1901    static pcre_uint8 *
1902    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1903    {
1904    pcre_uint8 *npp = *pp;
1905    while (isalnum(*p)) *npp++ = *p++;
1906    *npp++ = 0;
1907    *npp = 0;
1908    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1909      {
1910      fprintf(outfile, "no parentheses with name \"");
1911      PCHARSV(*pp, 0, -1, outfile);
1912      fprintf(outfile, "\"\n");
1913      }
1914    
1915    *pp = npp;
1916    return p;
1917    }
1918    #endif  /* SUPPORT_PCRE8 */
1919    
1920    
1921    
1922    #ifdef SUPPORT_PCRE16
1923    /*************************************************
1924    *     Read a capture name (16-bit) and check it  *
1925    *************************************************/
1926    
1927    /* Note that the text being read is 8-bit. */
1928    
1929    static pcre_uint8 *
1930    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1931    {
1932    pcre_uint16 *npp = *pp;
1933    while (isalnum(*p)) *npp++ = *p++;
1934    *npp++ = 0;
1935    *npp = 0;
1936    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1937      {
1938      fprintf(outfile, "no parentheses with name \"");
1939      PCHARSV(*pp, 0, -1, outfile);
1940      fprintf(outfile, "\"\n");
1941      }
1942    *pp = npp;
1943    return p;
1944    }
1945    #endif  /* SUPPORT_PCRE16 */
1946    
1947    
1948    
1949    #ifdef SUPPORT_PCRE32
1950    /*************************************************
1951    *     Read a capture name (32-bit) and check it  *
1952    *************************************************/
1953    
1954    /* Note that the text being read is 8-bit. */
1955    
1956    static pcre_uint8 *
1957    read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
1958    {
1959    pcre_uint32 *npp = *pp;
1960    while (isalnum(*p)) *npp++ = *p++;
1961    *npp++ = 0;
1962    *npp = 0;
1963    if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
1964      {
1965      fprintf(outfile, "no parentheses with name \"");
1966      PCHARSV(*pp, 0, -1, outfile);
1967      fprintf(outfile, "\"\n");
1968      }
1969    *pp = npp;
1970    return p;
1971    }
1972    #endif  /* SUPPORT_PCRE32 */
1973    
1974    
1975    
1976    /*************************************************
1977    *              Callout function                  *
1978    *************************************************/
1979    
1980    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1981    the match. Yield zero unless more callouts than the fail count, or the callout
1982    data is not zero. */
1983    
1984    static int callout(pcre_callout_block *cb)
1985    {
1986    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1987    int i, pre_start, post_start, subject_length;
1988    
1989    if (callout_extra)
1990      {
1991      fprintf(f, "Callout %d: last capture = %d\n",
1992        cb->callout_number, cb->capture_last);
1993    
1994      for (i = 0; i < cb->capture_top * 2; i += 2)
1995        {
1996        if (cb->offset_vector[i] < 0)
1997          fprintf(f, "%2d: <unset>\n", i/2);
1998        else
1999          {
2000          fprintf(f, "%2d: ", i/2);
2001          PCHARSV(cb->subject, cb->offset_vector[i],
2002            cb->offset_vector[i+1] - cb->offset_vector[i], f);
2003          fprintf(f, "\n");
2004          }
2005        }
2006      }
2007    
2008    /* Re-print the subject in canonical form, the first time or if giving full
2009    details. On subsequent calls in the same match, we use pchars just to find the
2010    printed lengths of the substrings. */
2011    
2012    if (f != NULL) fprintf(f, "--->");
2013    
2014    PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2015    PCHARS(post_start, cb->subject, cb->start_match,
2016      cb->current_position - cb->start_match, f);
2017    
2018    PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2019    
2020    PCHARSV(cb->subject, cb->current_position,
2021      cb->subject_length - cb->current_position, f);
2022    
2023    if (f != NULL) fprintf(f, "\n");
2024    
2025    /* Always print appropriate indicators, with callout number if not already
2026    shown. For automatic callouts, show the pattern offset. */
2027    
2028    if (cb->callout_number == 255)
2029      {
2030      fprintf(outfile, "%+3d ", cb->pattern_position);
2031      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
2032      }
2033    else
2034      {
2035      if (callout_extra) fprintf(outfile, "    ");
2036        else fprintf(outfile, "%3d ", cb->callout_number);
2037      }
2038    
2039    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2040    fprintf(outfile, "^");
2041    
2042    if (post_start > 0)
2043      {
2044      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2045      fprintf(outfile, "^");
2046      }
2047    
2048    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2049      fprintf(outfile, " ");
2050    
2051    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2052      pbuffer + cb->pattern_position);
2053    
2054    fprintf(outfile, "\n");
2055    first_callout = 0;
2056    
2057    if (cb->mark != last_callout_mark)
2058      {
2059      if (cb->mark == NULL)
2060        fprintf(outfile, "Latest Mark: <unset>\n");
2061      else
2062        {
2063        fprintf(outfile, "Latest Mark: ");
2064        PCHARSV(cb->mark, 0, -1, outfile);
2065        putc('\n', outfile);
2066        }
2067      last_callout_mark = cb->mark;
2068      }
2069    
2070    if (cb->callout_data != NULL)
2071      {
2072      int callout_data = *((int *)(cb->callout_data));
2073      if (callout_data != 0)
2074        {
2075        fprintf(outfile, "Callout data = %d\n", callout_data);
2076        return callout_data;
2077        }
2078      }
2079    
2080    return (cb->callout_number != callout_fail_id)? 0 :
2081           (++callout_count >= callout_fail_count)? 1 : 0;
2082    }
2083    
2084    
2085    /*************************************************
2086    *            Local malloc functions              *
2087    *************************************************/
2088    
2089    /* Alternative malloc function, to test functionality and save the size of a
2090    compiled re, which is the first store request that pcre_compile() makes. The
2091    show_malloc variable is set only during matching. */
2092    
2093    static void *new_malloc(size_t size)
2094    {
2095    void *block = malloc(size);
2096    gotten_store = size;
2097    if (first_gotten_store == 0) first_gotten_store = size;
2098    if (show_malloc)
2099      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
2100    return block;
2101    }
2102    
2103    static void new_free(void *block)
2104    {
2105    if (show_malloc)
2106      fprintf(outfile, "free             %p\n", block);
2107    free(block);
2108    }
2109    
2110    /* For recursion malloc/free, to test stacking calls */
2111    
2112    static void *stack_malloc(size_t size)
2113    {
2114    void *block = malloc(size);
2115    if (show_malloc)
2116      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2117    return block;
2118    }
2119    
2120    static void stack_free(void *block)
2121    {
2122    if (show_malloc)
2123      fprintf(outfile, "stack_free       %p\n", block);
2124    free(block);
2125    }
2126    
2127    
2128    /*************************************************
2129    *          Call pcre_fullinfo()                  *
2130    *************************************************/
2131    
2132    /* Get one piece of information from the pcre_fullinfo() function. When only
2133    one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2134    value, but the code is defensive.
2135    
2136    Arguments:
2137      re        compiled regex
2138      study     study data
2139      option    PCRE_INFO_xxx option
2140      ptr       where to put the data
2141    
2142    Returns:    0 when OK, < 0 on error
2143    */
2144    
2145    static int
2146    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2147    {
2148    int rc;
2149    
2150    if (pcre_mode == PCRE32_MODE)
2151    #ifdef SUPPORT_PCRE32
2152      rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2153    #else
2154      rc = PCRE_ERROR_BADMODE;
2155    #endif
2156    else if (pcre_mode == PCRE16_MODE)
2157    #ifdef SUPPORT_PCRE16
2158      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2159    #else
2160      rc = PCRE_ERROR_BADMODE;
2161    #endif
2162    else
2163    #ifdef SUPPORT_PCRE8
2164      rc = pcre_fullinfo(re, study, option, ptr);
2165    #else
2166      rc = PCRE_ERROR_BADMODE;
2167    #endif
2168    
2169    if (rc < 0)
2170      {
2171      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2172        pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2173      if (rc == PCRE_ERROR_BADMODE)
2174        fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2175          "%d-bit mode\n", 8 * CHAR_SIZE,
2176          8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2177      }
2178    
2179    return rc;
2180    }
2181    
2182    
2183    
2184    /*************************************************
2185    *             Swap byte functions                *
2186    *************************************************/
2187    
2188    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2189    value, respectively.
2190    
2191    Arguments:
2192      value        any number
2193    
2194    Returns:       the byte swapped value
2195    */
2196    
2197    static pcre_uint32
2198    swap_uint32(pcre_uint32 value)
2199    {
2200    return ((value & 0x000000ff) << 24) |
2201           ((value & 0x0000ff00) <<  8) |
2202           ((value & 0x00ff0000) >>  8) |
2203           (value >> 24);
2204    }
2205    
2206    static pcre_uint16
2207    swap_uint16(pcre_uint16 value)
2208    {
2209    return (value >> 8) | (value << 8);
2210    }
2211    
2212    
2213    
2214    /*************************************************
2215    *        Flip bytes in a compiled pattern        *
2216    *************************************************/
2217    
2218    /* This function is called if the 'F' option was present on a pattern that is
2219    to be written to a file. We flip the bytes of all the integer fields in the
2220    regex data block and the study block. In 16-bit mode this also flips relevant
2221    bytes in the pattern itself. This is to make it possible to test PCRE's
2222    ability to reload byte-flipped patterns, e.g. those compiled on a different
2223    architecture. */
2224    
2225    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2226    static void
2227    regexflip8_or_16(pcre *ere, pcre_extra *extra)
2228    {
2229    real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2230    #ifdef SUPPORT_PCRE16
2231    int op;
2232    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2233    int length = re->name_count * re->name_entry_size;
2234    #ifdef SUPPORT_UTF
2235    BOOL utf = (re->options & PCRE_UTF16) != 0;
2236    BOOL utf16_char = FALSE;
2237    #endif /* SUPPORT_UTF */
2238    #endif /* SUPPORT_PCRE16 */
2239    
2240    /* Always flip the bytes in the main data block and study blocks. */
2241    
2242    re->magic_number = REVERSED_MAGIC_NUMBER;
2243    re->size = swap_uint32(re->size);
2244    re->options = swap_uint32(re->options);
2245    re->flags = swap_uint16(re->flags);
2246    re->top_bracket = swap_uint16(re->top_bracket);
2247    re->top_backref = swap_uint16(re->top_backref);
2248    re->first_char = swap_uint16(re->first_char);
2249    re->req_char = swap_uint16(re->req_char);
2250    re->name_table_offset = swap_uint16(re->name_table_offset);
2251    re->name_entry_size = swap_uint16(re->name_entry_size);
2252    re->name_count = swap_uint16(re->name_count);
2253    
2254    if (extra != NULL)
2255      {
2256      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2257      rsd->size = swap_uint32(rsd->size);
2258      rsd->flags = swap_uint32(rsd->flags);
2259      rsd->minlength = swap_uint32(rsd->minlength);
2260      }
2261    
2262    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2263    in the name table, if present, and then in the pattern itself. */
2264    
2265    #ifdef SUPPORT_PCRE16
2266    if (pcre_mode != PCRE16_MODE) return;
2267    
2268    while(TRUE)
2269      {
2270      /* Swap previous characters. */
2271      while (length-- > 0)
2272        {
2273        *ptr = swap_uint16(*ptr);
2274        ptr++;
2275        }
2276    #ifdef SUPPORT_UTF
2277      if (utf16_char)
2278        {
2279        if ((ptr[-1] & 0xfc00) == 0xd800)
2280          {
2281          /* We know that there is only one extra character in UTF-16. */
2282          *ptr = swap_uint16(*ptr);
2283          ptr++;
2284          }
2285        }
2286      utf16_char = FALSE;
2287    #endif /* SUPPORT_UTF */
2288    
2289      /* Get next opcode. */
2290    
2291      length = 0;
2292      op = *ptr;
2293      *ptr++ = swap_uint16(op);
2294    
2295      switch (op)
2296        {
2297        case OP_END:
2298        return;
2299    
2300    #ifdef SUPPORT_UTF
2301        case OP_CHAR:
2302        case OP_CHARI:
2303        case OP_NOT:
2304        case OP_NOTI:
2305        case OP_STAR:
2306        case OP_MINSTAR:
2307        case OP_PLUS:
2308        case OP_MINPLUS:
2309        case OP_QUERY:
2310        case OP_MINQUERY:
2311        case OP_UPTO:
2312        case OP_MINUPTO:
2313        case OP_EXACT:
2314        case OP_POSSTAR:
2315        case OP_POSPLUS:
2316        case OP_POSQUERY:
2317        case OP_POSUPTO:
2318        case OP_STARI:
2319        case OP_MINSTARI:
2320        case OP_PLUSI:
2321        case OP_MINPLUSI:
2322        case OP_QUERYI:
2323        case OP_MINQUERYI:
2324        case OP_UPTOI:
2325        case OP_MINUPTOI:
2326        case OP_EXACTI:
2327        case OP_POSSTARI:
2328        case OP_POSPLUSI:
2329        case OP_POSQUERYI:
2330        case OP_POSUPTOI:
2331        case OP_NOTSTAR:
2332        case OP_NOTMINSTAR:
2333        case OP_NOTPLUS:
2334        case OP_NOTMINPLUS:
2335        case OP_NOTQUERY:
2336        case OP_NOTMINQUERY:
2337        case OP_NOTUPTO:
2338        case OP_NOTMINUPTO:
2339        case OP_NOTEXACT:
2340        case OP_NOTPOSSTAR:
2341        case OP_NOTPOSPLUS:
2342        case OP_NOTPOSQUERY:
2343        case OP_NOTPOSUPTO:
2344        case OP_NOTSTARI:
2345        case OP_NOTMINSTARI:
2346        case OP_NOTPLUSI:
2347        case OP_NOTMINPLUSI:
2348        case OP_NOTQUERYI:
2349        case OP_NOTMINQUERYI:
2350        case OP_NOTUPTOI:
2351        case OP_NOTMINUPTOI:
2352        case OP_NOTEXACTI:
2353        case OP_NOTPOSSTARI:
2354        case OP_NOTPOSPLUSI:
2355        case OP_NOTPOSQUERYI:
2356        case OP_NOTPOSUPTOI:
2357        if (utf) utf16_char = TRUE;
2358    #endif
2359        /* Fall through. */
2360    
2361        default:
2362        length = OP_lengths16[op] - 1;
2363        break;
2364    
2365        case OP_CLASS:
2366        case OP_NCLASS:
2367        /* Skip the character bit map. */
2368        ptr += 32/sizeof(pcre_uint16);
2369        length = 0;
2370        break;
2371    
2372        case OP_XCLASS:
2373        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2374        if (LINK_SIZE > 1)
2375          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2376            - (1 + LINK_SIZE + 1));
2377        else
2378          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2379    
2380        /* Reverse the size of the XCLASS instance. */
2381        *ptr = swap_uint16(*ptr);
2382        ptr++;
2383        if (LINK_SIZE > 1)
2384          {
2385          *ptr = swap_uint16(*ptr);
2386          ptr++;
2387          }
2388    
2389        op = *ptr;
2390        *ptr = swap_uint16(op);
2391        ptr++;
2392        if ((op & XCL_MAP) != 0)
2393          {
2394          /* Skip the character bit map. */
2395          ptr += 32/sizeof(pcre_uint16);
2396          length -= 32/sizeof(pcre_uint16);
2397          }
2398        break;
2399        }
2400      }
2401    /* Control should never reach here in 16 bit mode. */
2402    #endif /* SUPPORT_PCRE16 */
2403    }
2404    #endif /* SUPPORT_PCRE[8|16] */
2405    
2406    
2407    
#if defined SUPPORT_PCRE32
/* Byte-flip a compiled 32-bit pattern in place.

The magic number is set to REVERSED_MAGIC_NUMBER and the other header fields
are byte-swapped. If an extra block is supplied, the size, flags and minlength
fields of its study data are swapped as well. After that, every 32-bit unit
from the start of the name table up to and including the terminating OP_END is
swapped, except that the 32-byte bit maps belonging to OP_CLASS, OP_NCLASS and
OP_XCLASS items are stepped over without being touched.

Arguments:
  ere         the compiled pattern; it is modified in place
  extra       the associated extra block, or NULL if there is none

Returns:      nothing
*/

static void
regexflip_32(pcre *ere, pcre_extra *extra)
{
real_pcre32 *re = (real_pcre32 *)ere;
int op;

/* Both of these are computed from header fields that are swapped below, so
they have to be captured first. */

pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);

if (extra != NULL)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 32-bit mode we must swap bytes in the name table, if present, and then
in the pattern itself. On entry to the loop, "length" is the number of units in
the name table; on later iterations it is the number of units that follow the
opcode that has just been processed. */

while(TRUE)
  {
  /* Swap previous characters. */
  while (length-- > 0)
    {
    *ptr = swap_uint32(*ptr);
    ptr++;
    }

  /* Get next opcode. It is read before being swapped because the unswapped
  value is needed to index the length table and to drive the switch. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint32(op);

  switch (op)
    {
    case OP_END:
    return;

    default:
    length = OP_lengths32[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint32);
    length = 0;
    break;

    case OP_XCLASS:
    /* LINK_SIZE can only be 1 in 32-bit mode. The item's total length is read
    from the unit after the opcode before that unit is swapped; what remains
    after the opcode, the length and the flags unit is left for the next pass
    of the swapping loop above. */
    length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint32(*ptr);
    ptr++;

    /* The flags unit is reused via "op" so that XCL_MAP can be tested on the
    unswapped value. */
    op = *ptr;
    *ptr = swap_uint32(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint32);
      length -= 32/sizeof(pcre_uint32);
      }
    break;
    }
  }
/* Control should never reach here in 32 bit mode. */
}

#endif /* SUPPORT_PCRE32 */
2500    
2501    
2502    
2503    static void
2504    regexflip(pcre *ere, pcre_extra *extra)
2505    {
2506    #if defined SUPPORT_PCRE32
2507      if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2508        regexflip_32(ere, extra);
2509    #endif
2510    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2511      if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2512        regexflip8_or_16(ere, extra);
2513    #endif
2514    }
2515    
2516    
2517    
2518    /*************************************************
2519    *        Check match or recursion limit          *
2520    *************************************************/
2521    
2522    static int
2523    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2524      int start_offset, int options, int *use_offsets, int use_size_offsets,
2525      int flag, unsigned long int *limit, int errnumber, const char *msg)
2526    {
2527    int count;
2528    int min = 0;
2529    int mid = 64;
2530    int max = -1;
2531    
2532    extra->flags |= flag;
2533    
2534    for (;;)
2535      {
2536      *limit = mid;
2537    
2538      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2539        use_offsets, use_size_offsets);
2540    
2541      if (count == errnumber)
2542        {
2543        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2544        min = mid;
2545        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2546        }
2547    
2548      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2549                             count == PCRE_ERROR_PARTIAL)
2550        {
2551        if (mid == min + 1)
2552          {
2553          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2554          break;
2555          }
2556        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2557        max = mid;
2558        mid = (min + mid)/2;
2559        }
2560      else break;    /* Some other error */
2561      }
2562    
2563    extra->flags &= ~flag;
2564    return count;
2565    }
2566    
2567    
2568    
2569    /*************************************************
2570    *         Case-independent strncmp() function    *
2571    *************************************************/
2572    
2573    /*
2574    Arguments:
2575      s         first string
2576      t         second string
2577      n         number of characters to compare
2578    
2579    Returns:    < 0, = 0, or > 0, according to the comparison
2580    */
2581    
2582    static int
2583    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2584    {
2585    while (n--)
2586      {
2587      int c = tolower(*s++) - tolower(*t++);
2588      if (c) return c;
2589      }
2590    return 0;
2591  }  }
2592    
2593    
2594    
2595  /* Character string printing function. */  /*************************************************
2596    *         Check newline indicator                *
2597  static void pchars(unsigned char *p, int length)  *************************************************/
 {  
 int c;  
 while (length-- > 0)  
   if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
     else fprintf(outfile, "\\x%02x", c);  
 }  
2598    
2599    /* This is used both at compile and run-time to check for <xxx> escapes. Print
2600    a message and return 0 if there is no match.
2601    
2602    Arguments:
2603      p           points after the leading '<'
2604      f           file for error message
2605    
2606  /* Alternative malloc function, to test functionality and show the size of the  Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2607  compiled re. */  */
2608    
2609  static void *new_malloc(size_t size)  static int
2610    check_newline(pcre_uint8 *p, FILE *f)
2611  {  {
2612  gotten_store = size;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2613  if (log_store)  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2614    fprintf(outfile, "Memory allocation (code space): %d\n",  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2615      (int)((int)size - offsetof(real_pcre, code[0])));  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2616  return malloc(size);  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2617    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2618    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2619    fprintf(f, "Unknown newline type at: <%s\n", p);
2620    return 0;
2621  }  }
2622    
2623    
2624    
2625    /*************************************************
2626    *             Usage function                     *
2627    *************************************************/
2628    
/* Write the command-line help text to stdout. Options that depend on how the
program was built (-16, -32, -dfa, -p, and the readline remark) are shown only
when the corresponding support is compiled in. The list of -C arguments
includes "ebcdic" and "ebcdic-nl", which the option scanner in main() accepts.
*/

static void
usage(void)
{
printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE16
printf("  -16      use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf("  -32      use the 32-bit library\n");
#endif
printf("  -b       show compiled code\n");
printf("  -C       show PCRE compile-time options and exit\n");
printf("  -C arg   show a specific compile-time option\n");
printf("           and exit with its value. The arg can be:\n");
printf("     linksize     internal link size [2, 3, 4]\n");
printf("     pcre8        8 bit library support enabled [0, 1]\n");
printf("     pcre16       16 bit library support enabled [0, 1]\n");
printf("     pcre32       32 bit library support enabled [0, 1]\n");
printf("     utf          Unicode Transformation Format supported [0, 1]\n");
printf("     ucp          Unicode Properties supported [0, 1]\n");
printf("     jit          Just-in-time compiler supported [0, 1]\n");
printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
printf("     ebcdic       compiled for an EBCDIC environment [0, 1]\n");
printf("     ebcdic-nl    LF code in an EBCDIC environment [0xNN], else 0\n");
printf("  -d       debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf("  -dfa     force DFA matching for all subjects\n");
#endif
printf("  -help    show usage information\n");
printf("  -i       show information about compiled patterns\n"
       "  -M       find MATCH_LIMIT minimum for each subject\n"
       "  -m       output memory used information\n"
       "  -o <n>   set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf("  -p       use POSIX interface\n");
#endif
printf("  -q       quiet: do not output PCRE version number at start\n");
printf("  -S <n>   set stack size to <n> megabytes\n");
printf("  -s       force each pattern to be studied at basic level\n"
       "  -s+      force each pattern to be studied, using JIT if available\n"
       "  -s++     ditto, verifying when JIT was actually used\n"
       "  -s+n     force each pattern to be studied, using JIT if available,\n"
       "             where 1 <= n <= 7 selects JIT options\n"
       "  -s++n    ditto, verifying when JIT was actually used\n"
       "  -t       time compilation and execution\n");
printf("  -t <n>   time compilation and execution, repeating <n> times\n");
printf("  -tm      time execution (matching) only\n");
printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
}
2683    
2684    
2685    
2686    /*************************************************
2687    *                Main Program                    *
2688    *************************************************/
2689    
2690  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
2691  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 311  options, followed by a set of test data, Line 2694  options, followed by a set of test data,
2694  int main(int argc, char **argv)  int main(int argc, char **argv)
2695  {  {
2696  FILE *infile = stdin;  FILE *infile = stdin;
2697    const char *version;
2698  int options = 0;  int options = 0;
2699  int study_options = 0;  int study_options = 0;
2700    int default_find_match_limit = FALSE;
2701  int op = 1;  int op = 1;
2702  int timeit = 0;  int timeit = 0;
2703    int timeitm = 0;
2704  int showinfo = 0;  int showinfo = 0;
2705  int showstore = 0;  int showstore = 0;
2706  int posix = 0;  int force_study = -1;
2707    int force_study_options = 0;
2708    int quiet = 0;
2709    int size_offsets = 45;
2710    int size_offsets_max;
2711    int *offsets = NULL;
2712  int debug = 0;  int debug = 0;
2713  int done = 0;  int done = 0;
2714  unsigned char buffer[30000];  int all_use_dfa = 0;
2715  unsigned char dbuffer[1024];  int verify_jit = 0;
2716    int yield = 0;
2717    int stack_size;
2718    pcre_uint8 *dbuffer = NULL;
2719    size_t dbuffer_size = 1u << 14;
2720    
2721    #if !defined NOPOSIX
2722    int posix = 0;
2723    #endif
2724    #if !defined NODFA
2725    int *dfa_workspace = NULL;
2726    #endif
2727    
2728    pcre_jit_stack *jit_stack = NULL;
2729    
2730    /* These vectors store, end-to-end, a list of zero-terminated captured
2731    substring names, each list itself being terminated by an empty name. Assume
2732    that 1024 is plenty long enough for the few names we'll be testing. It is
2733    easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2734    for the actual memory, to ensure alignment. */
2735    
2736    pcre_uint32 copynames[1024];
2737    pcre_uint32 getnames[1024];
2738    
2739    #ifdef SUPPORT_PCRE32
2740    pcre_uint32 *cn32ptr;
2741    pcre_uint32 *gn32ptr;
2742    #endif
2743    
2744    #ifdef SUPPORT_PCRE16
2745    pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2746    pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2747    pcre_uint16 *cn16ptr;
2748    pcre_uint16 *gn16ptr;
2749    #endif
2750    
2751    #ifdef SUPPORT_PCRE8
2752    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2753    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2754    pcre_uint8 *cn8ptr;
2755    pcre_uint8 *gn8ptr;
2756    #endif
2757    
2758  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that valgrind will check their misuse when
2759    debugging. They grow automatically when very long lines are read. The 16-
2760    and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2761    
2762    buffer = (pcre_uint8 *)malloc(buffer_size);
2763    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2764    
2765    /* The outfile variable is static so that new_malloc can use it. */
2766    
2767  outfile = stdout;  outfile = stdout;
2768    
2769    /* The following  _setmode() stuff is some Windows magic that tells its runtime
2770    library to translate CRLF into a single LF character. At least, that's what
2771    I've been told: never having used Windows I take this all on trust. Originally
2772    it set 0x8000, but then I was advised that _O_BINARY was better. */
2773    
2774    #if defined(_WIN32) || defined(WIN32)
2775    _setmode( _fileno( stdout ), _O_BINARY );
2776    #endif
2777    
2778    /* Get the version number: both pcre_version() and pcre16_version() give the
2779    same answer. We just need to ensure that we call one that is available. */
2780    
2781    #if defined SUPPORT_PCRE8
2782    version = pcre_version();
2783    #elif defined SUPPORT_PCRE16
2784    version = pcre16_version();
2785    #elif defined SUPPORT_PCRE32
2786    version = pcre32_version();
2787    #endif
2788    
2789  /* Scan options */  /* Scan options */
2790    
2791  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2792    {    {
2793    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    pcre_uint8 *endptr;
2794      showstore = 1;    char *arg = argv[op];
2795    else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
2796    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    if (strcmp(arg, "-m") == 0) showstore = 1;
2797    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2798    else if (strcmp(argv[op], "-p") == 0) posix = 1;  
2799      else if (strncmp(arg, "-s+", 3) == 0)
2800        {
2801        arg += 3;
2802        if (*arg == '+') { arg++; verify_jit = TRUE; }
2803        force_study = 1;
2804        if (*arg == 0)
2805          force_study_options = jit_study_bits[6];
2806        else if (*arg >= '1' && *arg <= '7')
2807          force_study_options = jit_study_bits[*arg - '1'];
2808        else goto BAD_ARG;
2809        }
2810      else if (strcmp(arg, "-16") == 0)
2811        {
2812    #ifdef SUPPORT_PCRE16
2813        pcre_mode = PCRE16_MODE;
2814    #else
2815        printf("** This version of PCRE was built without 16-bit support\n");
2816        exit(1);
2817    #endif
2818        }
2819      else if (strcmp(arg, "-32") == 0)
2820        {
2821    #ifdef SUPPORT_PCRE32
2822        pcre_mode = PCRE32_MODE;
2823    #else
2824        printf("** This version of PCRE was built without 32-bit support\n");
2825        exit(1);
2826    #endif
2827        }
2828      else if (strcmp(arg, "-q") == 0) quiet = 1;
2829      else if (strcmp(arg, "-b") == 0) debug = 1;
2830      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2831      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2832      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2833    #if !defined NODFA
2834      else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2835    #endif
2836      else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2837          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2838            *endptr == 0))
2839        {
2840        op++;
2841        argc--;
2842        }
2843      else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2844        {
2845        int both = arg[2] == 0;
2846        int temp;
2847        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2848                         *endptr == 0))
2849          {
2850          timeitm = temp;
2851          op++;
2852          argc--;
2853          }
2854        else timeitm = LOOPREPEAT;
2855        if (both) timeit = timeitm;
2856        }
2857      else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2858          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2859            *endptr == 0))
2860        {
2861    #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2862        printf("PCRE: -S not supported on this OS\n");
2863        exit(1);
2864    #else
2865        int rc;
2866        struct rlimit rlim;
2867        getrlimit(RLIMIT_STACK, &rlim);
2868        rlim.rlim_cur = stack_size * 1024 * 1024;
2869        rc = setrlimit(RLIMIT_STACK, &rlim);
2870        if (rc != 0)
2871          {
2872        printf("PCRE: setrlimit() failed with error %d\n", rc);
2873        exit(1);
2874          }
2875        op++;
2876        argc--;
2877    #endif
2878        }
2879    #if !defined NOPOSIX
2880      else if (strcmp(arg, "-p") == 0) posix = 1;
2881    #endif
2882      else if (strcmp(arg, "-C") == 0)
2883        {
2884        int rc;
2885        unsigned long int lrc;
2886    
2887        if (argc > 2)
2888          {
2889          if (strcmp(argv[op + 1], "linksize") == 0)
2890            {
2891            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2892            printf("%d\n", rc);
2893            yield = rc;
2894            }
2895          else if (strcmp(argv[op + 1], "pcre8") == 0)
2896            {
2897    #ifdef SUPPORT_PCRE8
2898            printf("1\n");
2899            yield = 1;
2900    #else
2901            printf("0\n");
2902            yield = 0;
2903    #endif
2904            }
2905          else if (strcmp(argv[op + 1], "pcre16") == 0)
2906            {
2907    #ifdef SUPPORT_PCRE16
2908            printf("1\n");
2909            yield = 1;
2910    #else
2911            printf("0\n");
2912            yield = 0;
2913    #endif
2914            }
2915          else if (strcmp(argv[op + 1], "pcre32") == 0)
2916            {
2917    #ifdef SUPPORT_PCRE32
2918            printf("1\n");
2919            yield = 1;
2920    #else
2921            printf("0\n");
2922            yield = 0;
2923    #endif
2924            goto EXIT;
2925            }
2926          if (strcmp(argv[op + 1], "utf") == 0)
2927            {
2928    #ifdef SUPPORT_PCRE8
2929            if (pcre_mode == PCRE8_MODE)
2930              (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2931    #endif
2932    #ifdef SUPPORT_PCRE16
2933            if (pcre_mode == PCRE16_MODE)
2934              (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2935    #endif
2936    #ifdef SUPPORT_PCRE32
2937            if (pcre_mode == PCRE32_MODE)
2938              (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2939    #endif
2940            printf("%d\n", rc);
2941            yield = rc;
2942            goto EXIT;
2943            }
2944          else if (strcmp(argv[op + 1], "ucp") == 0)
2945            {
2946            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2947            printf("%d\n", rc);
2948            yield = rc;
2949            }
2950          else if (strcmp(argv[op + 1], "jit") == 0)
2951            {
2952            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2953            printf("%d\n", rc);
2954            yield = rc;
2955            }
2956          else if (strcmp(argv[op + 1], "newline") == 0)
2957            {
2958            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2959            print_newline_config(rc, TRUE);
2960            }
2961          else if (strcmp(argv[op + 1], "ebcdic") == 0)
2962            {
2963    #ifdef EBCDIC
2964            printf("1\n");
2965            yield = 1;
2966    #else
2967            printf("0\n");
2968    #endif
2969            }
2970          else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2971            {
2972    #ifdef EBCDIC
2973            printf("0x%02x\n", CHAR_LF);
2974    #else
2975            printf("0\n");
2976    #endif
2977            }
2978          else
2979            {
2980            printf("Unknown -C option: %s\n", argv[op + 1]);
2981            }
2982          goto EXIT;
2983          }
2984    
2985        /* No argument for -C: output all configuration information. */
2986    
2987        printf("PCRE version %s\n", version);
2988        printf("Compiled with\n");
2989    
2990    #ifdef EBCDIC
2991        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2992    #endif
2993    
2994    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2995    are set, either both UTFs are supported or both are not supported. */
2996    
2997    #ifdef SUPPORT_PCRE8
2998        printf("  8-bit support\n");
2999        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3000          printf ("  %sUTF-8 support\n", rc ? "" : "No ");
3001    #endif
3002    #ifdef SUPPORT_PCRE16
3003        printf("  16-bit support\n");
3004        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3005        printf ("  %sUTF-16 support\n", rc ? "" : "No ");
3006    #endif
3007    #ifdef SUPPORT_PCRE32
3008        printf("  32-bit support\n");
3009        (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3010        printf ("  %sUTF-32 support\n", rc ? "" : "No ");
3011    #endif
3012    
3013        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3014        printf("  %sUnicode properties support\n", rc? "" : "No ");
3015        (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3016        if (rc)
3017          {
3018          const char *arch;
3019          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3020          printf("  Just-in-time compiler support: %s\n", arch);
3021          }
3022        else
3023          printf("  No just-in-time compiler support\n");
3024        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3025        print_newline_config(rc, FALSE);
3026        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3027        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3028                                         "all Unicode newlines");
3029        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3030        printf("  Internal link size = %d\n", rc);
3031        (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3032        printf("  POSIX malloc threshold = %d\n", rc);
3033        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3034        printf("  Default match limit = %ld\n", lrc);
3035        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3036        printf("  Default recursion depth limit = %ld\n", lrc);
3037        (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3038        printf("  Match recursion uses %s", rc? "stack" : "heap");
3039        if (showstore)
3040          {
3041          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3042          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3043          }
3044        printf("\n");
3045        goto EXIT;
3046        }
3047      else if (strcmp(arg, "-help") == 0 ||
3048               strcmp(arg, "--help") == 0)
3049        {
3050        usage();
3051        goto EXIT;
3052        }
3053    else    else
3054      {      {
3055      printf("*** Unknown option %s\n", argv[op]);      BAD_ARG:
3056      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("** Unknown or malformed option %s\n", arg);
3057      printf("  -d   debug: show compiled code; implies -i\n"      usage();
3058             "  -i   show information about compiled pattern\n"      yield = 1;
3059             "  -p   use POSIX interface\n"      goto EXIT;
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
3060      }      }
3061    op++;    op++;
3062    argc--;    argc--;
3063    }    }
3064    
3065    /* Get the store for the offsets vector, and remember what it was */
3066    
3067    size_offsets_max = size_offsets;
3068    offsets = (int *)malloc(size_offsets_max * sizeof(int));
3069    if (offsets == NULL)
3070      {
3071      printf("** Failed to get %d bytes of memory for offsets vector\n",
3072        (int)(size_offsets_max * sizeof(int)));
3073      yield = 1;
3074      goto EXIT;
3075      }
3076    
3077  /* Sort out the input and output files */  /* Sort out the input and output files */
3078    
3079  if (argc > 1)  if (argc > 1)
3080    {    {
3081    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
3082    if (infile == NULL)    if (infile == NULL)
3083      {      {
3084      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
3085      return 1;      yield = 1;
3086        goto EXIT;
3087      }      }
3088    }    }
3089    
3090  if (argc > 2)  if (argc > 2)
3091    {    {
3092    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
3093    if (outfile == NULL)    if (outfile == NULL)
3094      {      {
3095      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
3096      return 1;      yield = 1;
3097        goto EXIT;
3098      }      }
3099    }    }
3100    
3101  /* Set alternative malloc function */  /* Set alternative malloc function */
3102    
3103    #ifdef SUPPORT_PCRE8
3104  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
3105    pcre_free = new_free;
3106    pcre_stack_malloc = stack_malloc;
3107    pcre_stack_free = stack_free;
3108    #endif
3109    
3110    #ifdef SUPPORT_PCRE16
3111    pcre16_malloc = new_malloc;
3112    pcre16_free = new_free;
3113    pcre16_stack_malloc = stack_malloc;
3114    pcre16_stack_free = stack_free;
3115    #endif
3116    
3117    #ifdef SUPPORT_PCRE32
3118    pcre32_malloc = new_malloc;
3119    pcre32_free = new_free;
3120    pcre32_stack_malloc = stack_malloc;
3121    pcre32_stack_free = stack_free;
3122    #endif
3123    
3124  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
3125    
3126  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3127    
3128  /* Main loop */  /* Main loop */
3129    
# Line 391  while (!done) Line 3134  while (!done)
3134    
3135  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
3136    regex_t preg;    regex_t preg;
3137      int do_posix = 0;
3138  #endif  #endif
3139    
3140    const char *error;    const char *error;
3141    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
3142    unsigned const char *tables = NULL;    pcre_uint8 *p, *pp, *ppp;
3143      pcre_uint8 *to_file = NULL;
3144      const pcre_uint8 *tables = NULL;
3145      unsigned long int get_options;
3146      unsigned long int true_size, true_study_size = 0;
3147      size_t size, regex_gotten_store;
3148      int do_allcaps = 0;
3149      int do_mark = 0;
3150    int do_study = 0;    int do_study = 0;
3151      int no_force_study = 0;
3152    int do_debug = debug;    int do_debug = debug;
3153    int do_G = 0;    int do_G = 0;
3154    int do_g = 0;    int do_g = 0;
3155    int do_showinfo = showinfo;    int do_showinfo = showinfo;
3156    int do_showrest = 0;    int do_showrest = 0;
3157    int do_posix = 0;    int do_showcaprest = 0;
3158    int erroroffset, len, delimiter;    int do_flip = 0;
3159      int erroroffset, len, delimiter, poffset;
3160    
3161    #if !defined NODFA
3162      int dfa_matched = 0;
3163    #endif
3164    
3165    if (infile == stdin) printf("  re> ");    use_utf = 0;
3166    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    debug_lengths = 1;
3167    
3168      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
3169    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3170      fflush(outfile);
3171    
3172    p = buffer;    p = buffer;
3173    while (isspace(*p)) p++;    while (isspace(*p)) p++;
3174    if (*p == 0) continue;    if (*p == 0) continue;
3175    
3176    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
3177    complete, read more. */  
3178      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3179        {
3180        pcre_uint32 magic;
3181        pcre_uint8 sbuf[8];
3182        FILE *f;
3183    
3184        p++;
3185        if (*p == '!')
3186          {
3187          do_debug = TRUE;
3188          do_showinfo = TRUE;
3189          p++;
3190          }
3191    
3192        pp = p + (int)strlen((char *)p);
3193        while (isspace(pp[-1])) pp--;
3194        *pp = 0;
3195    
3196        f = fopen((char *)p, "rb");
3197        if (f == NULL)
3198          {
3199          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3200          continue;
3201          }
3202    
3203        first_gotten_store = 0;
3204        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3205    
3206        true_size =
3207          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3208        true_study_size =
3209          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3210    
3211        re = (pcre *)new_malloc(true_size);
3212        if (re == NULL)
3213          {
3214          printf("** Failed to get %d bytes of memory for pcre object\n",
3215            (int)true_size);
3216          yield = 1;
3217          goto EXIT;
3218          }
3219        regex_gotten_store = first_gotten_store;
3220    
3221        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3222    
3223        magic = REAL_PCRE_MAGIC(re);
3224        if (magic != MAGIC_NUMBER)
3225          {
3226          if (swap_uint32(magic) == MAGIC_NUMBER)
3227            {
3228            do_flip = 1;
3229            }
3230          else
3231            {
3232            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3233            new_free(re);
3234            fclose(f);
3235            continue;
3236            }
3237          }
3238    
3239        /* We hide the byte-invert info for little and big endian tests. */
3240        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3241          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3242    
3243        /* Now see if there is any following study data. */
3244    
3245        if (true_study_size != 0)
3246          {
3247          pcre_study_data *psd;
3248    
3249          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3250          extra->flags = PCRE_EXTRA_STUDY_DATA;
3251    
3252          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3253          extra->study_data = psd;
3254    
3255          if (fread(psd, 1, true_study_size, f) != true_study_size)
3256            {
3257            FAIL_READ:
3258            fprintf(outfile, "Failed to read data from %s\n", p);
3259            if (extra != NULL)
3260              {
3261              PCRE_FREE_STUDY(extra);
3262              }
3263            new_free(re);
3264            fclose(f);
3265            continue;
3266            }
3267          fprintf(outfile, "Study data loaded from %s\n", p);
3268          do_study = 1;     /* To get the data output if requested */
3269          }
3270        else fprintf(outfile, "No study data\n");
3271    
3272        /* Flip the necessary bytes. */
3273        if (do_flip)
3274          {
3275          int rc;
3276          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3277          if (rc == PCRE_ERROR_BADMODE)
3278            {
3279            /* Simulate the result of the function call below. */
3280            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3281              pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3282              PCRE_INFO_OPTIONS);
3283            fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3284              "%d-bit mode\n", 8 * CHAR_SIZE,
3285              8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3286            new_free(re);
3287            fclose(f);
3288            continue;
3289            }
3290          }
3291    
3292        /* Need to know if UTF-8 for printing data strings. */
3293    
3294        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3295          {
3296          new_free(re);
3297          fclose(f);
3298          continue;
3299          }
3300        use_utf = (get_options & PCRE_UTF8) != 0;
3301    
3302        fclose(f);
3303        goto SHOW_INFO;
3304        }
3305    
3306      /* In-line pattern (the usual case). Get the delimiter and seek the end of
3307      the pattern; if it isn't complete, read more. */
3308    
3309    delimiter = *p++;    delimiter = *p++;
3310    
3311    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
3312      {      {
3313      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3314      goto SKIP_DATA;      goto SKIP_DATA;
3315      }      }
3316    
3317    pp = p;    pp = p;
3318      poffset = (int)(p - buffer);
3319    
3320    for(;;)    for(;;)
3321      {      {
# Line 435  while (!done) Line 3326  while (!done)
3326        pp++;        pp++;
3327        }        }
3328      if (*pp != 0) break;      if (*pp != 0) break;
3329        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
3330        {        {
3331        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
3332        done = 1;        done = 1;
# Line 453  while (!done) Line 3335  while (!done)
3335      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3336      }      }
3337    
3338      /* The buffer may have moved while being extended; reset the start of data
3339      pointer to the correct relative point in the buffer. */
3340    
3341      p = buffer + poffset;
3342    
3343    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
3344    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
3345    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
3346    
3347    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
3348    
3349    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
3350      for callouts. */
3351    
3352    *pp++ = 0;    *pp++ = 0;
3353      strcpy((char *)pbuffer, (char *)p);
3354    
3355    /* Look for options after final delimiter */    /* Look for options after final delimiter */
3356    
3357    options = 0;    options = 0;
3358    study_options = 0;    study_options = force_study_options;
3359    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
3360    
3361    while (*pp != 0)    while (*pp != 0)
3362      {      {
3363      switch (*pp++)      switch (*pp++)
3364        {        {
3365          case 'f': options |= PCRE_FIRSTLINE; break;
3366        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
3367        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
3368        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
3369        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
3370        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
3371    
3372        case '+': do_showrest = 1; break;        case '+':
3373          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3374          break;
3375    
3376          case '=': do_allcaps = 1; break;
3377        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
3378          case 'B': do_debug = 1; break;
3379          case 'C': options |= PCRE_AUTO_CALLOUT; break;
3380        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
3381        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3382          case 'F': do_flip = 1; break;
3383        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
3384        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
3385          case 'J': options |= PCRE_DUPNAMES; break;
3386          case 'K': do_mark = 1; break;
3387        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
3388          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3389    
3390  #if !defined NOPOSIX  #if !defined NOPOSIX
3391        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
3392  #endif  #endif
3393    
3394        case 'S': do_study = 1; break;        case 'S':
3395          do_study = 1;
3396          for (;;)
3397            {
3398            switch (*pp++)
3399              {
3400              case 'S':
3401              do_study = 0;
3402              no_force_study = 1;
3403              break;
3404    
3405              case '!':
3406              study_options |= PCRE_STUDY_EXTRA_NEEDED;
3407              break;
3408    
3409              case '+':
3410              if (*pp == '+')
3411                {
3412                verify_jit = TRUE;
3413                pp++;
3414                }
3415              if (*pp >= '1' && *pp <= '7')
3416                study_options |= jit_study_bits[*pp++ - '1'];
3417              else
3418                study_options |= jit_study_bits[6];
3419              break;
3420    
3421              case '-':
3422              study_options &= ~PCRE_STUDY_ALLJIT;
3423              break;
3424    
3425              default:
3426              pp--;
3427              goto ENDLOOP;
3428              }
3429            }
3430          ENDLOOP:
3431          break;
3432    
3433        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
3434          case 'W': options |= PCRE_UCP; break;
3435        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
3436          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3437          case 'Z': debug_lengths = 0; break;
3438          case '8': options |= PCRE_UTF8; use_utf = 1; break;
3439          case '?': options |= PCRE_NO_UTF8_CHECK; break;
3440    
3441          case 'T':
3442          switch (*pp++)
3443            {
3444            case '0': tables = tables0; break;
3445            case '1': tables = tables1; break;
3446    
3447            case '\r':
3448            case '\n':
3449            case ' ':
3450            case 0:
3451            fprintf(outfile, "** Missing table number after /T\n");
3452            goto SKIP_DATA;
3453    
3454            default:
3455            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3456            goto SKIP_DATA;
3457            }
3458          break;
3459    
3460        case 'L':        case 'L':
3461        ppp = pp;        ppp = pp;
3462        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
3463          /* The '0' test is just in case this is an unterminated line. */
3464          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3465        *ppp = 0;        *ppp = 0;
3466        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3467          {          {
3468          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3469          goto SKIP_DATA;          goto SKIP_DATA;
3470          }          }
3471        tables = pcre_maketables();        locale_set = 1;
3472          tables = PCRE_MAKETABLES;
3473        pp = ppp;        pp = ppp;
3474        break;        break;
3475    
3476        case '\n': case ' ': break;        case '>':
3477          to_file = pp;
3478          while (*pp != 0) pp++;
3479          while (isspace(pp[-1])) pp--;
3480          *pp = 0;
3481          break;
3482    
3483          case '<':
3484            {
3485            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3486              {
3487              options |= PCRE_JAVASCRIPT_COMPAT;
3488              pp += 3;
3489              }
3490            else
3491              {
3492              int x = check_newline(pp, outfile);
3493              if (x == 0) goto SKIP_DATA;
3494              options |= x;
3495              while (*pp++ != '>');
3496              }
3497            }
3498          break;
3499    
3500          case '\r':                      /* So that it works in Windows */
3501          case '\n':
3502          case ' ':
3503          break;
3504    
3505        default:        default:
3506        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3507        goto SKIP_DATA;        goto SKIP_DATA;
# Line 517  while (!done) Line 3510  while (!done)
3510    
3511    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
3512    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
3513    local character tables. */    local character tables. Neither does it have 16-bit support. */
3514    
3515  #if !defined NOPOSIX  #if !defined NOPOSIX
3516    if (posix || do_posix)    if (posix || do_posix)
3517      {      {
3518      int rc;      int rc;
3519      int cflags = 0;      int cflags = 0;
3520    
3521      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3522      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3523        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3524        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3525        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3526        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3527        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3528    
3529        first_gotten_store = 0;
3530      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
3531    
3532      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 533  while (!done) Line 3534  while (!done)
3534    
3535      if (rc != 0)      if (rc != 0)
3536        {        {
3537        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3538        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3539        goto SKIP_DATA;        goto SKIP_DATA;
3540        }        }
3541      }      }
3542    
3543    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
3544    
3545      else
3546    #endif  /* !defined NOPOSIX */
3547    
3548        {
3549        /* In 16- or 32-bit mode, convert the input. */
3550    
3551    #ifdef SUPPORT_PCRE16
3552        if (pcre_mode == PCRE16_MODE)
3553          {
3554          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3555            {
3556            case -1:
3557            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3558              "converted to UTF-16\n");
3559            goto SKIP_DATA;
3560    
3561            case -2:
3562            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3563              "cannot be converted to UTF-16\n");
3564            goto SKIP_DATA;
3565    
3566            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3567            fprintf(outfile, "**Failed: character value greater than 0xffff "
3568              "cannot be converted to 16-bit in non-UTF mode\n");
3569            goto SKIP_DATA;
3570    
3571            default:
3572            break;
3573            }
3574          p = (pcre_uint8 *)buffer16;
3575          }
3576    #endif
3577    
3578    #ifdef SUPPORT_PCRE32
3579        if (pcre_mode == PCRE32_MODE)
3580          {
3581          switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3582            {
3583            case -1:
3584            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3585              "converted to UTF-32\n");
3586            goto SKIP_DATA;
3587