/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC revision 1263 by chpe, Wed Feb 27 17:31:40 2013 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    /* This program now supports the testing of all of the 8-bit, 16-bit, and
40    32-bit PCRE libraries in a single program. This is different from the modules
41    such as pcre_compile.c in the library itself, which are compiled separately for
42    each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43    twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44    make use of any of the macros from pcre_internal.h that depend on
45    COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46    SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47    supported library functions. */
48    
49    #ifdef HAVE_CONFIG_H
50    #include "config.h"
51    #endif
52    
53  #include <ctype.h>  #include <ctype.h>
54  #include <stdio.h>  #include <stdio.h>
55  #include <string.h>  #include <string.h>
56  #include <stdlib.h>  #include <stdlib.h>
57  #include <time.h>  #include <time.h>
58  #include <locale.h>  #include <locale.h>
59    #include <errno.h>
60    
61    /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67    #ifdef HAVE_UNISTD_H
68    #include <unistd.h>
69    #endif
70    #if defined(SUPPORT_LIBREADLINE)
71    #include <readline/readline.h>
72    #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80    #endif
81    
82    /* A number of things vary for Windows builds. Originally, pcretest opened its
83    input and output without "b"; then I was told that "b" was needed in some
84    environments, so it was added for release 5.0 to both the input and output. (It
85    makes no difference on Unix-like systems.) Later I was told that it is wrong
86    for the input on Windows. I've now abstracted the modes into two macros that
87    are set here, to make it easier to fiddle with them, and removed "b" from the
88    input mode under Windows. */
89    
90    #if defined(_WIN32) || defined(WIN32)
91    #include <io.h>                /* For _setmode() */
92    #include <fcntl.h>             /* For _O_BINARY */
93    #define INPUT_MODE   "r"
94    #define OUTPUT_MODE  "wb"
95    
96    #ifndef isatty
97    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
98    #endif                         /* though in some environments they seem to   */
99                                   /* be already defined, hence the #ifndefs.    */
100    #ifndef fileno
101    #define fileno _fileno
102    #endif
103    
104    /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106    #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
112    #else
113    #include <sys/time.h>          /* These two includes are needed */
114    #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119    #define INPUT_MODE   "rb"
120    #define OUTPUT_MODE  "wb"
121    #endif
122    #endif
123    
124    #ifdef __VMS
125    #include <ssdef.h>
126    void vms_setsymbol( char *, char *, int );
127    #endif
128    
129    
130    #define PRIV(name) name
131    
132    /* We have to include pcre_internal.h because we need the internal info for
133    displaying the results of pcre_study() and we also need to know about the
134    internal macros, structures, and other internal data values; pcretest has
135    "inside information" compared to a program that strictly follows the PCRE API.
136    
137    Although pcre_internal.h does itself include pcre.h, we explicitly include it
138    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139    appropriately for an application, not for building PCRE. */
140    
141    #include "pcre.h"
142    #include "pcre_internal.h"
143    
144    /* The pcre_printint() function, which prints the internal form of a compiled
145    regex, is held in a separate file so that (a) it can be compiled in either
146    8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147    when that is compiled in debug mode. */
148    
149    #ifdef SUPPORT_PCRE8
150    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151    #endif
152    #ifdef SUPPORT_PCRE16
153    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154    #endif
155    #ifdef SUPPORT_PCRE32
156    void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
157    #endif
158    
159    /* We need access to some of the data tables that PCRE uses. So as not to have
160    to keep two copies, we include the source files here, changing the names of the
161    external symbols to prevent clashes. */
162    
163    #define PCRE_INCLUDED
164    
165    #include "pcre_tables.c"
166    #include "pcre_ucd.c"
167    
168    /* The definition of the macro PRINTABLE, which determines whether to print an
169    output character as-is or as a hex value when showing compiled patterns, is
170    the same as in the printint.src file. We use it here in cases when the locale
171    has not been explicitly changed, so as to get consistent output from systems
172    that differ in their output from isprint() even in the "C" locale. */
173    
174    #ifdef EBCDIC
175    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
176    #else
177    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
178    #endif
179    
180  /* Use the internal info for displaying the results of pcre_study(). */  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
181    
182  #include "internal.h"  /* Posix support is disabled in 16 or 32 bit only mode. */
183    #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
184    #define NOPOSIX
185    #endif
186    
187  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
188  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 192  Makefile. */
192  #include "pcreposix.h"  #include "pcreposix.h"
193  #endif  #endif
194    
195    /* It is also possible, originally for the benefit of a version that was
196    imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197    NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
198    automatically cut out the UTF support if PCRE is built without it. */
199    
200    #ifndef SUPPORT_UTF
201    #ifndef NOUTF
202    #define NOUTF
203    #endif
204    #endif
205    
206    /* To make the code a bit tidier for 8/16/32-bit support, we define macros
207    for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
208    only from one place and is handled differently). I couldn't dream up any way of
209    using a single macro to do this in a generic way, because of the many different
210    argument requirements. We know that at least one of SUPPORT_PCRE8,
211    SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
212    individual mode; then use these in the definitions of generic macros.
213    
214    **** Special note about the PCHARSxxx macros: the address of the string to be
215    printed is always given as two arguments: a base address followed by an offset.
216    The base address is cast to the correct data size for 8, 16, or 32 bit data; the
217    offset is in units of this size. If the string were given as base+offset in one
218    argument, the casting might be incorrectly applied. */
219    
220    #ifdef SUPPORT_PCRE8
221    
222    #define PCHARS8(lv, p, offset, len, f) \
223      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
224    
225    #define PCHARSV8(p, offset, len, f) \
226      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
227    
228    #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
229      p = read_capture_name8(p, cn8, re)
230    
231    #define STRLEN8(p) ((int)strlen((char *)p))
232    
233    #define SET_PCRE_CALLOUT8(callout) \
234      pcre_callout = callout
235    
236    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
237       pcre_assign_jit_stack(extra, callback, userdata)
238    
239    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
240      re = pcre_compile((char *)pat, options, error, erroffset, tables)
241    
242    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
243        namesptr, cbuffer, size) \
244      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
245        (char *)namesptr, cbuffer, size)
246    
247    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
248      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
249    
250    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
251        offsets, size_offsets, workspace, size_workspace) \
252      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
253        offsets, size_offsets, workspace, size_workspace)
254    
255    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
256        offsets, size_offsets) \
257      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
258        offsets, size_offsets)
259    
260    #define PCRE_FREE_STUDY8(extra) \
261      pcre_free_study(extra)
262    
263    #define PCRE_FREE_SUBSTRING8(substring) \
264      pcre_free_substring(substring)
265    
266    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
267      pcre_free_substring_list(listptr)
268    
269    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
270        getnamesptr, subsptr) \
271      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
272        (char *)getnamesptr, subsptr)
273    
274    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
275      n = pcre_get_stringnumber(re, (char *)ptr)
276    
277    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
278      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
279    
280    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
281      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
282    
283    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
284      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
285    
286    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
287      pcre_printint(re, outfile, debug_lengths)
288    
289    #define PCRE_STUDY8(extra, re, options, error) \
290      extra = pcre_study(re, options, error)
291    
292    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
293      pcre_jit_stack_alloc(startsize, maxsize)
294    
295    #define PCRE_JIT_STACK_FREE8(stack) \
296      pcre_jit_stack_free(stack)
297    
298    #define pcre8_maketables pcre_maketables
299    
300    #endif /* SUPPORT_PCRE8 */
301    
302    /* -----------------------------------------------------------*/
303    
304    #ifdef SUPPORT_PCRE16
305    
306    #define PCHARS16(lv, p, offset, len, f) \
307      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
308    
309    #define PCHARSV16(p, offset, len, f) \
310      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
311    
312    #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
313      p = read_capture_name16(p, cn16, re)
314    
315    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
316    
317    #define SET_PCRE_CALLOUT16(callout) \
318      pcre16_callout = (int (*)(pcre16_callout_block *))callout
319    
320    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
321      pcre16_assign_jit_stack((pcre16_extra *)extra, \
322        (pcre16_jit_callback)callback, userdata)
323    
324    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
325      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
326        tables)
327    
328    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
329        namesptr, cbuffer, size) \
330      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
331        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
332    
333    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
334      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
335        (PCRE_UCHAR16 *)cbuffer, size/2)
336    
337    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338        offsets, size_offsets, workspace, size_workspace) \
339      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
340        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
341        workspace, size_workspace)
342    
343    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344        offsets, size_offsets) \
345      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
346        len, start_offset, options, offsets, size_offsets)
347    
348    #define PCRE_FREE_STUDY16(extra) \
349      pcre16_free_study((pcre16_extra *)extra)
350    
351    #define PCRE_FREE_SUBSTRING16(substring) \
352      pcre16_free_substring((PCRE_SPTR16)substring)
353    
354    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
355      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
356    
357    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
358        getnamesptr, subsptr) \
359      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
360        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
361    
362    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
363      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
364    
365    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
366      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
367        (PCRE_SPTR16 *)(void*)subsptr)
368    
369    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
370      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
371        (PCRE_SPTR16 **)(void*)listptr)
372    
373    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
374      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
375        tables)
376    
377    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
378      pcre16_printint(re, outfile, debug_lengths)
379    
380    #define PCRE_STUDY16(extra, re, options, error) \
381      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
382    
383    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
384      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
385    
386    #define PCRE_JIT_STACK_FREE16(stack) \
387      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
388    
389    #endif /* SUPPORT_PCRE16 */
390    
391    /* -----------------------------------------------------------*/
392    
393    #ifdef SUPPORT_PCRE32
394    
395    #define PCHARS32(lv, p, offset, len, f) \
396      lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
397    
398    #define PCHARSV32(p, offset, len, f)                \
399      (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
400    
401    #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
402      p = read_capture_name32(p, cn32, re)
403    
404    #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
405    
406    #define SET_PCRE_CALLOUT32(callout) \
407      pcre32_callout = (int (*)(pcre32_callout_block *))callout
408    
409    #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
410      pcre32_assign_jit_stack((pcre32_extra *)extra, \
411        (pcre32_jit_callback)callback, userdata)
412    
413    #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
414      re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
415        tables)
416    
/* Copy a named captured substring into cbuffer using the 32-bit library and
store the library's return value in rc.

  rc        lvalue that receives the result of pcre32_copy_named_substring()
  re        compiled pattern (cast to pcre32 *)
  bptr      subject string (cast to PCRE_SPTR32)
  offsets   offsets vector, passed through unchanged
  count     passed through unchanged
  namesptr  name of the group to copy (cast to PCRE_SPTR32)
  cbuffer   destination buffer (cast to PCRE_UCHAR32 *)
  size      capacity of cbuffer; taken to be a byte count, because the 8-bit
            variant passes it through unchanged and the 16-bit variant halves
            it

The limit handed to the library is in 32-bit code units, so the byte count is
divided by 4. Dividing by 2 (as for 16-bit) would advertise twice the real
capacity and let the library write past the end of cbuffer. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
421    
/* Copy captured substring number i into cbuffer using the 32-bit library and
store the library's return value in rc.

  rc        lvalue that receives the result of pcre32_copy_substring()
  bptr      subject string (cast to PCRE_SPTR32)
  offsets   offsets vector, passed through unchanged
  count     passed through unchanged
  i         number of the substring to copy
  cbuffer   destination buffer (cast to PCRE_UCHAR32 *)
  size      capacity of cbuffer; taken to be a byte count, because the 8-bit
            variant passes it through unchanged and the 16-bit variant halves
            it

The limit handed to the library is in 32-bit code units, so the byte count is
divided by 4. Dividing by 2 (as for 16-bit) would advertise twice the real
capacity and let the library write past the end of cbuffer. */

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
425    
426    #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
427        offsets, size_offsets, workspace, size_workspace) \
428      count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
429        (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
430        workspace, size_workspace)
431    
432    #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433        offsets, size_offsets) \
434      count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
435        len, start_offset, options, offsets, size_offsets)
436    
437    #define PCRE_FREE_STUDY32(extra) \
438      pcre32_free_study((pcre32_extra *)extra)
439    
440    #define PCRE_FREE_SUBSTRING32(substring) \
441      pcre32_free_substring((PCRE_SPTR32)substring)
442    
443    #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
444      pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
445    
446    #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
447        getnamesptr, subsptr) \
448      rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
449        count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
450    
451    #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
452      n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
453    
454    #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
455      rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
456        (PCRE_SPTR32 *)(void*)subsptr)
457    
458    #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
459      rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
460        (PCRE_SPTR32 **)(void*)listptr)
461    
462    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
463      rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
464        tables)
465    
466    #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
467      pcre32_printint(re, outfile, debug_lengths)
468    
469    #define PCRE_STUDY32(extra, re, options, error) \
470      extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
471    
472    #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
473      (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
474    
475    #define PCRE_JIT_STACK_FREE32(stack) \
476      pcre32_jit_stack_free((pcre32_jit_stack *)stack)
477    
478    #endif /* SUPPORT_PCRE32 */
479    
480    
481    /* ----- More than one mode is supported; a runtime test is needed, except for
482    pcre_config(), and the JIT stack functions, when it doesn't matter which
483    available version is called. ----- */
484    
485    enum {
486      PCRE8_MODE,
487      PCRE16_MODE,
488      PCRE32_MODE
489    };
490    
491    #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
492         defined (SUPPORT_PCRE32)) >= 2
493    
494    #define CHAR_SIZE (1 << pcre_mode)
495    
496    /* There doesn't seem to be an easy way of writing these macros that can cope
497    with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
498    cases separately. */
499    
500    /* ----- All three modes supported ----- */
501    
502    #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
503    
504    #define PCHARS(lv, p, offset, len, f) \
505      if (pcre_mode == PCRE32_MODE) \
506        PCHARS32(lv, p, offset, len, f); \
507      else if (pcre_mode == PCRE16_MODE) \
508        PCHARS16(lv, p, offset, len, f); \
509      else \
510        PCHARS8(lv, p, offset, len, f)
511    
512    #define PCHARSV(p, offset, len, f) \
513      if (pcre_mode == PCRE32_MODE) \
514        PCHARSV32(p, offset, len, f); \
515      else if (pcre_mode == PCRE16_MODE) \
516        PCHARSV16(p, offset, len, f); \
517      else \
518        PCHARSV8(p, offset, len, f)
519    
520    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
521      if (pcre_mode == PCRE32_MODE) \
522        READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
523      else if (pcre_mode == PCRE16_MODE) \
524        READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
525      else \
526        READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
527    
528    #define SET_PCRE_CALLOUT(callout) \
529      if (pcre_mode == PCRE32_MODE) \
530        SET_PCRE_CALLOUT32(callout); \
531      else if (pcre_mode == PCRE16_MODE) \
532        SET_PCRE_CALLOUT16(callout); \
533      else \
534        SET_PCRE_CALLOUT8(callout)
535    
536    #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
537    
538    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
539      if (pcre_mode == PCRE32_MODE) \
540        PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
541      else if (pcre_mode == PCRE16_MODE) \
542        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
543      else \
544        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
545    
546    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
547      if (pcre_mode == PCRE32_MODE) \
548        PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
549      else if (pcre_mode == PCRE16_MODE) \
550        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
551      else \
552        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
553    
554    #define PCRE_CONFIG pcre_config
555    
556    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
557        namesptr, cbuffer, size) \
558      if (pcre_mode == PCRE32_MODE) \
559        PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
560          namesptr, cbuffer, size); \
561      else if (pcre_mode == PCRE16_MODE) \
562        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
563          namesptr, cbuffer, size); \
564      else \
565        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
566          namesptr, cbuffer, size)
567    
568    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
569      if (pcre_mode == PCRE32_MODE) \
570        PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
571      else if (pcre_mode == PCRE16_MODE) \
572        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
573      else \
574        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
575    
576    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
577        offsets, size_offsets, workspace, size_workspace) \
578      if (pcre_mode == PCRE32_MODE) \
579        PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
580          offsets, size_offsets, workspace, size_workspace); \
581      else if (pcre_mode == PCRE16_MODE) \
582        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
583          offsets, size_offsets, workspace, size_workspace); \
584      else \
585        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
586          offsets, size_offsets, workspace, size_workspace)
587    
588    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
589        offsets, size_offsets) \
590      if (pcre_mode == PCRE32_MODE) \
591        PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
592          offsets, size_offsets); \
593      else if (pcre_mode == PCRE16_MODE) \
594        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
595          offsets, size_offsets); \
596      else \
597        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
598          offsets, size_offsets)
599    
600    #define PCRE_FREE_STUDY(extra) \
601      if (pcre_mode == PCRE32_MODE) \
602        PCRE_FREE_STUDY32(extra); \
603      else if (pcre_mode == PCRE16_MODE) \
604        PCRE_FREE_STUDY16(extra); \
605      else \
606        PCRE_FREE_STUDY8(extra)
607    
608    #define PCRE_FREE_SUBSTRING(substring) \
609      if (pcre_mode == PCRE32_MODE) \
610        PCRE_FREE_SUBSTRING32(substring); \
611      else if (pcre_mode == PCRE16_MODE) \
612        PCRE_FREE_SUBSTRING16(substring); \
613      else \
614        PCRE_FREE_SUBSTRING8(substring)
615    
616    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
617      if (pcre_mode == PCRE32_MODE) \
618        PCRE_FREE_SUBSTRING_LIST32(listptr); \
619      else if (pcre_mode == PCRE16_MODE) \
620        PCRE_FREE_SUBSTRING_LIST16(listptr); \
621      else \
622        PCRE_FREE_SUBSTRING_LIST8(listptr)
623    
624    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
625        getnamesptr, subsptr) \
626      if (pcre_mode == PCRE32_MODE) \
627        PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
628          getnamesptr, subsptr); \
629      else if (pcre_mode == PCRE16_MODE) \
630        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
631          getnamesptr, subsptr); \
632      else \
633        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
634          getnamesptr, subsptr)
635    
636    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
637      if (pcre_mode == PCRE32_MODE) \
638        PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
639      else if (pcre_mode == PCRE16_MODE) \
640        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
641      else \
642        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
643    
644    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
645      if (pcre_mode == PCRE32_MODE) \
646        PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
647      else if (pcre_mode == PCRE16_MODE) \
648        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
649      else \
650        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
651    
652    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
653      if (pcre_mode == PCRE32_MODE) \
654        PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
655      else if (pcre_mode == PCRE16_MODE) \
656        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
657      else \
658        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
659    
660    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
661      (pcre_mode == PCRE32_MODE ? \
662         PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
663        : pcre_mode == PCRE16_MODE ? \
664          PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
665          : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
666    
667    #define PCRE_JIT_STACK_FREE(stack) \
668      if (pcre_mode == PCRE32_MODE) \
669        PCRE_JIT_STACK_FREE32(stack); \
670      else if (pcre_mode == PCRE16_MODE) \
671        PCRE_JIT_STACK_FREE16(stack); \
672      else \
673        PCRE_JIT_STACK_FREE8(stack)
674    
675    #define PCRE_MAKETABLES \
676      (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
677    
678    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
679      if (pcre_mode == PCRE32_MODE) \
680        PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
681      else if (pcre_mode == PCRE16_MODE) \
682        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
683      else \
684        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
685    
686    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
687      if (pcre_mode == PCRE32_MODE) \
688        PCRE_PRINTINT32(re, outfile, debug_lengths); \
689      else if (pcre_mode == PCRE16_MODE) \
690        PCRE_PRINTINT16(re, outfile, debug_lengths); \
691      else \
692        PCRE_PRINTINT8(re, outfile, debug_lengths)
693    
694    #define PCRE_STUDY(extra, re, options, error) \
695      if (pcre_mode == PCRE32_MODE) \
696        PCRE_STUDY32(extra, re, options, error); \
697      else if (pcre_mode == PCRE16_MODE) \
698        PCRE_STUDY16(extra, re, options, error); \
699      else \
700        PCRE_STUDY8(extra, re, options, error)
701    
702    
703    /* ----- Two out of three modes are supported ----- */
704    
705    #else
706    
707    /* We can use some macro trickery to make a single set of definitions work in
708    the three different cases. */
709    
710    /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
711    
712    #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
713    #define BITONE 32
714    #define BITTWO 16
715    
716    /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
717    
718    #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
719    #define BITONE 32
720    #define BITTWO 8
721    
722    /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
723    
724    #else
725    #define BITONE 16
726    #define BITTWO 8
727    #endif
728    
729    #define glue(a,b) a##b
730    #define G(a,b) glue(a,b)
731    
732    
733    /* ----- Common macros for two-mode cases ----- */
734    
735    #define PCHARS(lv, p, offset, len, f) \
736      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737        G(PCHARS,BITONE)(lv, p, offset, len, f); \
738      else \
739        G(PCHARS,BITTWO)(lv, p, offset, len, f)
740    
741    #define PCHARSV(p, offset, len, f) \
742      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743        G(PCHARSV,BITONE)(p, offset, len, f); \
744      else \
745        G(PCHARSV,BITTWO)(p, offset, len, f)
746    
747    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
748      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749        G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
750      else \
751        G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
752    
753    #define SET_PCRE_CALLOUT(callout) \
754      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
755        G(SET_PCRE_CALLOUT,BITONE)(callout); \
756      else \
757        G(SET_PCRE_CALLOUT,BITTWO)(callout)
758    
759    #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
760      G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
761    
762    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
763      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764        G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
765      else \
766        G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
767    
768    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
769      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
770        G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
771      else \
772        G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
773    
774    #define PCRE_CONFIG G(G(pcre,BITONE),_config)
775    
776    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
777        namesptr, cbuffer, size) \
778      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
779        G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
780          namesptr, cbuffer, size); \
781      else \
782        G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
783          namesptr, cbuffer, size)
784    
785    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
786      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
787        G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
788      else \
789        G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
790    
791    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
792        offsets, size_offsets, workspace, size_workspace) \
793      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
794        G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
795          offsets, size_offsets, workspace, size_workspace); \
796      else \
797        G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
798          offsets, size_offsets, workspace, size_workspace)
799    
800    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
801        offsets, size_offsets) \
802      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
803        G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
804          offsets, size_offsets); \
805      else \
806        G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
807          offsets, size_offsets)
808    
809    #define PCRE_FREE_STUDY(extra) \
810      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
811        G(PCRE_FREE_STUDY,BITONE)(extra); \
812      else \
813        G(PCRE_FREE_STUDY,BITTWO)(extra)
814    
815    #define PCRE_FREE_SUBSTRING(substring) \
816      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817        G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
818      else \
819        G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
820    
821    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
822      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
823        G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
824      else \
825        G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
826    
827    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
828        getnamesptr, subsptr) \
829      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
830        G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
831          getnamesptr, subsptr); \
832      else \
833        G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
834          getnamesptr, subsptr)
835    
836    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
837      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
838        G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
839      else \
840        G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
841    
842    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
843      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
844        G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
845      else \
846        G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
847    
848    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
849      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
850        G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
851      else \
852        G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
853    
854    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
855      (pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
856         G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
857        : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize)
858    
859    #define PCRE_JIT_STACK_FREE(stack) \
860      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
861        G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
862      else \
863        G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
864    
865    #define PCRE_MAKETABLES \
866      (pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
867        G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)()
868    
869    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
870      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
871        G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
872      else \
873        G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
874    
875    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
876      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
877        G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
878      else \
879        G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
880    
881    #define PCRE_STUDY(extra, re, options, error) \
882      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
883        G(PCRE_STUDY,BITONE)(extra, re, options, error); \
884      else \
885        G(PCRE_STUDY,BITTWO)(extra, re, options, error)
886    
887    #endif  /* Two out of three modes */
888    
889    /* ----- End of cases where more than one mode is supported ----- */
890    
891    
892    /* ----- Only 8-bit mode is supported ----- */
893    
894    #elif defined SUPPORT_PCRE8
895    #define CHAR_SIZE                 1
896    #define PCHARS                    PCHARS8
897    #define PCHARSV                   PCHARSV8
898    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
899    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
900    #define STRLEN                    STRLEN8
901    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
902    #define PCRE_COMPILE              PCRE_COMPILE8
903    #define PCRE_CONFIG               pcre_config
904    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
905    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
906    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
907    #define PCRE_EXEC                 PCRE_EXEC8
908    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
909    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
910    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
911    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
912    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
913    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
914    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
915    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
916    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
917    #define PCRE_MAKETABLES           pcre_maketables()
918    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
919    #define PCRE_PRINTINT             PCRE_PRINTINT8
920    #define PCRE_STUDY                PCRE_STUDY8
921    
922    /* ----- Only 16-bit mode is supported ----- */
923    
924    #elif defined SUPPORT_PCRE16
925    #define CHAR_SIZE                 2
926    #define PCHARS                    PCHARS16
927    #define PCHARSV                   PCHARSV16
928    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
929    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
930    #define STRLEN                    STRLEN16
931    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
932    #define PCRE_COMPILE              PCRE_COMPILE16
933    #define PCRE_CONFIG               pcre16_config
934    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
935    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
936    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
937    #define PCRE_EXEC                 PCRE_EXEC16
938    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
939    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
940    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
941    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
942    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
943    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
944    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
945    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
946    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
947    #define PCRE_MAKETABLES           pcre16_maketables()
948    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
949    #define PCRE_PRINTINT             PCRE_PRINTINT16
950    #define PCRE_STUDY                PCRE_STUDY16
951    
952    /* ----- Only 32-bit mode is supported ----- */
953    
954    #elif defined SUPPORT_PCRE32
955    #define CHAR_SIZE                 4
956    #define PCHARS                    PCHARS32
957    #define PCHARSV                   PCHARSV32
958    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME32
959    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT32
960    #define STRLEN                    STRLEN32
961    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK32
962    #define PCRE_COMPILE              PCRE_COMPILE32
963    #define PCRE_CONFIG               pcre32_config
964    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
965    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING32
966    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC32
967    #define PCRE_EXEC                 PCRE_EXEC32
968    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY32
969    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING32
970    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST32
971    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING32
972    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER32
973    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING32
974    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST32
975    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC32
976    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE32
977    #define PCRE_MAKETABLES           pcre32_maketables()
978    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
979    #define PCRE_PRINTINT             PCRE_PRINTINT32
980    #define PCRE_STUDY                PCRE_STUDY32
981    
982    #endif
983    
984    /* ----- End of mode-specific function call macros ----- */
985    
986    
987    /* Other parameters */
988    
989  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
990  #ifdef CLK_TCK  #ifdef CLK_TCK
991  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 994  Makefile. */
994  #endif  #endif
995  #endif  #endif
996    
997  #define LOOPREPEAT 20000  #if !defined NODFA
998    #define DFA_WS_DIMENSION 1000
999    #endif
1000    
1001    /* This is the default loop count for timing. */
1002    
1003    #define LOOPREPEAT 500000
1004    
1005    /* Static variables */
1006    
1007  static FILE *outfile;  static FILE *outfile;
1008  static int log_store = 0;  static int log_store = 0;
1009    static int callout_count;
1010    static int callout_extra;
1011    static int callout_fail_count;
1012    static int callout_fail_id;
1013    static int debug_lengths;
1014    static int first_callout;
1015    static int jit_was_used;
1016    static int locale_set = 0;
1017    static int show_malloc;
1018    static int use_utf;
1019  static size_t gotten_store;  static size_t gotten_store;
1020    static size_t first_gotten_store = 0;
1021    static const unsigned char *last_callout_mark = NULL;
1022    
1023    /* The buffers grow automatically if very long input lines are encountered. */
1024    
1025    static int buffer_size = 50000;
1026    static pcre_uint8 *buffer = NULL;
1027    static pcre_uint8 *pbuffer = NULL;
1028    
1029  /* Debugging function to print the internal form of the regex. This is the same  /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
 code as contained in pcre.c under the DEBUG macro. */  
1030    
1031  static const char *OP_names[] = {  #ifdef COMPILE_PCRE16
1032    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1033    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  #endif
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
1034    
1035    #ifdef COMPILE_PCRE32
1036    #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1037    #endif
1038    
1039  static void print_internals(pcre *re)  /* We need buffers for building 16/32-bit strings, and the tables of operator
1040  {  lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1041  unsigned char *code = ((real_pcre *)re)->code;  pattern for saving/reloading testing. Luckily, the data for these tables is
1042    defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1043    are used in the tables) are adjusted appropriately for the 16/32-bit world.
1044    LINK_SIZE is also used later in this program. */
1045    
1046    #ifdef SUPPORT_PCRE16
1047    #undef IMM2_SIZE
1048    #define IMM2_SIZE 1
1049    
1050    #if LINK_SIZE == 2
1051    #undef LINK_SIZE
1052    #define LINK_SIZE 1
1053    #elif LINK_SIZE == 3 || LINK_SIZE == 4
1054    #undef LINK_SIZE
1055    #define LINK_SIZE 2
1056    #else
1057    #error LINK_SIZE must be either 2, 3, or 4
1058    #endif
1059    
1060  fprintf(outfile, "------------------------------------------------------------------\n");  static int buffer16_size = 0;
1061    static pcre_uint16 *buffer16 = NULL;
1062    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1063    #endif  /* SUPPORT_PCRE16 */
1064    
1065    #ifdef SUPPORT_PCRE32
1066    #undef IMM2_SIZE
1067    #define IMM2_SIZE 1
1068    #undef LINK_SIZE
1069    #define LINK_SIZE 1
1070    
1071    static int buffer32_size = 0;
1072    static pcre_uint32 *buffer32 = NULL;
1073    static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1074    #endif  /* SUPPORT_PCRE32 */
1075    
1076    /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1077    support, it can be changed by an option. If there is no 8-bit support, there
1078    must be 16- or 32-bit support, so default to 16-bit, or failing that, 32-bit. */
1079    
1080    #if defined SUPPORT_PCRE8
1081    static int pcre_mode = PCRE8_MODE;
1082    #elif defined SUPPORT_PCRE16
1083    static int pcre_mode = PCRE16_MODE;
1084    #elif defined SUPPORT_PCRE32
1085    static int pcre_mode = PCRE32_MODE;
1086    #endif
1087    
1088    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1089    
1090  for(;;)  static int jit_study_bits[] =
1091    {    {
1092    int c;    PCRE_STUDY_JIT_COMPILE,
1093    int charlength;    PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1094      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1095      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1096      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1097      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1098      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1099        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1100    };
1101    
1102    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1103      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1104    
1105    if (*code >= OP_BRA)  /* Textual explanations for runtime error codes */
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
1106    
1107    else switch(*code)  static const char *errtexts[] = {
1108      {    NULL,  /* 0 is no error */
1109      case OP_END:    NULL,  /* NOMATCH is handled specially */
1110      fprintf(outfile, "    %s\n", OP_names[*code]);    "NULL argument passed",
1111      fprintf(outfile, "------------------------------------------------------------------\n");    "bad option value",
1112      return;    "magic number missing",
1113      "unknown opcode - pattern overwritten?",
1114      "no more memory",
1115      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
1116      "match limit exceeded",
1117      "callout error code",
1118      NULL,  /* BADUTF8/16 is handled specially */
1119      NULL,  /* BADUTF8/16 offset is handled specially */
1120      NULL,  /* PARTIAL is handled specially */
1121      "not used - internal error",
1122      "internal error - pattern overwritten?",
1123      "bad count value",
1124      "item unsupported for DFA matching",
1125      "backreference condition or recursion test not supported for DFA matching",
1126      "match limit not supported for DFA matching",
1127      "workspace size exceeded in DFA matching",
1128      "too much recursion for DFA matching",
1129      "recursion limit exceeded",
1130      "not used - internal error",
1131      "invalid combination of newline options",
1132      "bad offset value",
1133      NULL,  /* SHORTUTF8/16 is handled specially */
1134      "nested recursion at the same subject position",
1135      "JIT stack limit reached",
1136      "pattern compiled in wrong mode: 8-bit/16-bit error",
1137      "pattern compiled with other endianness",
1138      "invalid data in workspace for DFA restart",
1139      "bad JIT option",
1140      "bad length"
1141    };
1142    
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
1143    
1144      case OP_COND:  /*************************************************
1145      fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  *         Alternate character tables             *
1146      code += 2;  *************************************************/
     break;  
1147    
1148      case OP_CREF:  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1149      fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  using the default tables of the library. However, the T option can be used to
1150      code++;  select alternate sets of tables, for different kinds of testing. Note also that
1151      break;  the L (locale) option also adjusts the tables. */
1152    
1153    /* This is the set of tables distributed as default with PCRE. It recognizes
1154    only ASCII characters. */
1155    
1156    static const pcre_uint8 tables0[] = {
1157    
1158    /* This table is a lower casing table. */
1159    
1160        0,  1,  2,  3,  4,  5,  6,  7,
1161        8,  9, 10, 11, 12, 13, 14, 15,
1162       16, 17, 18, 19, 20, 21, 22, 23,
1163       24, 25, 26, 27, 28, 29, 30, 31,
1164       32, 33, 34, 35, 36, 37, 38, 39,
1165       40, 41, 42, 43, 44, 45, 46, 47,
1166       48, 49, 50, 51, 52, 53, 54, 55,
1167       56, 57, 58, 59, 60, 61, 62, 63,
1168       64, 97, 98, 99,100,101,102,103,
1169      104,105,106,107,108,109,110,111,
1170      112,113,114,115,116,117,118,119,
1171      120,121,122, 91, 92, 93, 94, 95,
1172       96, 97, 98, 99,100,101,102,103,
1173      104,105,106,107,108,109,110,111,
1174      112,113,114,115,116,117,118,119,
1175      120,121,122,123,124,125,126,127,
1176      128,129,130,131,132,133,134,135,
1177      136,137,138,139,140,141,142,143,
1178      144,145,146,147,148,149,150,151,
1179      152,153,154,155,156,157,158,159,
1180      160,161,162,163,164,165,166,167,
1181      168,169,170,171,172,173,174,175,
1182      176,177,178,179,180,181,182,183,
1183      184,185,186,187,188,189,190,191,
1184      192,193,194,195,196,197,198,199,
1185      200,201,202,203,204,205,206,207,
1186      208,209,210,211,212,213,214,215,
1187      216,217,218,219,220,221,222,223,
1188      224,225,226,227,228,229,230,231,
1189      232,233,234,235,236,237,238,239,
1190      240,241,242,243,244,245,246,247,
1191      248,249,250,251,252,253,254,255,
1192    
1193    /* This table is a case flipping table. */
1194    
1195        0,  1,  2,  3,  4,  5,  6,  7,
1196        8,  9, 10, 11, 12, 13, 14, 15,
1197       16, 17, 18, 19, 20, 21, 22, 23,
1198       24, 25, 26, 27, 28, 29, 30, 31,
1199       32, 33, 34, 35, 36, 37, 38, 39,
1200       40, 41, 42, 43, 44, 45, 46, 47,
1201       48, 49, 50, 51, 52, 53, 54, 55,
1202       56, 57, 58, 59, 60, 61, 62, 63,
1203       64, 97, 98, 99,100,101,102,103,
1204      104,105,106,107,108,109,110,111,
1205      112,113,114,115,116,117,118,119,
1206      120,121,122, 91, 92, 93, 94, 95,
1207       96, 65, 66, 67, 68, 69, 70, 71,
1208       72, 73, 74, 75, 76, 77, 78, 79,
1209       80, 81, 82, 83, 84, 85, 86, 87,
1210       88, 89, 90,123,124,125,126,127,
1211      128,129,130,131,132,133,134,135,
1212      136,137,138,139,140,141,142,143,
1213      144,145,146,147,148,149,150,151,
1214      152,153,154,155,156,157,158,159,
1215      160,161,162,163,164,165,166,167,
1216      168,169,170,171,172,173,174,175,
1217      176,177,178,179,180,181,182,183,
1218      184,185,186,187,188,189,190,191,
1219      192,193,194,195,196,197,198,199,
1220      200,201,202,203,204,205,206,207,
1221      208,209,210,211,212,213,214,215,
1222      216,217,218,219,220,221,222,223,
1223      224,225,226,227,228,229,230,231,
1224      232,233,234,235,236,237,238,239,
1225      240,241,242,243,244,245,246,247,
1226      248,249,250,251,252,253,254,255,
1227    
1228    /* This table contains bit maps for various character classes. Each map is 32
1229    bytes long and the bits run from the least significant end of each byte. The
1230    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1231    graph, print, punct, and cntrl. Other classes are built from combinations. */
1232    
1233      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1234      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1235      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1236      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1237    
1238      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1239      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1240      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1241      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1242    
1243      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1244      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1245      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1246      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1247    
1248      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1249      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1250      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1251      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1252    
1253      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1254      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1255      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1256      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1257    
1258      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1259      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1260      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1261      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1262    
1263      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1264      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1265      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1266      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1267    
1268      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1269      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1270      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1271      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1272    
1273      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1274      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1275      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1276      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1277    
1278      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1279      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1280      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1281      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1282    
1283    /* This table identifies various classes of character by individual bits:
1284      0x01   white space character
1285      0x02   letter
1286      0x04   decimal digit
1287      0x08   hexadecimal digit
1288      0x10   alphanumeric or '_'
1289      0x80   regular expression metacharacter or binary zero
1290    */
1291    
1292      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
1293      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
1294      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
1295      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
1296      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
1297      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
1298      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
1299      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
1300      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
1301      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
1302      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
1303      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
1304      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
1305      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
1306      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
1307      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
1308      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1309      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1311      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1312      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1313      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1314      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1315      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1316      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1317      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1318      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1319      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1320      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1321      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1322      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1323      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1324    
1325    /* This is a set of tables that came originally from a Windows user. It seems to
1326    be at least an approximation of ISO 8859. In particular, there are characters
1327    greater than 128 that are marked as spaces, letters, etc. */
1328    
1329    static const pcre_uint8 tables1[] = {
1330    0,1,2,3,4,5,6,7,
1331    8,9,10,11,12,13,14,15,
1332    16,17,18,19,20,21,22,23,
1333    24,25,26,27,28,29,30,31,
1334    32,33,34,35,36,37,38,39,
1335    40,41,42,43,44,45,46,47,
1336    48,49,50,51,52,53,54,55,
1337    56,57,58,59,60,61,62,63,
1338    64,97,98,99,100,101,102,103,
1339    104,105,106,107,108,109,110,111,
1340    112,113,114,115,116,117,118,119,
1341    120,121,122,91,92,93,94,95,
1342    96,97,98,99,100,101,102,103,
1343    104,105,106,107,108,109,110,111,
1344    112,113,114,115,116,117,118,119,
1345    120,121,122,123,124,125,126,127,
1346    128,129,130,131,132,133,134,135,
1347    136,137,138,139,140,141,142,143,
1348    144,145,146,147,148,149,150,151,
1349    152,153,154,155,156,157,158,159,
1350    160,161,162,163,164,165,166,167,
1351    168,169,170,171,172,173,174,175,
1352    176,177,178,179,180,181,182,183,
1353    184,185,186,187,188,189,190,191,
1354    224,225,226,227,228,229,230,231,
1355    232,233,234,235,236,237,238,239,
1356    240,241,242,243,244,245,246,215,
1357    248,249,250,251,252,253,254,223,
1358    224,225,226,227,228,229,230,231,
1359    232,233,234,235,236,237,238,239,
1360    240,241,242,243,244,245,246,247,
1361    248,249,250,251,252,253,254,255,
1362    0,1,2,3,4,5,6,7,
1363    8,9,10,11,12,13,14,15,
1364    16,17,18,19,20,21,22,23,
1365    24,25,26,27,28,29,30,31,
1366    32,33,34,35,36,37,38,39,
1367    40,41,42,43,44,45,46,47,
1368    48,49,50,51,52,53,54,55,
1369    56,57,58,59,60,61,62,63,
1370    64,97,98,99,100,101,102,103,
1371    104,105,106,107,108,109,110,111,
1372    112,113,114,115,116,117,118,119,
1373    120,121,122,91,92,93,94,95,
1374    96,65,66,67,68,69,70,71,
1375    72,73,74,75,76,77,78,79,
1376    80,81,82,83,84,85,86,87,
1377    88,89,90,123,124,125,126,127,
1378    128,129,130,131,132,133,134,135,
1379    136,137,138,139,140,141,142,143,
1380    144,145,146,147,148,149,150,151,
1381    152,153,154,155,156,157,158,159,
1382    160,161,162,163,164,165,166,167,
1383    168,169,170,171,172,173,174,175,
1384    176,177,178,179,180,181,182,183,
1385    184,185,186,187,188,189,190,191,
1386    224,225,226,227,228,229,230,231,
1387    232,233,234,235,236,237,238,239,
1388    240,241,242,243,244,245,246,215,
1389    248,249,250,251,252,253,254,223,
1390    192,193,194,195,196,197,198,199,
1391    200,201,202,203,204,205,206,207,
1392    208,209,210,211,212,213,214,247,
1393    216,217,218,219,220,221,222,255,
1394    0,62,0,0,1,0,0,0,
1395    0,0,0,0,0,0,0,0,
1396    32,0,0,0,1,0,0,0,
1397    0,0,0,0,0,0,0,0,
1398    0,0,0,0,0,0,255,3,
1399    126,0,0,0,126,0,0,0,
1400    0,0,0,0,0,0,0,0,
1401    0,0,0,0,0,0,0,0,
1402    0,0,0,0,0,0,255,3,
1403    0,0,0,0,0,0,0,0,
1404    0,0,0,0,0,0,12,2,
1405    0,0,0,0,0,0,0,0,
1406    0,0,0,0,0,0,0,0,
1407    254,255,255,7,0,0,0,0,
1408    0,0,0,0,0,0,0,0,
1409    255,255,127,127,0,0,0,0,
1410    0,0,0,0,0,0,0,0,
1411    0,0,0,0,254,255,255,7,
1412    0,0,0,0,0,4,32,4,
1413    0,0,0,128,255,255,127,255,
1414    0,0,0,0,0,0,255,3,
1415    254,255,255,135,254,255,255,7,
1416    0,0,0,0,0,4,44,6,
1417    255,255,127,255,255,255,127,255,
1418    0,0,0,0,254,255,255,255,
1419    255,255,255,255,255,255,255,127,
1420    0,0,0,0,254,255,255,255,
1421    255,255,255,255,255,255,255,255,
1422    0,2,0,0,255,255,255,255,
1423    255,255,255,255,255,255,255,127,
1424    0,0,0,0,255,255,255,255,
1425    255,255,255,255,255,255,255,255,
1426    0,0,0,0,254,255,0,252,
1427    1,0,0,248,1,0,0,120,
1428    0,0,0,0,254,255,255,255,
1429    0,0,128,0,0,0,128,0,
1430    255,255,255,255,0,0,0,0,
1431    0,0,0,0,0,0,0,128,
1432    255,255,255,255,0,0,0,0,
1433    0,0,0,0,0,0,0,0,
1434    128,0,0,0,0,0,0,0,
1435    0,1,1,0,1,1,0,0,
1436    0,0,0,0,0,0,0,0,
1437    0,0,0,0,0,0,0,0,
1438    1,0,0,0,128,0,0,0,
1439    128,128,128,128,0,0,128,0,
1440    28,28,28,28,28,28,28,28,
1441    28,28,0,0,0,0,0,128,
1442    0,26,26,26,26,26,26,18,
1443    18,18,18,18,18,18,18,18,
1444    18,18,18,18,18,18,18,18,
1445    18,18,18,128,128,0,128,16,
1446    0,26,26,26,26,26,26,18,
1447    18,18,18,18,18,18,18,18,
1448    18,18,18,18,18,18,18,18,
1449    18,18,18,128,128,0,0,0,
1450    0,0,0,0,0,1,0,0,
1451    0,0,0,0,0,0,0,0,
1452    0,0,0,0,0,0,0,0,
1453    0,0,0,0,0,0,0,0,
1454    1,0,0,0,0,0,0,0,
1455    0,0,18,0,0,0,0,0,
1456    0,0,20,20,0,18,0,0,
1457    0,20,18,0,0,0,0,0,
1458    18,18,18,18,18,18,18,18,
1459    18,18,18,18,18,18,18,18,
1460    18,18,18,18,18,18,18,0,
1461    18,18,18,18,18,18,18,18,
1462    18,18,18,18,18,18,18,18,
1463    18,18,18,18,18,18,18,18,
1464    18,18,18,18,18,18,18,0,
1465    18,18,18,18,18,18,18,18
1466    };
1467    
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
1468    
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
1469    
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
1470    
1471      case OP_STAR:  #ifndef HAVE_STRERROR
1472      case OP_MINSTAR:  /*************************************************
1473      case OP_PLUS:  *     Provide strerror() for non-ANSI libraries  *
1474      case OP_MINPLUS:  *************************************************/
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
1475    
1476      case OP_EXACT:  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1477      case OP_UPTO:  in their libraries, but can provide the same facility by this simple
1478      case OP_MINUPTO:  alternative function. */
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
1479    
1480      case OP_TYPEEXACT:  extern int   sys_nerr;
1481      case OP_TYPEUPTO:  extern char *sys_errlist[];
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
1482    
1483      case OP_NOT:  char *
1484      if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  strerror(int n)
1485        else fprintf(outfile, "    [^\\x%02x]", c);  {
1486      break;  if (n < 0 || n >= sys_nerr) return "unknown error number";
1487    return sys_errlist[n];
1488    }
1489    #endif /* HAVE_STRERROR */
1490    
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
1491    
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
1492    
1493      case OP_REF:  /*************************************************
1494      fprintf(outfile, "    \\%d", *(++code));  *       Print newline configuration              *
1495      code++;  *************************************************/
     goto CLASS_REF_REPEAT;  
1496    
1497      case OP_CLASS:  /*
1498        {  Arguments:
1499        int i, min, max;    rc         the return code from PCRE_CONFIG_NEWLINE
1500        code++;    isc        TRUE if called from "-C newline"
1501        fprintf(outfile, "    [");  Returns:     nothing
1502    */
1503    
1504        for (i = 0; i < 256; i++)  static void
1505          {  print_newline_config(int rc, BOOL isc)
1506          if ((code[i/8] & (1 << (i&7))) != 0)  {
1507            {  const char *s = NULL;
1508            int j;  if (!isc) printf("  Newline sequence is ");
1509            for (j = i+1; j < 256; j++)  switch(rc)
1510              if ((code[j/8] & (1 << (j&7))) == 0) break;    {
1511            if (i == '-' || i == ']') fprintf(outfile, "\\");    case CHAR_CR: s = "CR"; break;
1512            if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);    case CHAR_LF: s = "LF"; break;
1513            if (--j > i)    case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1514              {    case -1: s = "ANY"; break;
1515              fprintf(outfile, "-");    case -2: s = "ANYCRLF"; break;
1516              if (j == '-' || j == ']') fprintf(outfile, "\\");  
1517              if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);    default:
1518              }    printf("a non-standard value: 0x%04x\n", rc);
1519            i = j;    return;
1520            }    }
         }  
       fprintf(outfile, "]");  
       code += 32;  
1521    
1522        CLASS_REF_REPEAT:  printf("%s\n", s);
1523    }
1524    
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
1525    
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
1526    
1527          default:  /*************************************************
1528          code--;  *         JIT memory callback                    *
1529          }  *************************************************/
1530        }  
1531      break;  static pcre_jit_stack* jit_callback(void *arg)
1532    {
1533    jit_was_used = TRUE;
1534    return (pcre_jit_stack *)arg;
1535    }
1536    
     /* Anything else is just a one-node item */  
1537    
1538      default:  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1539      fprintf(outfile, "    %s", OP_names[*code]);  /*************************************************
1540      break;  *            Convert UTF-8 string to value       *
1541      }  *************************************************/
1542    
1543    /* This function takes one or more bytes that represents a UTF-8 character,
1544    and returns the value of the character.
1545    
1546    Argument:
1547      utf8bytes   a pointer to the byte vector
1548      vptr        a pointer to an int to receive the value
1549    
1550    Returns:      >  0 => the number of bytes consumed
1551                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1552    */
1553    
1554    static int
1555    utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1556    {
1557    pcre_uint32 c = *utf8bytes++;
1558    pcre_uint32 d = c;
1559    int i, j, s;
1560    
1561    code++;  for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1562    fprintf(outfile, "\n");    {
1563      if ((d & 0x80) == 0) break;
1564      d <<= 1;
1565    }    }
 }  
1566    
1567    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1568    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1569    
1570    /* i now has a value in the range 1-5 */
1571    
1572  /* Character string printing function. */  s = 6*i;
1573    d = (c & utf8_table3[i]) << s;
1574    
1575  static void pchars(unsigned char *p, int length)  for (j = 0; j < i; j++)
1576  {    {
1577  int c;    c = *utf8bytes++;
1578  while (length-- > 0)    if ((c & 0xc0) != 0x80) return -(j+1);
1579    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    s -= 6;
1580      else fprintf(outfile, "\\x%02x", c);    d |= (c & 0x3f) << s;
1581      }
1582    
1583    /* Check that encoding was the correct unique one */
1584    
1585    for (j = 0; j < utf8_table1_size; j++)
1586      if (d <= (pcre_uint32)utf8_table1[j]) break;
1587    if (j != i) return -(i+1);
1588    
1589    /* Valid value */
1590    
1591    *vptr = d;
1592    return i+1;
1593  }  }
1594    #endif /* NOUTF || SUPPORT_PCRE16 */
1595    
1596    
1597    
1598    #if defined SUPPORT_PCRE8 && !defined NOUTF
1599    /*************************************************
1600    *       Convert character value to UTF-8         *
1601    *************************************************/
1602    
1603  /* Alternative malloc function, to test functionality and show the size of the  /* This function takes an integer value in the range 0 - 0x7fffffff
1604  compiled re. */  and encodes it as a UTF-8 character in 0 to 6 bytes.
1605    
1606  static void *new_malloc(size_t size)  Arguments:
1607      cvalue     the character value
1608      utf8bytes  pointer to buffer for result - at least 6 bytes long
1609    
1610    Returns:     number of characters placed in the buffer
1611    */
1612    
1613    static int
1614    ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1615  {  {
1616  gotten_store = size;  register int i, j;
1617  if (log_store)  if (cvalue > 0x7fffffffu)
1618    fprintf(outfile, "Memory allocation (code space): %d\n",    return -1;
1619      (int)((int)size - offsetof(real_pcre, code[0])));  for (i = 0; i < utf8_table1_size; i++)
1620  return malloc(size);    if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1621    utf8bytes += i;
1622    for (j = i; j > 0; j--)
1623     {
1624     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1625     cvalue >>= 6;
1626     }
1627    *utf8bytes = utf8_table2[i] | cvalue;
1628    return i + 1;
1629  }  }
1630    #endif
1631    
1632    
1633    #ifdef SUPPORT_PCRE16
1634    /*************************************************
1635    *         Convert a string to 16-bit             *
1636    *************************************************/
1637    
1638    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1639    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1640    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1641    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1642    result is always left in buffer16.
1643    
1644    Note that this function does not object to surrogate values. This is
1645    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1646    for the purpose of testing that they are correctly faulted.
1647    
1648    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1649    in UTF-8 so that values greater than 255 can be handled.
1650    
1651    Arguments:
1652      data       TRUE if converting a data line; FALSE for a regex
1653      p          points to a byte string
1654      utf        true if UTF-8 (to be converted to UTF-16)
1655      len        number of bytes in the string (excluding trailing zero)
1656    
1657    Returns:     number of 16-bit data items used (excluding trailing zero)
1658                 OR -1 if a UTF-8 string is malformed
1659                 OR -2 if a value > 0x10ffff is encountered
1660                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1661    */
1662    
1663    static int
1664    to16(int data, pcre_uint8 *p, int utf, int len)
1665    {
1666    pcre_uint16 *pp;
1667    
1668    if (buffer16_size < 2*len + 2)
1669      {
1670      if (buffer16 != NULL) free(buffer16);
1671      buffer16_size = 2*len + 2;
1672      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1673      if (buffer16 == NULL)
1674        {
1675        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1676        exit(1);
1677        }
1678      }
1679    
1680    pp = buffer16;
1681    
1682    if (!utf && !data)
1683      {
1684      while (len-- > 0) *pp++ = *p++;
1685      }
1686    
1687    else
1688      {
1689      pcre_uint32 c = 0;
1690      while (len > 0)
1691        {
1692        int chlen = utf82ord(p, &c);
1693        if (chlen <= 0) return -1;
1694        if (c > 0x10ffff) return -2;
1695        p += chlen;
1696        len -= chlen;
1697        if (c < 0x10000) *pp++ = c; else
1698          {
1699          if (!utf) return -3;
1700          c -= 0x10000;
1701          *pp++ = 0xD800 | (c >> 10);
1702          *pp++ = 0xDC00 | (c & 0x3ff);
1703          }
1704        }
1705      }
1706    
1707    *pp = 0;
1708    return pp - buffer16;
1709    }
1710    #endif
1711    
1712    #ifdef SUPPORT_PCRE32
1713    /*************************************************
1714    *         Convert a string to 32-bit             *
1715    *************************************************/
1716    
1717    /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1718    8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1719    times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1720    in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1721    result is always left in buffer32.
1722    
1723    Note that this function does not object to surrogate values. This is
1724    deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1725    for the purpose of testing that they are correctly faulted.
1726    
1727    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1728    in UTF-8 so that values greater than 255 can be handled.
1729    
1730    Arguments:
1731      data       TRUE if converting a data line; FALSE for a regex
1732      p          points to a byte string
1733      utf        true if UTF-8 (to be converted to UTF-32)
1734      len        number of bytes in the string (excluding trailing zero)
1735    
1736    Returns:     number of 32-bit data items used (excluding trailing zero)
1737                 OR -1 if a UTF-8 string is malformed
1738                 OR -2 if a value > 0x10ffff is encountered
1739                 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1740    */
1741    
1742    static int
1743    to32(int data, pcre_uint8 *p, int utf, int len)
1744    {
1745    pcre_uint32 *pp;
1746    
1747    if (buffer32_size < 4*len + 4)
1748      {
1749      if (buffer32 != NULL) free(buffer32);
1750      buffer32_size = 4*len + 4;
1751      buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1752      if (buffer32 == NULL)
1753        {
1754        fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1755        exit(1);
1756        }
1757      }
1758    
1759    pp = buffer32;
1760    
1761    if (!utf && !data)
1762      {
1763      while (len-- > 0) *pp++ = *p++;
1764      }
1765    
1766    else
1767      {
1768      pcre_uint32 c = 0;
1769      while (len > 0)
1770        {
1771        int chlen = utf82ord(p, &c);
1772        if (chlen <= 0) return -1;
1773        if (utf)
1774          {
1775          if (c > 0x10ffff) return -2;
1776          if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1777          }
1778    
1779        p += chlen;
1780        len -= chlen;
1781        *pp++ = c;
1782        }
1783      }
1784    
1785    *pp = 0;
1786    return pp - buffer32;
1787    }
1788    
1789    /* Check that a 32-bit character string is valid UTF-32.
1790    
1791    Arguments:
1792      string       points to the string
1793      length       length of string, or -1 if the string is zero-terminated
1794    
1795    Returns:       TRUE  if the string is a valid UTF-32 string
1796                   FALSE otherwise
1797    */
1798    
1799    #ifdef NEVER   /* Not used */
1800    #ifdef SUPPORT_UTF
1801    static BOOL
1802    valid_utf32(pcre_uint32 *string, int length)
1803    {
1804    register pcre_uint32 *p;
1805    register pcre_uint32 c;
1806    
1807    for (p = string; length-- > 0; p++)
1808      {
1809      c = *p;
1810      if (c > 0x10ffffu) return FALSE;                 /* Too big */
1811      if ((c & 0xfffff800u) == 0xd800u) return FALSE;  /* Surrogate */
1812      }
1813    
1814    return TRUE;
1815    }
1816    #endif /* SUPPORT_UTF */
1817    #endif /* NEVER */
1818    #endif /* SUPPORT_PCRE32 */
1819    
1820    
1821    /*************************************************
1822    *        Read or extend an input line            *
1823    *************************************************/
1824    
1825    /* Input lines are read into buffer, but both patterns and data lines can be
1826    continued over multiple input lines. In addition, if the buffer fills up, we
1827    want to automatically expand it so as to be able to handle extremely large
1828    lines that are needed for certain stress tests. When the input buffer is
1829    expanded, the other two buffers must also be expanded likewise, and the
1830    contents of pbuffer, which are a copy of the input for callouts, must be
1831    preserved (for when expansion happens for a data line). This is not the most
1832    optimal way of handling this, but hey, this is just a test program!
1833    
1834    Arguments:
1835      f            the file to read
1836      start        where in buffer to start (this *must* be within buffer)
1837      prompt       for stdin or readline()
1838    
1839    Returns:       pointer to the start of new data
1840                   could be a copy of start, or could be moved
1841                   NULL if no data read and EOF reached
1842    */
1843    
1844    static pcre_uint8 *
1845    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1846    {
1847    pcre_uint8 *here = start;
1848    
1849    for (;;)
1850      {
1851      size_t rlen = (size_t)(buffer_size - (here - buffer));
1852    
1853      if (rlen > 1000)
1854        {
1855        int dlen;
1856    
1857        /* If libreadline or libedit support is required, use readline() to read a
1858        line if the input is a terminal. Note that readline() removes the trailing
1859        newline, so we must put it back again, to be compatible with fgets(). */
1860    
1861    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1862        if (isatty(fileno(f)))
1863          {
1864          size_t len;
1865          char *s = readline(prompt);
1866          if (s == NULL) return (here == start)? NULL : start;
1867          len = strlen(s);
1868          if (len > 0) add_history(s);
1869          if (len > rlen - 1) len = rlen - 1;
1870          memcpy(here, s, len);
1871          here[len] = '\n';
1872          here[len+1] = 0;
1873          free(s);
1874          }
1875        else
1876    #endif
1877    
1878        /* Read the next line by normal means, prompting if the file is stdin. */
1879    
1880          {
1881          if (f == stdin) printf("%s", prompt);
1882          if (fgets((char *)here, rlen,  f) == NULL)
1883            return (here == start)? NULL : start;
1884          }
1885    
1886        dlen = (int)strlen((char *)here);
1887        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1888        here += dlen;
1889        }
1890    
1891      else
1892        {
1893        int new_buffer_size = 2*buffer_size;
1894        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1895        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1896    
1897        if (new_buffer == NULL || new_pbuffer == NULL)
1898          {
1899          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1900          exit(1);
1901          }
1902    
1903        memcpy(new_buffer, buffer, buffer_size);
1904        memcpy(new_pbuffer, pbuffer, buffer_size);
1905    
1906        buffer_size = new_buffer_size;
1907    
1908        start = new_buffer + (start - buffer);
1909        here = new_buffer + (here - buffer);
1910    
1911        free(buffer);
1912        free(pbuffer);
1913    
1914        buffer = new_buffer;
1915        pbuffer = new_pbuffer;
1916        }
1917      }
1918    
1919    return NULL;  /* Control never gets here */
1920    }
1921    
1922    
1923    
1924    /*************************************************
1925    *          Read number from string               *
1926    *************************************************/
1927    
1928    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1929    around with conditional compilation, just do the job by hand. It is only used
1930    for unpicking arguments, so just keep it simple.
1931    
1932    Arguments:
1933      str           string to be converted
1934      endptr        where to put the end pointer
1935    
1936    Returns:        the unsigned long
1937    */
1938    
1939    static int
1940    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1941    {
1942    int result = 0;
1943    while(*str != 0 && isspace(*str)) str++;
1944    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1945    *endptr = str;
1946    return(result);
1947    }
1948    
1949    
1950    
1951    /*************************************************
1952    *             Print one character                *
1953    *************************************************/
1954    
1955    /* Print a single character either literally, or as a hex escape. */
1956    
1957    static int pchar(pcre_uint32 c, FILE *f)
1958    {
1959    int n = 0;
1960    if (PRINTOK(c))
1961      {
1962      if (f != NULL) fprintf(f, "%c", c);
1963      return 1;
1964      }
1965    
1966    if (c < 0x100)
1967      {
1968      if (use_utf)
1969        {
1970        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1971        return 6;
1972        }
1973      else
1974        {
1975        if (f != NULL) fprintf(f, "\\x%02x", c);
1976        return 4;
1977        }
1978      }
1979    
1980    if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1981    return n >= 0 ? n : 0;
1982    }
1983    
1984    
1985    
1986    #ifdef SUPPORT_PCRE8
1987    /*************************************************
1988    *         Print 8-bit character string           *
1989    *************************************************/
1990    
1991    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1992    If handed a NULL file, just counts chars without printing. */
1993    
1994    static int pchars(pcre_uint8 *p, int length, FILE *f)
1995    {
1996    pcre_uint32 c = 0;
1997    int yield = 0;
1998    
1999    if (length < 0)
2000      length = strlen((char *)p);
2001    
2002    while (length-- > 0)
2003      {
2004    #if !defined NOUTF
2005      if (use_utf)
2006        {
2007        int rc = utf82ord(p, &c);
2008        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
2009          {
2010          length -= rc - 1;
2011          p += rc;
2012          yield += pchar(c, f);
2013          continue;
2014          }
2015        }
2016    #endif
2017      c = *p++;
2018      yield += pchar(c, f);
2019      }
2020    
2021    return yield;
2022    }
2023    #endif
2024    
2025    
2026    
2027    #ifdef SUPPORT_PCRE16
2028    /*************************************************
2029    *    Find length of 0-terminated 16-bit string   *
2030    *************************************************/
2031    
2032    static int strlen16(PCRE_SPTR16 p)
2033    {
2034    int len = 0;
2035    while (*p++ != 0) len++;
2036    return len;
2037    }
2038    #endif  /* SUPPORT_PCRE16 */
2039    
2040    
2041    
2042    #ifdef SUPPORT_PCRE32
2043    /*************************************************
2044    *    Find length of 0-terminated 32-bit string   *
2045    *************************************************/
2046    
2047    static int strlen32(PCRE_SPTR32 p)
2048    {
2049    int len = 0;
2050    while (*p++ != 0) len++;
2051    return len;
2052    }
2053    #endif  /* SUPPORT_PCRE32 */
2054    
2055    
2056    
2057    #ifdef SUPPORT_PCRE16
2058    /*************************************************
2059    *           Print 16-bit character string        *
2060    *************************************************/
2061    
2062    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2063    If handed a NULL file, just counts chars without printing. */
2064    
2065    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2066    {
2067    int yield = 0;
2068    
2069    if (length < 0)
2070      length = strlen16(p);
2071    
2072    while (length-- > 0)
2073      {
2074      pcre_uint32 c = *p++ & 0xffff;
2075    #if !defined NOUTF
2076      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2077        {
2078        int d = *p & 0xffff;
2079        if (d >= 0xDC00 && d <= 0xDFFF)
2080          {
2081          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2082          length--;
2083          p++;
2084          }
2085        }
2086    #endif
2087      yield += pchar(c, f);
2088      }
2089    
2090    return yield;
2091    }
2092    #endif  /* SUPPORT_PCRE16 */
2093    
2094    
2095    
2096    #ifdef SUPPORT_PCRE32
2097    /*************************************************
2098    *           Print 32-bit character string        *
2099    *************************************************/
2100    
2101    /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2102    If handed a NULL file, just counts chars without printing. */
2103    
2104    static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2105    {
2106    int yield = 0;
2107    
2108    (void)(utf);  /* Avoid compiler warning */
2109    
2110    if (length < 0)
2111      length = strlen32(p);
2112    
2113    while (length-- > 0)
2114      {
2115      pcre_uint32 c = *p++;
2116      yield += pchar(c, f);
2117      }
2118    
2119    return yield;
2120    }
2121    #endif  /* SUPPORT_PCRE32 */
2122    
2123    
2124    
2125    #ifdef SUPPORT_PCRE8
2126    /*************************************************
2127    *     Read a capture name (8-bit) and check it   *
2128    *************************************************/
2129    
2130    static pcre_uint8 *
2131    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2132    {
2133    pcre_uint8 *npp = *pp;
2134    while (isalnum(*p)) *npp++ = *p++;
2135    *npp++ = 0;
2136    *npp = 0;
2137    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2138      {
2139      fprintf(outfile, "no parentheses with name \"");
2140      PCHARSV(*pp, 0, -1, outfile);
2141      fprintf(outfile, "\"\n");
2142      }
2143    
2144    *pp = npp;
2145    return p;
2146    }
2147    #endif  /* SUPPORT_PCRE8 */
2148    
2149    
2150    
2151    #ifdef SUPPORT_PCRE16
2152    /*************************************************
2153    *     Read a capture name (16-bit) and check it  *
2154    *************************************************/
2155    
2156    /* Note that the text being read is 8-bit. */
2157    
2158    static pcre_uint8 *
2159    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2160    {
2161    pcre_uint16 *npp = *pp;
2162    while (isalnum(*p)) *npp++ = *p++;
2163    *npp++ = 0;
2164    *npp = 0;
2165    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2166      {
2167      fprintf(outfile, "no parentheses with name \"");
2168      PCHARSV(*pp, 0, -1, outfile);
2169      fprintf(outfile, "\"\n");
2170      }
2171    *pp = npp;
2172    return p;
2173    }
2174    #endif  /* SUPPORT_PCRE16 */
2175    
2176    
2177    
2178    #ifdef SUPPORT_PCRE32
2179    /*************************************************
2180    *     Read a capture name (32-bit) and check it  *
2181    *************************************************/
2182    
2183    /* Note that the text being read is 8-bit. */
2184    
2185    static pcre_uint8 *
2186    read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2187    {
2188    pcre_uint32 *npp = *pp;
2189    while (isalnum(*p)) *npp++ = *p++;
2190    *npp++ = 0;
2191    *npp = 0;
2192    if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2193      {
2194      fprintf(outfile, "no parentheses with name \"");
2195      PCHARSV(*pp, 0, -1, outfile);
2196      fprintf(outfile, "\"\n");
2197      }
2198    *pp = npp;
2199    return p;
2200    }
2201    #endif  /* SUPPORT_PCRE32 */
2202    
2203    
2204    
2205    /*************************************************
2206    *              Callout function                  *
2207    *************************************************/
2208    
2209    /* Called from PCRE as a result of the (?C) item. We print out where we are in
2210    the match. Yield zero unless more callouts than the fail count, or the callout
2211    data is not zero.
        
        Argument:
          cb        the callout block passed in by the matching function
        
        Returns:    the integer that cb->callout_data points to, when that pointer
                    is not NULL and the integer is non-zero; otherwise 1 if this
                    callout's number equals callout_fail_id and the incremented
                    callout_count has reached callout_fail_count; otherwise 0
        */
2212    
2213    static int callout(pcre_callout_block *cb)
2214    {
        /* f is the stream used for the subject text: outfile when first_callout or
        callout_extra is set, NULL otherwise. */
2215    FILE *f = (first_callout | callout_extra)? outfile : NULL;
2216    int i, pre_start, post_start, subject_length;
2217    
        /* With callout_extra set (so f is outfile), list the callout number, the
        last capture number, and each offset pair below capture_top. */
2218    if (callout_extra)
2219      {
2220      fprintf(f, "Callout %d: last capture = %d\n",
2221        cb->callout_number, cb->capture_last);
2222    
2223      for (i = 0; i < cb->capture_top * 2; i += 2)
2224        {
2225        if (cb->offset_vector[i] < 0)
2226          fprintf(f, "%2d: <unset>\n", i/2);
2227        else
2228          {
2229          fprintf(f, "%2d: ", i/2);
2230          PCHARSV(cb->subject, cb->offset_vector[i],
2231            cb->offset_vector[i+1] - cb->offset_vector[i], f);
2232          fprintf(f, "\n");
2233          }
2234        }
2235      }
2236    
2237    /* Re-print the subject in canonical form, the first time or if giving full
2238    details. On subsequent calls in the same match, we use pchars just to find the
2239    printed lengths of the substrings. */
2240    
2241    if (f != NULL) fprintf(f, "--->");
2242    
        /* pre_start receives the printed width of the subject before the match
        start, post_start the width from the match start to the current position,
        and subject_length the width of the whole subject (measured with a NULL
        file, so nothing is printed for that one). */
2243    PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2244    PCHARS(post_start, cb->subject, cb->start_match,
2245      cb->current_position - cb->start_match, f);
2246    
2247    PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2248    
2249    PCHARSV(cb->subject, cb->current_position,
2250      cb->subject_length - cb->current_position, f);
2251    
2252    if (f != NULL) fprintf(f, "\n");
2253    
2254    /* Always print appropriate indicators, with callout number if not already
2255    shown. For automatic callouts, show the pattern offset. */
2256    
        /* Callout number 255 is treated as an automatic callout: print the signed
        pattern offset, moving to a fresh indented line when it exceeds 99. */
2257    if (cb->callout_number == 255)
2258      {
2259      fprintf(outfile, "%+3d ", cb->pattern_position);
2260      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
2261      }
2262    else
2263      {
2264      if (callout_extra) fprintf(outfile, "    ");
2265        else fprintf(outfile, "%3d ", cb->callout_number);
2266      }
2267    
        /* Place one caret under the match start and, if the current position is
        beyond it, a second caret post_start columns further on. */
2268    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2269    fprintf(outfile, "^");
2270    
2271    if (post_start > 0)
2272      {
2273      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2274      fprintf(outfile, "^");
2275      }
2276    
        /* Pad out to four columns past the printed subject width, then show the
        next pattern item (a single character when next_item_length is zero). */
2277    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2278      fprintf(outfile, " ");
2279    
2280    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2281      pbuffer + cb->pattern_position);
2282    
2283    fprintf(outfile, "\n");
2284    first_callout = 0;
2285    
        /* Report the mark only when its pointer differs from the one remembered
        from the previous callout. */
2286    if (cb->mark != last_callout_mark)
2287      {
2288      if (cb->mark == NULL)
2289        fprintf(outfile, "Latest Mark: <unset>\n");
2290      else
2291        {
2292        fprintf(outfile, "Latest Mark: ");
2293        PCHARSV(cb->mark, 0, -1, outfile);
2294        putc('\n', outfile);
2295        }
2296      last_callout_mark = cb->mark;
2297      }
2298    
        /* Non-zero callout data is printed and returned as the result. */
2299    if (cb->callout_data != NULL)
2300      {
2301      int callout_data = *((int *)(cb->callout_data));
2302      if (callout_data != 0)
2303        {
2304        fprintf(outfile, "Callout data = %d\n", callout_data);
2305        return callout_data;
2306        }
2307      }
2308    
        /* Only callouts numbered callout_fail_id are counted towards the fail
        count. */
2309    return (cb->callout_number != callout_fail_id)? 0 :
2310           (++callout_count >= callout_fail_count)? 1 : 0;
2311    }
2312    
2313    
2314    /*************************************************
2315    *            Local malloc functions              *
2316    *************************************************/
2317    
2318    /* Alternative malloc function, to test functionality and save the size of a
2319    compiled re, which is the first store request that pcre_compile() makes. The
2320    show_malloc variable is set only during matching. */
2321    
2322    static void *new_malloc(size_t size)
2323    {
2324    void *block = malloc(size);
2325    gotten_store = size;
2326    if (first_gotten_store == 0) first_gotten_store = size;
2327    if (show_malloc)
2328      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
2329    return block;
2330    }
2331    
2332    static void new_free(void *block)
2333    {
2334    if (show_malloc)
2335      fprintf(outfile, "free             %p\n", block);
2336    free(block);
2337    }
2338    
2339    /* For recursion malloc/free, to test stacking calls */
2340    
2341    static void *stack_malloc(size_t size)
2342    {
2343    void *block = malloc(size);
2344    if (show_malloc)
2345      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2346    return block;
2347    }
2348    
2349    static void stack_free(void *block)
2350    {
2351    if (show_malloc)
2352      fprintf(outfile, "stack_free       %p\n", block);
2353    free(block);
2354    }
2355    
2356    
2357    /*************************************************
2358    *          Call pcre_fullinfo()                  *
2359    *************************************************/
2360    
2361    /* Get one piece of information from the pcre_fullinfo() function. When only
2362    one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2363    value, but the code is defensive.
2364    
2365    Arguments:
2366      re        compiled regex
2367      study     study data
2368      option    PCRE_INFO_xxx option
2369      ptr       where to put the data
2370    
2371    Returns:    0 when OK, < 0 on error
2372    */
2373    
2374    static int
2375    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2376    {
2377    int rc;
2378    
2379    if (pcre_mode == PCRE32_MODE)
2380    #ifdef SUPPORT_PCRE32
2381      rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2382    #else
2383      rc = PCRE_ERROR_BADMODE;
2384    #endif
2385    else if (pcre_mode == PCRE16_MODE)
2386    #ifdef SUPPORT_PCRE16
2387      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2388    #else
2389      rc = PCRE_ERROR_BADMODE;
2390    #endif
2391    else
2392    #ifdef SUPPORT_PCRE8
2393      rc = pcre_fullinfo(re, study, option, ptr);
2394    #else
2395      rc = PCRE_ERROR_BADMODE;
2396    #endif
2397    
2398    if (rc < 0)
2399      {
2400      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2401        pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2402      if (rc == PCRE_ERROR_BADMODE)
2403        fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2404          "%d-bit mode\n", 8 * CHAR_SIZE,
2405          8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2406      }
2407    
2408    return rc;
2409    }
2410    
2411    
2412    
2413    /*************************************************
2414    *             Swap byte functions                *
2415    *************************************************/
2416    
2417    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2418    value, respectively.
2419    
2420    Arguments:
2421      value        any number
2422    
2423    Returns:       the byte swapped value
2424    */
2425    
2426    static pcre_uint32
2427    swap_uint32(pcre_uint32 value)
2428    {
2429    return ((value & 0x000000ff) << 24) |
2430           ((value & 0x0000ff00) <<  8) |
2431           ((value & 0x00ff0000) >>  8) |
2432           (value >> 24);
2433    }
2434    
2435    static pcre_uint16
2436    swap_uint16(pcre_uint16 value)
2437    {
2438    return (value >> 8) | (value << 8);
2439    }
2440    
2441    
2442    
2443    /*************************************************
2444    *        Flip bytes in a compiled pattern        *
2445    *************************************************/
2446    
2447    /* This function is called if the 'F' option was present on a pattern that is
2448    to be written to a file. We flip the bytes of all the integer fields in the
2449    regex data block and the study block. In 16-bit mode this also flips relevant
2450    bytes in the pattern itself. This is to make it possible to test PCRE's
2451    ability to reload byte-flipped patterns, e.g. those compiled on a different
2452    architecture. */
2453    
2454    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2455    static void
2456    regexflip8_or_16(pcre *ere, pcre_extra *extra)
2457    {
2458    real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2459    #ifdef SUPPORT_PCRE16
2460    int op;
2461    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2462    int length = re->name_count * re->name_entry_size;
2463    #ifdef SUPPORT_UTF
2464    BOOL utf = (re->options & PCRE_UTF16) != 0;
2465    BOOL utf16_char = FALSE;
2466    #endif /* SUPPORT_UTF */
2467    #endif /* SUPPORT_PCRE16 */
2468    
2469    /* Always flip the bytes in the main data block and study blocks. */
2470    
2471    re->magic_number = REVERSED_MAGIC_NUMBER;
2472    re->size = swap_uint32(re->size);
2473    re->options = swap_uint32(re->options);
2474    re->flags = swap_uint16(re->flags);
2475    re->top_bracket = swap_uint16(re->top_bracket);
2476    re->top_backref = swap_uint16(re->top_backref);
2477    re->first_char = swap_uint16(re->first_char);
2478    re->req_char = swap_uint16(re->req_char);
2479    re->name_table_offset = swap_uint16(re->name_table_offset);
2480    re->name_entry_size = swap_uint16(re->name_entry_size);
2481    re->name_count = swap_uint16(re->name_count);
2482    
2483    if (extra != NULL)
2484      {
2485      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2486      rsd->size = swap_uint32(rsd->size);
2487      rsd->flags = swap_uint32(rsd->flags);
2488      rsd->minlength = swap_uint32(rsd->minlength);
2489      }
2490    
2491    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2492    in the name table, if present, and then in the pattern itself. */
2493    
2494    #ifdef SUPPORT_PCRE16
2495    if (pcre_mode != PCRE16_MODE) return;
2496    
2497    while(TRUE)
2498      {
2499      /* Swap previous characters. */
2500      while (length-- > 0)
2501        {
2502        *ptr = swap_uint16(*ptr);
2503        ptr++;
2504        }
2505    #ifdef SUPPORT_UTF
2506      if (utf16_char)
2507        {
2508        if ((ptr[-1] & 0xfc00) == 0xd800)
2509          {
2510          /* We know that there is only one extra character in UTF-16. */
2511          *ptr = swap_uint16(*ptr);
2512          ptr++;
2513          }
2514        }
2515      utf16_char = FALSE;
2516    #endif /* SUPPORT_UTF */
2517    
2518      /* Get next opcode. */
2519    
2520      length = 0;
2521      op = *ptr;
2522      *ptr++ = swap_uint16(op);
2523    
2524      switch (op)
2525        {
2526        case OP_END:
2527        return;
2528    
2529    #ifdef SUPPORT_UTF
2530        case OP_CHAR:
2531        case OP_CHARI:
2532        case OP_NOT:
2533        case OP_NOTI:
2534        case OP_STAR:
2535        case OP_MINSTAR:
2536        case OP_PLUS:
2537        case OP_MINPLUS:
2538        case OP_QUERY:
2539        case OP_MINQUERY:
2540        case OP_UPTO:
2541        case OP_MINUPTO:
2542        case OP_EXACT:
2543        case OP_POSSTAR:
2544        case OP_POSPLUS:
2545        case OP_POSQUERY:
2546        case OP_POSUPTO:
2547        case OP_STARI:
2548        case OP_MINSTARI:
2549        case OP_PLUSI:
2550        case OP_MINPLUSI:
2551        case OP_QUERYI:
2552        case OP_MINQUERYI:
2553        case OP_UPTOI:
2554        case OP_MINUPTOI:
2555        case OP_EXACTI:
2556        case OP_POSSTARI:
2557        case OP_POSPLUSI:
2558        case OP_POSQUERYI:
2559        case OP_POSUPTOI:
2560        case OP_NOTSTAR:
2561        case OP_NOTMINSTAR:
2562        case OP_NOTPLUS:
2563        case OP_NOTMINPLUS:
2564        case OP_NOTQUERY:
2565        case OP_NOTMINQUERY:
2566        case OP_NOTUPTO:
2567        case OP_NOTMINUPTO:
2568        case OP_NOTEXACT:
2569        case OP_NOTPOSSTAR:
2570        case OP_NOTPOSPLUS:
2571        case OP_NOTPOSQUERY:
2572        case OP_NOTPOSUPTO:
2573        case OP_NOTSTARI:
2574        case OP_NOTMINSTARI:
2575        case OP_NOTPLUSI:
2576        case OP_NOTMINPLUSI:
2577        case OP_NOTQUERYI:
2578        case OP_NOTMINQUERYI:
2579        case OP_NOTUPTOI:
2580        case OP_NOTMINUPTOI:
2581        case OP_NOTEXACTI:
2582        case OP_NOTPOSSTARI:
2583        case OP_NOTPOSPLUSI:
2584        case OP_NOTPOSQUERYI:
2585        case OP_NOTPOSUPTOI:
2586        if (utf) utf16_char = TRUE;
2587    #endif
2588        /* Fall through. */
2589    
2590        default:
2591        length = OP_lengths16[op] - 1;
2592        break;
2593    
2594        case OP_CLASS:
2595        case OP_NCLASS:
2596        /* Skip the character bit map. */
2597        ptr += 32/sizeof(pcre_uint16);
2598        length = 0;
2599        break;
2600    
2601        case OP_XCLASS:
2602        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2603        if (LINK_SIZE > 1)
2604          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2605            - (1 + LINK_SIZE + 1));
2606        else
2607          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2608    
2609        /* Reverse the size of the XCLASS instance. */
2610        *ptr = swap_uint16(*ptr);
2611        ptr++;
2612        if (LINK_SIZE > 1)
2613          {
2614          *ptr = swap_uint16(*ptr);
2615          ptr++;
2616          }
2617    
2618        op = *ptr;
2619        *ptr = swap_uint16(op);
2620        ptr++;
2621        if ((op & XCL_MAP) != 0)
2622          {
2623          /* Skip the character bit map. */
2624          ptr += 32/sizeof(pcre_uint16);
2625          length -= 32/sizeof(pcre_uint16);
2626          }
2627        break;
2628        }
2629      }
2630    /* Control should never reach here in 16 bit mode. */
2631    #endif /* SUPPORT_PCRE16 */
2632    }
2633    #endif /* SUPPORT_PCRE[8|16] */
2634    
2635    
2636    
2637    #if defined SUPPORT_PCRE32
2638    static void
2639    regexflip_32(pcre *ere, pcre_extra *extra)
2640    {
        /* Byte-flip a pattern compiled by the 32-bit library: the header fields,
        the study block fields when extra is not NULL, the name table, and every
        32-bit unit of the compiled code apart from the class bit maps, which are
        stepped over untouched.
        
        Arguments:
          ere       compiled regex
          extra     extra data block, or NULL
        
        NOTE(review): when extra is not NULL, extra->study_data is dereferenced
        without a NULL check - confirm that callers only pass extra for a studied
        pattern. */
2641    real_pcre32 *re = (real_pcre32 *)ere;
2642    int op;
        /* ptr and length are computed from the header before its fields are
        swapped below: the name table start, counted in 32-bit units from the
        start of the block, and the number of units it occupies. */
2643    pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2644    int length = re->name_count * re->name_entry_size;
2645    
2646    /* Always flip the bytes in the main data block and study blocks. */
2647    
2648    re->magic_number = REVERSED_MAGIC_NUMBER;
2649    re->size = swap_uint32(re->size);
2650    re->options = swap_uint32(re->options);
2651    re->flags = swap_uint16(re->flags);
2652    re->top_bracket = swap_uint16(re->top_bracket);
2653    re->top_backref = swap_uint16(re->top_backref);
2654    re->first_char = swap_uint32(re->first_char);
2655    re->req_char = swap_uint32(re->req_char);
2656    re->name_table_offset = swap_uint16(re->name_table_offset);
2657    re->name_entry_size = swap_uint16(re->name_entry_size);
2658    re->name_count = swap_uint16(re->name_count);
2659    
2660    if (extra != NULL)
2661      {
2662      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2663      rsd->size = swap_uint32(rsd->size);
2664      rsd->flags = swap_uint32(rsd->flags);
2665      rsd->minlength = swap_uint32(rsd->minlength);
2666      }
2667    
2668    /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2669    the pattern itself. */
2670    
2671    while(TRUE)
2672      {
2673      /* Swap previous characters. */
          /* On the first pass these are the name table units; on later passes
          they are the operand units of the opcode just processed. */
2674      while (length-- > 0)
2675        {
2676        *ptr = swap_uint32(*ptr);
2677        ptr++;
2678        }
2679    
2680      /* Get next opcode. */
          /* The opcode is read before being swapped and is stored back swapped. */
2681    
2682      length = 0;
2683      op = *ptr;
2684      *ptr++ = swap_uint32(op);
2685    
2686      switch (op)
2687        {
            /* OP_END terminates the walk; it is the only exit from the loop. */
2688        case OP_END:
2689        return;
2690    
            /* For every other opcode the operand count comes from the
            OP_lengths32 table, less one for the opcode unit itself. */
2691        default:
2692        length = OP_lengths32[op] - 1;
2693        break;
2694    
2695        case OP_CLASS:
2696        case OP_NCLASS:
2697        /* Skip the character bit map. */
2698        ptr += 32/sizeof(pcre_uint32);
2699        length = 0;
2700        break;
2701    
2702        case OP_XCLASS:
2703        /* LINK_SIZE can only be 1 in 32-bit mode. */
            /* The stored size is read here before it is swapped below; what is
            left in length is the number of units after the link unit and the
            XCLASS flags unit. */
2704        length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2705    
2706        /* Reverse the size of the XCLASS instance. */
2707        *ptr = swap_uint32(*ptr);
2708        ptr++;
2709    
            /* Now the XCLASS flags unit; op is reused to hold its unswapped
            value so that XCL_MAP can be tested. */
2710        op = *ptr;
2711        *ptr = swap_uint32(op);
2712        ptr++;
2713        if ((op & XCL_MAP) != 0)
2714          {
2715          /* Skip the character bit map. */
2716          ptr += 32/sizeof(pcre_uint32);
2717          length -= 32/sizeof(pcre_uint32);
2718          }
2719        break;
2720        }
2721      }
2722    /* Control should never reach here in 32 bit mode. */
2723    }
2724    
2725    #endif /* SUPPORT_PCRE32 */
2726    
2727    
2728    
2729    static void
2730    regexflip(pcre *ere, pcre_extra *extra)
2731    {
2732    #if defined SUPPORT_PCRE32
2733      if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2734        regexflip_32(ere, extra);
2735    #endif
2736    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2737      if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2738        regexflip8_or_16(ere, extra);
2739    #endif
2740    }
2741    
2742    
2743    
2744    /*************************************************
2745    *        Check match or recursion limit          *
2746    *************************************************/
2747    
2748    static int
2749    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2750      int start_offset, int options, int *use_offsets, int use_size_offsets,
2751      int flag, unsigned long int *limit, int errnumber, const char *msg)
2752    {
2753    int count;
2754    int min = 0;
2755    int mid = 64;
2756    int max = -1;
2757    
2758    extra->flags |= flag;
2759    
2760    for (;;)
2761      {
2762      *limit = mid;
2763    
2764      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2765        use_offsets, use_size_offsets);
2766    
2767      if (count == errnumber)
2768        {
2769        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2770        min = mid;
2771        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2772        }
2773    
2774      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2775                             count == PCRE_ERROR_PARTIAL)
2776        {
2777        if (mid == min + 1)
2778          {
2779          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2780          break;
2781          }
2782        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2783        max = mid;
2784        mid = (min + mid)/2;
2785        }
2786      else break;    /* Some other error */
2787      }
2788    
2789    extra->flags &= ~flag;
2790    return count;
2791    }
2792    
2793    
2794    
2795    /*************************************************
2796    *         Case-independent strncmp() function    *
2797    *************************************************/
2798    
2799    /*
2800    Arguments:
2801      s         first string
2802      t         second string
2803      n         number of characters to compare
2804    
2805    Returns:    < 0, = 0, or > 0, according to the comparison
2806    */
2807    
2808    static int
2809    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2810    {
2811    while (n--)
2812      {
2813      int c = tolower(*s++) - tolower(*t++);
2814      if (c) return c;
2815      }
2816    return 0;
2817    }
2818    
2819    
2820    
2821    /*************************************************
2822    *         Check newline indicator                *
2823    *************************************************/
2824    
2825    /* This is used both at compile and run-time to check for <xxx> escapes. Print
2826    a message and return 0 if there is no match.
2827    
2828    Arguments:
2829      p           points after the leading '<'
2830      f           file for error message
2831    
2832    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2833    */
2834    
2835    static int
2836    check_newline(pcre_uint8 *p, FILE *f)
2837    {
2838    if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2839    if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2840    if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2841    if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2842    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2843    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2844    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2845    fprintf(f, "Unknown newline type at: <%s\n", p);
2846    return 0;
2847    }
2848    
2849    
2850    
2851    /*************************************************
2852    *             Usage function                     *
2853    *************************************************/
2854    
/* Write the command line summary to stdout. An option that depends on how
pcretest was built is listed only when it is available: -8, -16 and -32 for
the corresponding libraries, -dfa unless NODFA is defined, and -p unless
NOPOSIX is defined. */

static void
usage(void)
{
printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
/* -8 is accepted by the option scan in main(), so list it next to the other
library selectors. */
#ifdef SUPPORT_PCRE8
printf("  -8       use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
printf("  -16      use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf("  -32      use the 32-bit library\n");
#endif
printf("  -b       show compiled code\n");
printf("  -C       show PCRE compile-time options and exit\n");
printf("  -C arg   show a specific compile-time option\n");
printf("           and exit with its value. The arg can be:\n");
printf("     linksize     internal link size [2, 3, 4]\n");
printf("     pcre8        8 bit library support enabled [0, 1]\n");
printf("     pcre16       16 bit library support enabled [0, 1]\n");
printf("     pcre32       32 bit library support enabled [0, 1]\n");
printf("     utf          Unicode Transformation Format supported [0, 1]\n");
printf("     ucp          Unicode Properties supported [0, 1]\n");
printf("     jit          Just-in-time compiler supported [0, 1]\n");
printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
printf("  -d       debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf("  -dfa     force DFA matching for all subjects\n");
#endif
printf("  -help    show usage information\n");
printf("  -i       show information about compiled patterns\n"
       "  -M       find MATCH_LIMIT minimum for each subject\n"
       "  -m       output memory used information\n"
       "  -o <n>   set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf("  -p       use POSIX interface\n");
#endif
printf("  -q       quiet: do not output PCRE version number at start\n");
printf("  -S <n>   set stack size to <n> megabytes\n");
printf("  -s       force each pattern to be studied at basic level\n"
       "  -s+      force each pattern to be studied, using JIT if available\n"
       "  -s++     ditto, verifying when JIT was actually used\n"
       "  -s+n     force each pattern to be studied, using JIT if available,\n"
       "             where 1 <= n <= 7 selects JIT options\n"
       "  -s++n    ditto, verifying when JIT was actually used\n"
       "  -t       time compilation and execution\n");
printf("  -t <n>   time compilation and execution, repeating <n> times\n");
printf("  -tm      time execution (matching) only\n");
printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
}
2909    
2910    
2911    
2912    /*************************************************
2913    *                Main Program                    *
2914    *************************************************/
2915    
2916    /* Read lines from named file or stdin and write to named file or stdout; lines
2917    consist of a regular expression, in delimiters and optionally followed by
2918    options, followed by a set of test data, terminated by an empty line. */
2919    
2920    int main(int argc, char **argv)
2921    {
2922    FILE *infile = stdin;
2923    const char *version;
2924    int options = 0;
2925    int study_options = 0;
2926    int default_find_match_limit = FALSE;
2927    int op = 1;
2928    int timeit = 0;
2929    int timeitm = 0;
2930    int showinfo = 0;
2931    int showstore = 0;
2932    int force_study = -1;
2933    int force_study_options = 0;
2934    int quiet = 0;
2935    int size_offsets = 45;
2936    int size_offsets_max;
2937    int *offsets = NULL;
2938    int debug = 0;
2939    int done = 0;
2940    int all_use_dfa = 0;
2941    int verify_jit = 0;
2942    int yield = 0;
2943    int stack_size;
2944    pcre_uint8 *dbuffer = NULL;
2945    size_t dbuffer_size = 1u << 14;
2946    
2947    #if !defined NOPOSIX
2948    int posix = 0;
2949    #endif
2950    #if !defined NODFA
2951    int *dfa_workspace = NULL;
2952    #endif
2953    
2954    pcre_jit_stack *jit_stack = NULL;
2955    
2956    /* These vectors store, end-to-end, a list of zero-terminated captured
2957    substring names, each list itself being terminated by an empty name. Assume
2958    that 1024 is plenty long enough for the few names we'll be testing. It is
2959    easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2960    for the actual memory, to ensure alignment. */
2961    
2962    pcre_uint32 copynames[1024];
2963    pcre_uint32 getnames[1024];
2964    
2965    #ifdef SUPPORT_PCRE32
2966    pcre_uint32 *cn32ptr;
2967    pcre_uint32 *gn32ptr;
2968    #endif
2969    
2970    #ifdef SUPPORT_PCRE16
2971    pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2972    pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2973    pcre_uint16 *cn16ptr;
2974    pcre_uint16 *gn16ptr;
2975    #endif
2976    
2977    #ifdef SUPPORT_PCRE8
2978    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2979    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2980    pcre_uint8 *cn8ptr;
2981    pcre_uint8 *gn8ptr;
2982    #endif
2983    
2984    /* Get buffers from malloc() so that valgrind will check their misuse when
2985    debugging. They grow automatically when very long lines are read. The 16-
2986    and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2987    
2988    buffer = (pcre_uint8 *)malloc(buffer_size);
2989    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2990    
2991    /* The outfile variable is static so that new_malloc can use it. */
2992    
2993    outfile = stdout;
2994    
2995    /* The following  _setmode() stuff is some Windows magic that tells its runtime
2996    library to translate CRLF into a single LF character. At least, that's what
2997    I've been told: never having used Windows I take this all on trust. Originally
2998    it set 0x8000, but then I was advised that _O_BINARY was better. */
2999    
3000    #if defined(_WIN32) || defined(WIN32)
3001    _setmode( _fileno( stdout ), _O_BINARY );
3002    #endif
3003    
3004    /* Get the version number: both pcre_version() and pcre16_version() give the
3005    same answer. We just need to ensure that we call one that is available. */
3006    
3007    #if defined SUPPORT_PCRE8
3008    version = pcre_version();
3009    #elif defined SUPPORT_PCRE16
3010    version = pcre16_version();
3011    #elif defined SUPPORT_PCRE32
3012    version = pcre32_version();
3013    #endif
3014    
3015  /* Get one piece of information from the pcre_fullinfo() function */  /* Scan options */
3016    
3017  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  while (argc > 1 && argv[op][0] == '-')
3018  {    {
3019  int rc;    pcre_uint8 *endptr;
3020  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)    char *arg = argv[op];
   fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  
 }  
3021    
3022      if (strcmp(arg, "-m") == 0) showstore = 1;
3023      else if (strcmp(arg, "-s") == 0) force_study = 0;
3024    
3025      else if (strncmp(arg, "-s+", 3) == 0)
3026        {
3027        arg += 3;
3028        if (*arg == '+') { arg++; verify_jit = TRUE; }
3029        force_study = 1;
3030        if (*arg == 0)
3031          force_study_options = jit_study_bits[6];
3032        else if (*arg >= '1' && *arg <= '7')
3033          force_study_options = jit_study_bits[*arg - '1'];
3034        else goto BAD_ARG;
3035        }
3036      else if (strcmp(arg, "-8") == 0)
3037        {
3038    #ifdef SUPPORT_PCRE8
3039        pcre_mode = PCRE8_MODE;
3040    #else
3041        printf("** This version of PCRE was built without 8-bit support\n");
3042        exit(1);
3043    #endif
3044        }
3045      else if (strcmp(arg, "-16") == 0)
3046        {
3047    #ifdef SUPPORT_PCRE16
3048        pcre_mode = PCRE16_MODE;
3049    #else
3050        printf("** This version of PCRE was built without 16-bit support\n");
3051        exit(1);
3052    #endif
3053        }
3054      else if (strcmp(arg, "-32") == 0)
3055        {
3056    #ifdef SUPPORT_PCRE32
3057        pcre_mode = PCRE32_MODE;
3058    #else
3059        printf("** This version of PCRE was built without 32-bit support\n");
3060        exit(1);
3061    #endif
3062        }
3063      else if (strcmp(arg, "-q") == 0) quiet = 1;
3064      else if (strcmp(arg, "-b") == 0) debug = 1;
3065      else if (strcmp(arg, "-i") == 0) showinfo = 1;
3066      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3067      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3068    #if !defined NODFA
3069      else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3070    #endif
3071      else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3072          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3073            *endptr == 0))
3074        {
3075        op++;
3076        argc--;
3077        }
3078      else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
3079        {
3080        int both = arg[2] == 0;
3081        int temp;
3082        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3083                         *endptr == 0))
3084          {
3085          timeitm = temp;
3086          op++;
3087          argc--;
3088          }
3089        else timeitm = LOOPREPEAT;
3090        if (both) timeit = timeitm;
3091        }
3092      else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3093          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3094            *endptr == 0))
3095        {
3096    #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3097        printf("PCRE: -S not supported on this OS\n");
3098        exit(1);
3099    #else
3100        int rc;
3101        struct rlimit rlim;
3102        getrlimit(RLIMIT_STACK, &rlim);
3103        rlim.rlim_cur = stack_size * 1024 * 1024;
3104        rc = setrlimit(RLIMIT_STACK, &rlim);
3105        if (rc != 0)
3106          {
3107        printf("PCRE: setrlimit() failed with error %d\n", rc);
3108        exit(1);
3109          }
3110        op++;
3111        argc--;
3112    #endif
3113        }
3114    #if !defined NOPOSIX
3115      else if (strcmp(arg, "-p") == 0) posix = 1;
3116    #endif
3117      else if (strcmp(arg, "-C") == 0)
3118        {
3119        int rc;
3120        unsigned long int lrc;
3121    
3122        if (argc > 2)
3123          {
3124          if (strcmp(argv[op + 1], "linksize") == 0)
3125            {
3126            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3127            printf("%d\n", rc);
3128            yield = rc;
3129    
3130    #ifdef __VMS
3131            vms_setsymbol("LINKSIZE",0,yield );
3132    #endif
3133            }
3134          else if (strcmp(argv[op + 1], "pcre8") == 0)
3135            {
3136    #ifdef SUPPORT_PCRE8
3137            printf("1\n");
3138            yield = 1;
3139    #else
3140            printf("0\n");
3141            yield = 0;
3142    #endif
3143    #ifdef __VMS
3144            vms_setsymbol("PCRE8",0,yield );
3145    #endif
3146            }
3147          else if (strcmp(argv[op + 1], "pcre16") == 0)
3148            {
3149    #ifdef SUPPORT_PCRE16
3150            printf("1\n");
3151            yield = 1;
3152    #else
3153            printf("0\n");
3154            yield = 0;
3155    #endif
3156    #ifdef __VMS
3157            vms_setsymbol("PCRE16",0,yield );
3158    #endif
3159            }
3160          else if (strcmp(argv[op + 1], "pcre32") == 0)
3161            {
3162    #ifdef SUPPORT_PCRE32
3163            printf("1\n");
3164            yield = 1;
3165    #else
3166            printf("0\n");
3167            yield = 0;
3168    #endif
3169    #ifdef __VMS
3170            vms_setsymbol("PCRE32",0,yield );
3171    #endif
3172            }
3173          else if (strcmp(argv[op + 1], "utf") == 0)
3174            {
3175    #ifdef SUPPORT_PCRE8
3176            if (pcre_mode == PCRE8_MODE)
3177              (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3178    #endif
3179    #ifdef SUPPORT_PCRE16
3180            if (pcre_mode == PCRE16_MODE)
3181              (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3182    #endif
3183    #ifdef SUPPORT_PCRE32
3184            if (pcre_mode == PCRE32_MODE)
3185              (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3186    #endif
3187            printf("%d\n", rc);
3188            yield = rc;
3189    #ifdef __VMS
3190            vms_setsymbol("UTF",0,yield );
3191    #endif
3192            }
3193          else if (strcmp(argv[op + 1], "ucp") == 0)
3194            {
3195            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3196            printf("%d\n", rc);
3197            yield = rc;
3198            }
3199          else if (strcmp(argv[op + 1], "jit") == 0)
3200            {
3201            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3202            printf("%d\n", rc);
3203            yield = rc;
3204            }
3205          else if (strcmp(argv[op + 1], "newline") == 0)
3206            {
3207            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3208            print_newline_config(rc, TRUE);
3209            }
3210          else if (strcmp(argv[op + 1], "ebcdic") == 0)
3211            {
3212    #ifdef EBCDIC
3213            printf("1\n");
3214            yield = 1;
3215    #else
3216            printf("0\n");
3217    #endif
3218            }
3219          else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3220            {
3221    #ifdef EBCDIC
3222            printf("0x%02x\n", CHAR_LF);
3223    #else
3224            printf("0\n");
3225    #endif
3226            }
3227          else
3228            {
3229            printf("Unknown -C option: %s\n", argv[op + 1]);
3230            }
3231          goto EXIT;
3232          }
3233    
3234  /* Read lines from named file or stdin and write to named file or stdout; lines      /* No argument for -C: output all configuration information. */
 consist of a regular expression, in delimiters and optionally followed by  
 options, followed by a set of test data, terminated by an empty line. */  
3235    
3236  int main(int argc, char **argv)      printf("PCRE version %s\n", version);
3237  {      printf("Compiled with\n");
 FILE *infile = stdin;  
 int options = 0;  
 int study_options = 0;  
 int op = 1;  
 int timeit = 0;  
 int showinfo = 0;  
 int showstore = 0;  
 int posix = 0;  
 int debug = 0;  
 int done = 0;  
 unsigned char buffer[30000];  
 unsigned char dbuffer[1024];  
3238    
3239  /* Static so that new_malloc can use it. */  #ifdef EBCDIC
3240        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3241    #endif
3242    
3243  outfile = stdout;  /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3244    are set, either both UTFs are supported or both are not supported. */
3245    
3246  /* Scan options */  #ifdef SUPPORT_PCRE8
3247        printf("  8-bit support\n");
3248        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3249          printf ("  %sUTF-8 support\n", rc ? "" : "No ");
3250    #endif
3251    #ifdef SUPPORT_PCRE16
3252        printf("  16-bit support\n");
3253        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3254        printf ("  %sUTF-16 support\n", rc ? "" : "No ");
3255    #endif
3256    #ifdef SUPPORT_PCRE32
3257        printf("  32-bit support\n");
3258        (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3259        printf ("  %sUTF-32 support\n", rc ? "" : "No ");
3260    #endif
3261    
3262  while (argc > 1 && argv[op][0] == '-')      (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3263    {      printf("  %sUnicode properties support\n", rc? "" : "No ");
3264    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3265      showstore = 1;      if (rc)
3266    else if (strcmp(argv[op], "-t") == 0) timeit = 1;        {
3267    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;        const char *arch;
3268    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;        (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3269    else if (strcmp(argv[op], "-p") == 0) posix = 1;        printf("  Just-in-time compiler support: %s\n", arch);
3270          }
3271        else
3272          printf("  No just-in-time compiler support\n");
3273        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3274        print_newline_config(rc, FALSE);
3275        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3276        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3277                                         "all Unicode newlines");
3278        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3279        printf("  Internal link size = %d\n", rc);
3280        (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3281        printf("  POSIX malloc threshold = %d\n", rc);
3282        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3283        printf("  Default match limit = %ld\n", lrc);
3284        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3285        printf("  Default recursion depth limit = %ld\n", lrc);
3286        (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3287        printf("  Match recursion uses %s", rc? "stack" : "heap");
3288        if (showstore)
3289          {
3290          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3291          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3292          }
3293        printf("\n");
3294        goto EXIT;
3295        }
3296      else if (strcmp(arg, "-help") == 0 ||
3297               strcmp(arg, "--help") == 0)
3298        {
3299        usage();
3300        goto EXIT;
3301        }
3302    else    else
3303      {      {
3304      printf("*** Unknown option %s\n", argv[op]);      BAD_ARG:
3305      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("** Unknown or malformed option %s\n", arg);
3306      printf("  -d   debug: show compiled code; implies -i\n"      usage();
3307             "  -i   show information about compiled pattern\n"      yield = 1;
3308             "  -p   use POSIX interface\n"      goto EXIT;
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
3309      }      }
3310    op++;    op++;
3311    argc--;    argc--;
3312    }    }
3313    
3314    /* Get the store for the offsets vector, and remember what it was */
3315    
3316    size_offsets_max = size_offsets;
3317    offsets = (int *)malloc(size_offsets_max * sizeof(int));
3318    if (offsets == NULL)
3319      {
3320      printf("** Failed to get %d bytes of memory for offsets vector\n",
3321        (int)(size_offsets_max * sizeof(int)));
3322      yield = 1;
3323      goto EXIT;
3324      }
3325    
3326  /* Sort out the input and output files */  /* Sort out the input and output files */
3327    
3328  if (argc > 1)  if (argc > 1)
3329    {    {
3330    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
3331    if (infile == NULL)    if (infile == NULL)
3332      {      {
3333      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
3334      return 1;      yield = 1;
3335        goto EXIT;
3336      }      }
3337    }    }
3338    
3339  if (argc > 2)  if (argc > 2)
3340    {    {
3341    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
3342    if (outfile == NULL)    if (outfile == NULL)
3343      {      {
3344      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
3345      return 1;      yield = 1;
3346        goto EXIT;
3347      }      }
3348    }    }
3349    
3350  /* Set alternative malloc function */  /* Set alternative malloc function */
3351    
3352    #ifdef SUPPORT_PCRE8
3353  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
3354    pcre_free = new_free;
3355    pcre_stack_malloc = stack_malloc;
3356    pcre_stack_free = stack_free;
3357    #endif
3358    
3359    #ifdef SUPPORT_PCRE16
3360    pcre16_malloc = new_malloc;
3361    pcre16_free = new_free;
3362    pcre16_stack_malloc = stack_malloc;
3363    pcre16_stack_free = stack_free;
3364    #endif
3365    
3366  /* Heading line, then prompt for first regex if stdin */  #ifdef SUPPORT_PCRE32
3367    pcre32_malloc = new_malloc;
3368    pcre32_free = new_free;
3369    pcre32_stack_malloc = stack_malloc;
3370    pcre32_stack_free = stack_free;
3371    #endif
3372    
3373    /* Heading line unless quiet, then prompt for first regex if stdin */
3374    
3375  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3376    
3377  /* Main loop */  /* Main loop */
3378    
# Line 391  while (!done) Line 3383  while (!done)
3383    
3384  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
3385    regex_t preg;    regex_t preg;
3386      int do_posix = 0;
3387  #endif  #endif
3388    
3389    const char *error;    const char *error;
3390    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
3391    unsigned const char *tables = NULL;    pcre_uint8 *p, *pp, *ppp;
3392      pcre_uint8 *to_file = NULL;
3393      const pcre_uint8 *tables = NULL;
3394      unsigned long int get_options;
3395      unsigned long int true_size, true_study_size = 0;
3396      size_t size, regex_gotten_store;
3397      int do_allcaps = 0;
3398      int do_mark = 0;
3399    int do_study = 0;    int do_study = 0;
3400      int no_force_study = 0;
3401    int do_debug = debug;    int do_debug = debug;
3402    int do_G = 0;    int do_G = 0;
3403    int do_g = 0;    int do_g = 0;
3404    int do_showinfo = showinfo;    int do_showinfo = showinfo;
3405    int do_showrest = 0;    int do_showrest = 0;
3406    int do_posix = 0;    int do_showcaprest = 0;
3407    int erroroffset, len, delimiter;    int do_flip = 0;
3408      int erroroffset, len, delimiter, poffset;
3409    
3410    if (infile == stdin) printf("  re> ");  #if !defined NODFA
3411    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    int dfa_matched = 0;
3412    #endif
3413    
3414      use_utf = 0;
3415      debug_lengths = 1;
3416    
3417      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
3418    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3419      fflush(outfile);
3420    
3421    p = buffer;    p = buffer;
3422    while (isspace(*p)) p++;    while (isspace(*p)) p++;
3423    if (*p == 0) continue;    if (*p == 0) continue;
3424    
3425    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
3426    complete, read more. */  
3427      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3428        {
3429        pcre_uint32 magic;
3430        pcre_uint8 sbuf[8];
3431        FILE *f;
3432    
3433        p++;
3434        if (*p == '!')
3435          {
3436          do_debug = TRUE;
3437          do_showinfo = TRUE;
3438          p++;
3439          }
3440    
3441        pp = p + (int)strlen((char *)p);
3442        while (isspace(pp[-1])) pp--;
3443        *pp = 0;
3444    
3445        f = fopen((char *)p, "rb");
3446        if (f == NULL)
3447          {
3448          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3449          continue;
3450          }
3451    
3452        first_gotten_store = 0;
3453        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3454    
3455        true_size =
3456          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3457        true_study_size =
3458          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3459    
3460        re = (pcre *)new_malloc(true_size);
3461        if (re == NULL)
3462          {
3463          printf("** Failed to get %d bytes of memory for pcre object\n",
3464            (int)true_size);
3465          yield = 1;
3466          goto EXIT;
3467          }
3468        regex_gotten_store = first_gotten_store;
3469    
3470        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3471    
3472        magic = REAL_PCRE_MAGIC(re);
3473        if (magic != MAGIC_NUMBER)
3474          {
3475          if (swap_uint32(magic) == MAGIC_NUMBER)
3476            {
3477            do_flip = 1;
3478            }
3479          else
3480            {
3481            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3482            new_free(re);
3483            fclose(f);
3484            continue;
3485            }
3486          }
3487    
3488        /* We hide the byte-invert info for little and big endian tests. */
3489        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3490          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3491    
3492        /* Now see if there is any following study data. */
3493    
3494        if (true_study_size != 0)
3495          {
3496          pcre_study_data *psd;
3497    
3498          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3499          extra->flags = PCRE_EXTRA_STUDY_DATA;
3500    
3501          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3502          extra->study_data = psd;
3503    
3504          if (fread(psd, 1, true_study_size, f) != true_study_size)
3505            {
3506            FAIL_READ:
3507            fprintf(outfile, "Failed to read data from %s\n", p);
3508            if (extra != NULL)
3509              {
3510              PCRE_FREE_STUDY(extra);
3511              }
3512            new_free(re);
3513            fclose(f);
3514            continue;
3515            }
3516          fprintf(outfile, "Study data loaded from %s\n", p);
3517          do_study = 1;     /* To get the data output if requested */
3518          }
3519        else fprintf(outfile, "No study data\n");
3520    
3521        /* Flip the necessary bytes. */
3522        if (do_flip)
3523          {
3524          int rc;
3525          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3526          if (rc == PCRE_ERROR_BADMODE)
3527            {
3528            pcre_uint16 flags_in_host_byte_order;
3529            if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3530              flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3531            else
3532              flags_in_host_byte_order = swap_uint16(REAL_PCRE_FLAGS(re));
3533            /* Simulate the result of the function call below. */
3534            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3535              pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3536              PCRE_INFO_OPTIONS);
3537            fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3538              "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3539            new_free(re);
3540            fclose(f);
3541            continue;
3542            }
3543          }
3544    
3545        /* Need to know if UTF-8 for printing data strings. */
3546    
3547        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3548          {
3549          new_free(re);
3550          fclose(f);
3551          continue;
3552          }
3553        use_utf = (get_options & PCRE_UTF8) != 0;
3554    
3555        fclose(f);
3556        goto SHOW_INFO;
3557        }
3558    
3559      /* In-line pattern (the usual case). Get the delimiter and seek the end of
3560      the pattern; if it isn't complete, read more. */
3561    
3562    delimiter = *p++;    delimiter = *p++;
3563    
3564    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
3565      {      {
3566      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3567      goto SKIP_DATA;      goto SKIP_DATA;
3568      }      }
3569    
3570    pp = p;    pp = p;
3571      poffset = (int)(p - buffer);
3572    
3573    for(;;)    for(;;)
3574      {      {
# Line 435  while (!done) Line 3579  while (!done)
3579        pp++;        pp++;
3580        }        }
3581      if (*pp != 0) break;      if (*pp != 0) break;
3582        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
3583        {        {
3584        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
3585        done = 1;        done = 1;
# Line 453  while (!done) Line 3588  while (!done)
3588      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3589      }      }
3590    
3591      /* The buffer may have moved while being extended; reset the start of data
3592      pointer to the correct relative point in the buffer. */
3593    
3594      p = buffer + poffset;
3595    
3596    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
3597    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
3598    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
3599    
3600    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
3601    
3602    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
3603      for callouts. */
3604    
3605    *pp++ = 0;    *pp++ = 0;
3606      strcpy((char *)pbuffer, (char *)p);
3607    
3608    /* Look for options after final delimiter */    /* Look for options after final delimiter */
3609    
3610    options = 0;    options = 0;
3611    study_options = 0;    study_options = force_study_options;
3612    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
3613    
3614    while (*pp != 0)    while (*pp != 0)
3615      {      {
3616      switch (*pp++)      switch (*pp++)
3617        {        {
3618          case 'f': options |= PCRE_FIRSTLINE; break;
3619        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
3620        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
3621        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
3622        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
3623        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
3624    
3625        case '+': do_showrest = 1; break;        case '+':
3626          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3627          break;
3628    
3629          case '=': do_allcaps =