/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 835 by ph10, Wed Dec 28 16:10:09 2011 UTC revision 1313 by ph10, Wed Apr 24 12:07:09 2013 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of all of the 8-bit, 16-bit, and
40    32-bit PCRE libraries in a single program. This is different from the modules
41    such as pcre_compile.c in the library itself, which are compiled separately for
42    each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43    twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44    make use of any of the macros from pcre_internal.h that depend on
45    COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46    SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47    supported library functions. */
48    
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 58  POSSIBILITY OF SUCH DAMAGE.
58  #include <locale.h>  #include <locale.h>
59  #include <errno.h>  #include <errno.h>
60    
61  #ifdef SUPPORT_LIBREADLINE  /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
68  #include <unistd.h>  #include <unistd.h>
69  #endif  #endif
70    #if defined(SUPPORT_LIBREADLINE)
71  #include <readline/readline.h>  #include <readline/readline.h>
72  #include <readline/history.h>  #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80  #endif  #endif
   
81    
82  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
83  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 90  input mode under Windows. */ Line 112  input mode under Windows. */
112  #else  #else
113  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
114  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119  #define INPUT_MODE   "rb"  #define INPUT_MODE   "rb"
120  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
121  #endif  #endif
122    #endif
123    
124    #ifdef __VMS
125    #include <ssdef.h>
126    void vms_setsymbol( char *, char *, int );
127    #endif
128    
129    
130    #define PRIV(name) name
131    
132  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
133  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
134  internal macros, structures, and other internal data values; pcretest has  internal macros, structures, and other internal data values; pcretest has
# Line 107  appropriately for an application, not fo Line 141  appropriately for an application, not fo
141  #include "pcre.h"  #include "pcre.h"
142  #include "pcre_internal.h"  #include "pcre_internal.h"
143    
144    /* The pcre_printint() function, which prints the internal form of a compiled
145    regex, is held in a separate file so that (a) it can be compiled in either
146    8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147    when that is compiled in debug mode. */
148    
149    #ifdef SUPPORT_PCRE8
150    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151    #endif
152    #ifdef SUPPORT_PCRE16
153    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154    #endif
155    #ifdef SUPPORT_PCRE32
156    void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
157    #endif
158    
159  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
160  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source files here, changing the names of the
161  external symbols to prevent clashes. */  external symbols to prevent clashes. */
162    
163  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
 #define _pcre_ucp_typerange    ucp_typerange  
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utf8_char_sizes  utf8_char_sizes  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
164    
165  #include "pcre_tables.c"  #include "pcre_tables.c"
166    #include "pcre_ucd.c"
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
167    
168  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
169  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
170  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
171  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
172  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
173    
174    #ifdef EBCDIC
175    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
176    #else
177    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
178    #endif
179    
180    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
181    
182  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* Posix support is disabled in 16 or 32 bit only mode. */
183    #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
184    #define NOPOSIX
185    #endif
186    
187  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
188  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 150  Makefile. */ Line 192  Makefile. */
192  #include "pcreposix.h"  #include "pcreposix.h"
193  #endif  #endif
194    
195  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
196  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
198  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
199  UTF8 support if PCRE is built without it. */  
200    #ifndef SUPPORT_UTF
201  #ifndef SUPPORT_UTF8  #ifndef NOUTF
202  #ifndef NOUTF8  #define NOUTF
203  #define NOUTF8  #endif
204    #endif
205    
206    /* To make the code a bit tidier for 8/16/32-bit support, we define macros
207    for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
208    only from one place and is handled differently). I couldn't dream up any way of
209    using a single macro to do this in a generic way, because of the many different
210    argument requirements. We know that at least one of SUPPORT_PCRE8,
211    SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
212    individual mode; then use these in the definitions of generic macros.
213    
214    **** Special note about the PCHARSxxx macros: the address of the string to be
215    printed is always given as two arguments: a base address followed by an offset.
216    The base address is cast to the correct data size for 8, 16, or 32 bit data; the
217    offset is in units of this size. If the string were given as base+offset in one
218    argument, the casting might be incorrectly applied. */
219    
220    #ifdef SUPPORT_PCRE8
221    
222    #define PCHARS8(lv, p, offset, len, f) \
223      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
224    
225    #define PCHARSV8(p, offset, len, f) \
226      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
227    
228    #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
229      p = read_capture_name8(p, cn8, re)
230    
231    #define STRLEN8(p) ((int)strlen((char *)p))
232    
233    #define SET_PCRE_CALLOUT8(callout) \
234      pcre_callout = callout
235    
236    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
237       pcre_assign_jit_stack(extra, callback, userdata)
238    
239    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
240      re = pcre_compile((char *)pat, options, error, erroffset, tables)
241    
242    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
243        namesptr, cbuffer, size) \
244      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
245        (char *)namesptr, cbuffer, size)
246    
247    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
248      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
249    
250    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
251        offsets, size_offsets, workspace, size_workspace) \
252      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
253        offsets, size_offsets, workspace, size_workspace)
254    
255    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
256        offsets, size_offsets) \
257      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
258        offsets, size_offsets)
259    
260    #define PCRE_FREE_STUDY8(extra) \
261      pcre_free_study(extra)
262    
263    #define PCRE_FREE_SUBSTRING8(substring) \
264      pcre_free_substring(substring)
265    
266    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
267      pcre_free_substring_list(listptr)
268    
269    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
270        getnamesptr, subsptr) \
271      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
272        (char *)getnamesptr, subsptr)
273    
274    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
275      n = pcre_get_stringnumber(re, (char *)ptr)
276    
277    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
278      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
279    
280    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
281      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
282    
283    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
284      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
285    
286    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
287      pcre_printint(re, outfile, debug_lengths)
288    
289    #define PCRE_STUDY8(extra, re, options, error) \
290      extra = pcre_study(re, options, error)
291    
292    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
293      pcre_jit_stack_alloc(startsize, maxsize)
294    
295    #define PCRE_JIT_STACK_FREE8(stack) \
296      pcre_jit_stack_free(stack)
297    
298    #define pcre8_maketables pcre_maketables
299    
300    #endif /* SUPPORT_PCRE8 */
301    
302    /* -----------------------------------------------------------*/
303    
304    #ifdef SUPPORT_PCRE16
305    
306    #define PCHARS16(lv, p, offset, len, f) \
307      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
308    
309    #define PCHARSV16(p, offset, len, f) \
310      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
311    
312    #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
313      p = read_capture_name16(p, cn16, re)
314    
315    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
316    
317    #define SET_PCRE_CALLOUT16(callout) \
318      pcre16_callout = (int (*)(pcre16_callout_block *))callout
319    
320    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
321      pcre16_assign_jit_stack((pcre16_extra *)extra, \
322        (pcre16_jit_callback)callback, userdata)
323    
324    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
325      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
326        tables)
327    
328    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
329        namesptr, cbuffer, size) \
330      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
331        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
332    
333    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
334      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
335        (PCRE_UCHAR16 *)cbuffer, size/2)
336    
337    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338        offsets, size_offsets, workspace, size_workspace) \
339      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
340        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
341        workspace, size_workspace)
342    
343    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344        offsets, size_offsets) \
345      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
346        len, start_offset, options, offsets, size_offsets)
347    
348    #define PCRE_FREE_STUDY16(extra) \
349      pcre16_free_study((pcre16_extra *)extra)
350    
351    #define PCRE_FREE_SUBSTRING16(substring) \
352      pcre16_free_substring((PCRE_SPTR16)substring)
353    
354    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
355      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
356    
357    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
358        getnamesptr, subsptr) \
359      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
360        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
361    
362    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
363      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
364    
365    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
366      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
367        (PCRE_SPTR16 *)(void*)subsptr)
368    
369    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
370      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
371        (PCRE_SPTR16 **)(void*)listptr)
372    
373    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
374      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
375        tables)
376    
377    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
378      pcre16_printint(re, outfile, debug_lengths)
379    
380    #define PCRE_STUDY16(extra, re, options, error) \
381      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
382    
383    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
384      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
385    
386    #define PCRE_JIT_STACK_FREE16(stack) \
387      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
388    
389    #endif /* SUPPORT_PCRE16 */
390    
391    /* -----------------------------------------------------------*/
392    
393    #ifdef SUPPORT_PCRE32
394    
395    #define PCHARS32(lv, p, offset, len, f) \
396      lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
397    
398    #define PCHARSV32(p, offset, len, f)                \
399      (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
400    
401    #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
402      p = read_capture_name32(p, cn32, re)
403    
404    #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
405    
406    #define SET_PCRE_CALLOUT32(callout) \
407      pcre32_callout = (int (*)(pcre32_callout_block *))callout
408    
409    #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
410      pcre32_assign_jit_stack((pcre32_extra *)extra, \
411        (pcre32_jit_callback)callback, userdata)
412    
413    #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
414      re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
415        tables)
416    
417    #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
418        namesptr, cbuffer, size) \
419      rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
420        count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/2)
421    
422    #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
423      rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
424        (PCRE_UCHAR32 *)cbuffer, size/2)
425    
426    #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
427        offsets, size_offsets, workspace, size_workspace) \
428      count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
429        (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
430        workspace, size_workspace)
431    
432    #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433        offsets, size_offsets) \
434      count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
435        len, start_offset, options, offsets, size_offsets)
436    
437    #define PCRE_FREE_STUDY32(extra) \
438      pcre32_free_study((pcre32_extra *)extra)
439    
440    #define PCRE_FREE_SUBSTRING32(substring) \
441      pcre32_free_substring((PCRE_SPTR32)substring)
442    
443    #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
444      pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
445    
446    #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
447        getnamesptr, subsptr) \
448      rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
449        count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
450    
451    #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
452      n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
453    
454    #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
455      rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
456        (PCRE_SPTR32 *)(void*)subsptr)
457    
458    #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
459      rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
460        (PCRE_SPTR32 **)(void*)listptr)
461    
462    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
463      rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
464        tables)
465    
466    #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
467      pcre32_printint(re, outfile, debug_lengths)
468    
469    #define PCRE_STUDY32(extra, re, options, error) \
470      extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
471    
472    #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
473      (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
474    
475    #define PCRE_JIT_STACK_FREE32(stack) \
476      pcre32_jit_stack_free((pcre32_jit_stack *)stack)
477    
478    #endif /* SUPPORT_PCRE32 */
479    
480    
481    /* ----- More than one mode is supported; a runtime test is needed, except for
482    pcre_config(), and the JIT stack functions, when it doesn't matter which
483    available version is called. ----- */
484    
485    enum {
486      PCRE8_MODE,
487      PCRE16_MODE,
488      PCRE32_MODE
489    };
490    
491    #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
492         defined (SUPPORT_PCRE32)) >= 2
493    
494    #define CHAR_SIZE (1 << pcre_mode)
495    
496    /* There doesn't seem to be an easy way of writing these macros that can cope
497    with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
498    cases separately. */
499    
500    /* ----- All three modes supported ----- */
501    
502    #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
503    
504    #define PCHARS(lv, p, offset, len, f) \
505      if (pcre_mode == PCRE32_MODE) \
506        PCHARS32(lv, p, offset, len, f); \
507      else if (pcre_mode == PCRE16_MODE) \
508        PCHARS16(lv, p, offset, len, f); \
509      else \
510        PCHARS8(lv, p, offset, len, f)
511    
512    #define PCHARSV(p, offset, len, f) \
513      if (pcre_mode == PCRE32_MODE) \
514        PCHARSV32(p, offset, len, f); \
515      else if (pcre_mode == PCRE16_MODE) \
516        PCHARSV16(p, offset, len, f); \
517      else \
518        PCHARSV8(p, offset, len, f)
519    
520    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
521      if (pcre_mode == PCRE32_MODE) \
522        READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
523      else if (pcre_mode == PCRE16_MODE) \
524        READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
525      else \
526        READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
527    
528    #define SET_PCRE_CALLOUT(callout) \
529      if (pcre_mode == PCRE32_MODE) \
530        SET_PCRE_CALLOUT32(callout); \
531      else if (pcre_mode == PCRE16_MODE) \
532        SET_PCRE_CALLOUT16(callout); \
533      else \
534        SET_PCRE_CALLOUT8(callout)
535    
536    #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
537    
538    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
539      if (pcre_mode == PCRE32_MODE) \
540        PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
541      else if (pcre_mode == PCRE16_MODE) \
542        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
543      else \
544        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
545    
546    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
547      if (pcre_mode == PCRE32_MODE) \
548        PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
549      else if (pcre_mode == PCRE16_MODE) \
550        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
551      else \
552        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
553    
554    #define PCRE_CONFIG pcre_config
555    
556    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
557        namesptr, cbuffer, size) \
558      if (pcre_mode == PCRE32_MODE) \
559        PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
560          namesptr, cbuffer, size); \
561      else if (pcre_mode == PCRE16_MODE) \
562        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
563          namesptr, cbuffer, size); \
564      else \
565        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
566          namesptr, cbuffer, size)
567    
568    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
569      if (pcre_mode == PCRE32_MODE) \
570        PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
571      else if (pcre_mode == PCRE16_MODE) \
572        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
573      else \
574        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
575    
576    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
577        offsets, size_offsets, workspace, size_workspace) \
578      if (pcre_mode == PCRE32_MODE) \
579        PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
580          offsets, size_offsets, workspace, size_workspace); \
581      else if (pcre_mode == PCRE16_MODE) \
582        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
583          offsets, size_offsets, workspace, size_workspace); \
584      else \
585        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
586          offsets, size_offsets, workspace, size_workspace)
587    
588    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
589        offsets, size_offsets) \
590      if (pcre_mode == PCRE32_MODE) \
591        PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
592          offsets, size_offsets); \
593      else if (pcre_mode == PCRE16_MODE) \
594        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
595          offsets, size_offsets); \
596      else \
597        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
598          offsets, size_offsets)
599    
600    #define PCRE_FREE_STUDY(extra) \
601      if (pcre_mode == PCRE32_MODE) \
602        PCRE_FREE_STUDY32(extra); \
603      else if (pcre_mode == PCRE16_MODE) \
604        PCRE_FREE_STUDY16(extra); \
605      else \
606        PCRE_FREE_STUDY8(extra)
607    
608    #define PCRE_FREE_SUBSTRING(substring) \
609      if (pcre_mode == PCRE32_MODE) \
610        PCRE_FREE_SUBSTRING32(substring); \
611      else if (pcre_mode == PCRE16_MODE) \
612        PCRE_FREE_SUBSTRING16(substring); \
613      else \
614        PCRE_FREE_SUBSTRING8(substring)
615    
616    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
617      if (pcre_mode == PCRE32_MODE) \
618        PCRE_FREE_SUBSTRING_LIST32(listptr); \
619      else if (pcre_mode == PCRE16_MODE) \
620        PCRE_FREE_SUBSTRING_LIST16(listptr); \
621      else \
622        PCRE_FREE_SUBSTRING_LIST8(listptr)
623    
624    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
625        getnamesptr, subsptr) \
626      if (pcre_mode == PCRE32_MODE) \
627        PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
628          getnamesptr, subsptr); \
629      else if (pcre_mode == PCRE16_MODE) \
630        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
631          getnamesptr, subsptr); \
632      else \
633        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
634          getnamesptr, subsptr)
635    
636    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
637      if (pcre_mode == PCRE32_MODE) \
638        PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
639      else if (pcre_mode == PCRE16_MODE) \
640        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
641      else \
642        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
643    
644    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
645      if (pcre_mode == PCRE32_MODE) \
646        PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
647      else if (pcre_mode == PCRE16_MODE) \
648        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
649      else \
650        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
651    
652    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
653      if (pcre_mode == PCRE32_MODE) \
654        PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
655      else if (pcre_mode == PCRE16_MODE) \
656        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
657      else \
658        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
659    
660    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
661      (pcre_mode == PCRE32_MODE ? \
662         PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
663        : pcre_mode == PCRE16_MODE ? \
664          PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
665          : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
666    
667    #define PCRE_JIT_STACK_FREE(stack) \
668      if (pcre_mode == PCRE32_MODE) \
669        PCRE_JIT_STACK_FREE32(stack); \
670      else if (pcre_mode == PCRE16_MODE) \
671        PCRE_JIT_STACK_FREE16(stack); \
672      else \
673        PCRE_JIT_STACK_FREE8(stack)
674    
675    #define PCRE_MAKETABLES \
676      (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
677    
678    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
679      if (pcre_mode == PCRE32_MODE) \
680        PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
681      else if (pcre_mode == PCRE16_MODE) \
682        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
683      else \
684        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
685    
686    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
687      if (pcre_mode == PCRE32_MODE) \
688        PCRE_PRINTINT32(re, outfile, debug_lengths); \
689      else if (pcre_mode == PCRE16_MODE) \
690        PCRE_PRINTINT16(re, outfile, debug_lengths); \
691      else \
692        PCRE_PRINTINT8(re, outfile, debug_lengths)
693    
694    #define PCRE_STUDY(extra, re, options, error) \
695      if (pcre_mode == PCRE32_MODE) \
696        PCRE_STUDY32(extra, re, options, error); \
697      else if (pcre_mode == PCRE16_MODE) \
698        PCRE_STUDY16(extra, re, options, error); \
699      else \
700        PCRE_STUDY8(extra, re, options, error)
701    
702    
703    /* ----- Two out of three modes are supported ----- */
704    
705    #else
706    
707    /* We can use some macro trickery to make a single set of definitions work in
708    the three different cases. */
709    
710    /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
711    
712    #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
713    #define BITONE 32
714    #define BITTWO 16
715    
716    /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
717    
718    #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
719    #define BITONE 32
720    #define BITTWO 8
721    
722    /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
723    
724    #else
725    #define BITONE 16
726    #define BITTWO 8
727  #endif  #endif
728    
729    #define glue(a,b) a##b
730    #define G(a,b) glue(a,b)
731    
732    
733    /* ----- Common macros for two-mode cases ----- */
734    
735    #define PCHARS(lv, p, offset, len, f) \
736      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737        G(PCHARS,BITONE)(lv, p, offset, len, f); \
738      else \
739        G(PCHARS,BITTWO)(lv, p, offset, len, f)
740    
741    #define PCHARSV(p, offset, len, f) \
742      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743        G(PCHARSV,BITONE)(p, offset, len, f); \
744      else \
745        G(PCHARSV,BITTWO)(p, offset, len, f)
746    
747    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
748      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749        G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
750      else \
751        G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
752    
753    #define SET_PCRE_CALLOUT(callout) \
754      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
755        G(SET_PCRE_CALLOUT,BITONE)(callout); \
756      else \
757        G(SET_PCRE_CALLOUT,BITTWO)(callout)
758    
759    #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
760      G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
761    
762    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
763      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764        G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
765      else \
766        G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
767    
768    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
769      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
770        G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
771      else \
772        G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
773    
774    #define PCRE_CONFIG G(G(pcre,BITONE),_config)
775    
776    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
777        namesptr, cbuffer, size) \
778      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
779        G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
780          namesptr, cbuffer, size); \
781      else \
782        G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
783          namesptr, cbuffer, size)
784    
785    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
786      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
787        G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
788      else \
789        G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
790    
791    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
792        offsets, size_offsets, workspace, size_workspace) \
793      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
794        G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
795          offsets, size_offsets, workspace, size_workspace); \
796      else \
797        G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
798          offsets, size_offsets, workspace, size_workspace)
799    
800    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
801        offsets, size_offsets) \
802      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
803        G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
804          offsets, size_offsets); \
805      else \
806        G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
807          offsets, size_offsets)
808    
809    #define PCRE_FREE_STUDY(extra) \
810      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
811        G(PCRE_FREE_STUDY,BITONE)(extra); \
812      else \
813        G(PCRE_FREE_STUDY,BITTWO)(extra)
814    
815    #define PCRE_FREE_SUBSTRING(substring) \
816      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817        G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
818      else \
819        G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
820    
821    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
822      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
823        G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
824      else \
825        G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
826    
827    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
828        getnamesptr, subsptr) \
829      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
830        G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
831          getnamesptr, subsptr); \
832      else \
833        G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
834          getnamesptr, subsptr)
835    
836    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
837      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
838        G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
839      else \
840        G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
841    
842    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
843      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
844        G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
845      else \
846        G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
847    
848    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
849      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
850        G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
851      else \
852        G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
853    
/* Select the JIT stack allocator for the current pcre_mode (BITONE or BITTWO
variant). This macro expands to an expression, so the whole conditional is
wrapped in parentheses; without them, an operator adjacent to the macro call
would bind to the condition or to the last operand instead of to the result. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
858    
859    #define PCRE_JIT_STACK_FREE(stack) \
860      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
861        G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
862      else \
863        G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
864    
/* Call the maketables function for the current pcre_mode (BITONE or BITTWO
variant). The expansion is an expression, so it is fully parenthesized to keep
the conditional intact when the macro is used inside a larger expression. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
868    
869    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
870      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
871        G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
872      else \
873        G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
874    
875    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
876      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
877        G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
878      else \
879        G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
880    
881    #define PCRE_STUDY(extra, re, options, error) \
882      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
883        G(PCRE_STUDY,BITONE)(extra, re, options, error); \
884      else \
885        G(PCRE_STUDY,BITTWO)(extra, re, options, error)
886    
887    #endif  /* Two out of three modes */
888    
889    /* ----- End of cases where more than one mode is supported ----- */
890    
891    
892    /* ----- Only 8-bit mode is supported ----- */
893    
894    #elif defined SUPPORT_PCRE8
895    #define CHAR_SIZE                 1
896    #define PCHARS                    PCHARS8
897    #define PCHARSV                   PCHARSV8
898    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
899    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
900    #define STRLEN                    STRLEN8
901    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
902    #define PCRE_COMPILE              PCRE_COMPILE8
903    #define PCRE_CONFIG               pcre_config
904    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
905    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
906    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
907    #define PCRE_EXEC                 PCRE_EXEC8
908    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
909    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
910    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
911    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
912    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
913    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
914    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
915    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
916    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
917    #define PCRE_MAKETABLES           pcre_maketables()
918    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
919    #define PCRE_PRINTINT             PCRE_PRINTINT8
920    #define PCRE_STUDY                PCRE_STUDY8
921    
922    /* ----- Only 16-bit mode is supported ----- */
923    
924    #elif defined SUPPORT_PCRE16
925    #define CHAR_SIZE                 2
926    #define PCHARS                    PCHARS16
927    #define PCHARSV                   PCHARSV16
928    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
929    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
930    #define STRLEN                    STRLEN16
931    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
932    #define PCRE_COMPILE              PCRE_COMPILE16
933    #define PCRE_CONFIG               pcre16_config
934    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
935    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
936    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
937    #define PCRE_EXEC                 PCRE_EXEC16
938    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
939    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
940    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
941    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
942    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
943    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
944    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
945    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
946    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
947    #define PCRE_MAKETABLES           pcre16_maketables()
948    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
949    #define PCRE_PRINTINT             PCRE_PRINTINT16
950    #define PCRE_STUDY                PCRE_STUDY16
951    
952    /* ----- Only 32-bit mode is supported ----- */
953    
954    #elif defined SUPPORT_PCRE32
955    #define CHAR_SIZE                 4
956    #define PCHARS                    PCHARS32
957    #define PCHARSV                   PCHARSV32
958    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME32
959    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT32
960    #define STRLEN                    STRLEN32
961    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK32
962    #define PCRE_COMPILE              PCRE_COMPILE32
963    #define PCRE_CONFIG               pcre32_config
964    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
965    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING32
966    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC32
967    #define PCRE_EXEC                 PCRE_EXEC32
968    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY32
969    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING32
970    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST32
971    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING32
972    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER32
973    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING32
974    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST32
975    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC32
976    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE32
977    #define PCRE_MAKETABLES           pcre32_maketables()
978    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
979    #define PCRE_PRINTINT             PCRE_PRINTINT32
980    #define PCRE_STUDY                PCRE_STUDY32
981    
982  #endif  #endif
983    
984    /* ----- End of mode-specific function call macros ----- */
985    
986    
987  /* Other parameters */  /* Other parameters */
988    
# Line 173  UTF8 support if PCRE is built without it Line 994  UTF8 support if PCRE is built without it
994  #endif  #endif
995  #endif  #endif
996    
997    #if !defined NODFA
998    #define DFA_WS_DIMENSION 1000
999    #endif
1000    
1001  /* This is the default loop count for timing. */  /* This is the default loop count for timing. */
1002    
1003  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
# Line 187  static int callout_fail_count; Line 1012  static int callout_fail_count;
1012  static int callout_fail_id;  static int callout_fail_id;
1013  static int debug_lengths;  static int debug_lengths;
1014  static int first_callout;  static int first_callout;
1015    static int jit_was_used;
1016  static int locale_set = 0;  static int locale_set = 0;
1017  static int show_malloc;  static int show_malloc;
1018  static int use_utf8;  static int use_utf;
1019  static size_t gotten_store;  static size_t gotten_store;
1020    static size_t first_gotten_store = 0;
1021  static const unsigned char *last_callout_mark = NULL;  static const unsigned char *last_callout_mark = NULL;
1022    
1023  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
1024    
1025  static int buffer_size = 50000;  static int buffer_size = 50000;
1026  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
1027  static uschar *dbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
1028  static uschar *pbuffer = NULL;  
1029    /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1030    
1031    #ifdef COMPILE_PCRE16
1032    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1033    #endif
1034    
1035    #ifdef COMPILE_PCRE32
1036    #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1037    #endif
1038    
1039    /* We need buffers for building 16/32-bit strings, and the tables of operator
1040    lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1041    pattern for saving/reloading testing. Luckily, the data for these tables is
1042    defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1043    are used in the tables) are adjusted appropriately for the 16/32-bit world.
1044    LINK_SIZE is also used later in this program. */
1045    
1046    #ifdef SUPPORT_PCRE16
1047    #undef IMM2_SIZE
1048    #define IMM2_SIZE 1
1049    
1050    #if LINK_SIZE == 2
1051    #undef LINK_SIZE
1052    #define LINK_SIZE 1
1053    #elif LINK_SIZE == 3 || LINK_SIZE == 4
1054    #undef LINK_SIZE
1055    #define LINK_SIZE 2
1056    #else
1057    #error LINK_SIZE must be either 2, 3, or 4
1058    #endif
1059    
1060    static int buffer16_size = 0;
1061    static pcre_uint16 *buffer16 = NULL;
1062    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1063    #endif  /* SUPPORT_PCRE16 */
1064    
1065    #ifdef SUPPORT_PCRE32
1066    #undef IMM2_SIZE
1067    #define IMM2_SIZE 1
1068    #undef LINK_SIZE
1069    #define LINK_SIZE 1
1070    
1071    static int buffer32_size = 0;
1072    static pcre_uint32 *buffer32 = NULL;
1073    static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1074    #endif  /* SUPPORT_PCRE32 */
1075    
1076    /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1077    support, it can be changed by an option. If there is no 8-bit support, there
1078    must be 16- or 32-bit support, so default to 16-bit if present, else 32-bit. */
1079    
1080    #if defined SUPPORT_PCRE8
1081    static int pcre_mode = PCRE8_MODE;
1082    #elif defined SUPPORT_PCRE16
1083    static int pcre_mode = PCRE16_MODE;
1084    #elif defined SUPPORT_PCRE32
1085    static int pcre_mode = PCRE32_MODE;
1086    #endif
1087    
1088    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1089    
1090    static int jit_study_bits[] =
1091      {
1092      PCRE_STUDY_JIT_COMPILE,
1093      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1094      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1095      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1096      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1097      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1098      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1099        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1100    };
1101    
1102    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1103      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1104    
1105  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
1106    
# Line 213  static const char *errtexts[] = { Line 1115  static const char *errtexts[] = {
1115    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
1116    "match limit exceeded",    "match limit exceeded",
1117    "callout error code",    "callout error code",
1118    NULL,  /* BADUTF8 is handled specially */    NULL,  /* BADUTF8/16 is handled specially */
1119    "bad UTF-8 offset",    NULL,  /* BADUTF8/16 offset is handled specially */
1120    NULL,  /* PARTIAL is handled specially */    NULL,  /* PARTIAL is handled specially */
1121    "not used - internal error",    "not used - internal error",
1122    "internal error - pattern overwritten?",    "internal error - pattern overwritten?",
# Line 228  static const char *errtexts[] = { Line 1130  static const char *errtexts[] = {
1130    "not used - internal error",    "not used - internal error",
1131    "invalid combination of newline options",    "invalid combination of newline options",
1132    "bad offset value",    "bad offset value",
1133    NULL,  /* SHORTUTF8 is handled specially */    NULL,  /* SHORTUTF8/16 is handled specially */
1134    "nested recursion at the same subject position",    "nested recursion at the same subject position",
1135    "JIT stack limit reached"    "JIT stack limit reached",
1136      "pattern compiled in wrong mode: 8-bit/16-bit error",
1137      "pattern compiled with other endianness",
1138      "invalid data in workspace for DFA restart",
1139      "bad JIT option",
1140      "bad length"
1141  };  };
1142    
1143    
# Line 246  the L (locale) option also adjusts the t Line 1153  the L (locale) option also adjusts the t
1153  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
1154  only ASCII characters. */  only ASCII characters. */
1155    
1156  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
1157    
1158  /* This table is a lower casing table. */  /* This table is a lower casing table. */
1159    
# Line 419  graph, print, punct, and cntrl. Other cl Line 1326  graph, print, punct, and cntrl. Other cl
1326  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
1327  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
1328    
1329  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
1330  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
1331  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
1332  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 582  return sys_errlist[n]; Line 1489  return sys_errlist[n];
1489  #endif /* HAVE_STRERROR */  #endif /* HAVE_STRERROR */
1490    
1491    
1492    
1493    /*************************************************
1494    *       Print newline configuration              *
1495    *************************************************/
1496    
1497    /*
1498    Arguments:
1499      rc         the return code from PCRE_CONFIG_NEWLINE
1500      isc        TRUE if called from "-C newline"
1501    Returns:     nothing
1502    */
1503    
1504    static void
1505    print_newline_config(int rc, BOOL isc)
1506    {
1507    const char *s = NULL;
1508    if (!isc) printf("  Newline sequence is ");
1509    switch(rc)
1510      {
1511      case CHAR_CR: s = "CR"; break;
1512      case CHAR_LF: s = "LF"; break;
1513      case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1514      case -1: s = "ANY"; break;
1515      case -2: s = "ANYCRLF"; break;
1516    
1517      default:
1518      printf("a non-standard value: 0x%04x\n", rc);
1519      return;
1520      }
1521    
1522    printf("%s\n", s);
1523    }
1524    
1525    
1526    
1527  /*************************************************  /*************************************************
1528  *         JIT memory callback                    *  *         JIT memory callback                    *
1529  *************************************************/  *************************************************/
1530    
1531  static pcre_jit_stack* jit_callback(void *arg)  static pcre_jit_stack* jit_callback(void *arg)
1532  {  {
1533    jit_was_used = TRUE;
1534  return (pcre_jit_stack *)arg;  return (pcre_jit_stack *)arg;
1535  }  }
1536    
1537    
1538    #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1539  /*************************************************  /*************************************************
1540  *        Read or extend an input line            *  *            Convert UTF-8 string to value       *
1541  *************************************************/  *************************************************/
1542    
1543  /* Input lines are read into buffer, but both patterns and data lines can be  /* This function takes one or more bytes that represents a UTF-8 character,
1544  continued over multiple input lines. In addition, if the buffer fills up, we  and returns the value of the character.
 want to automatically expand it so as to be able to handle extremely large  
 lines that are needed for certain stress tests. When the input buffer is  
 expanded, the other two buffers must also be expanded likewise, and the  
 contents of pbuffer, which are a copy of the input for callouts, must be  
 preserved (for when expansion happens for a data line). This is not the most  
 optimal way of handling this, but hey, this is just a test program!  
1545    
1546  Arguments:  Argument:
1547    f            the file to read    utf8bytes   a pointer to the byte vector
1548    start        where in buffer to start (this *must* be within buffer)    vptr        a pointer to an int to receive the value
   prompt       for stdin or readline()  
1549    
1550  Returns:       pointer to the start of new data  Returns:      >  0 => the number of bytes consumed
1551                 could be a copy of start, or could be moved                -6 to 0 => malformed UTF-8 character at offset = (-return)
                NULL if no data read and EOF reached  
1552  */  */
1553    
1554  static uschar *  static int
1555  extend_inputline(FILE *f, uschar *start, const char *prompt)  utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1556  {  {
1557  uschar *here = start;  pcre_uint32 c = *utf8bytes++;
1558    pcre_uint32 d = c;
1559    int i, j, s;
1560    
1561  for (;;)  for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1562    {    {
1563    int rlen = (int)(buffer_size - (here - buffer));    if ((d & 0x80) == 0) break;
1564      d <<= 1;
1565      }
1566    
1567    if (rlen > 1000)  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1568      {  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
     int dlen;  
1569    
1570      /* If libreadline support is required, use readline() to read a line if the  /* i now has a value in the range 1-5 */
     input is a terminal. Note that readline() removes the trailing newline, so  
     we must put it back again, to be compatible with fgets(). */  
1571    
1572  #ifdef SUPPORT_LIBREADLINE  s = 6*i;
1573      if (isatty(fileno(f)))  d = (c & utf8_table3[i]) << s;
       {  
       size_t len;  
       char *s = readline(prompt);  
       if (s == NULL) return (here == start)? NULL : start;  
       len = strlen(s);  
       if (len > 0) add_history(s);  
       if (len > rlen - 1) len = rlen - 1;  
       memcpy(here, s, len);  
       here[len] = '\n';  
       here[len+1] = 0;  
       free(s);  
       }  
     else  
 #endif  
1574    
1575      /* Read the next line by normal means, prompting if the file is stdin. */  for (j = 0; j < i; j++)
1576      {
1577      c = *utf8bytes++;
1578      if ((c & 0xc0) != 0x80) return -(j+1);
1579      s -= 6;
1580      d |= (c & 0x3f) << s;
1581      }
1582    
1583        {  /* Check that encoding was the correct unique one */
       if (f == stdin) printf("%s", prompt);  
       if (fgets((char *)here, rlen,  f) == NULL)  
         return (here == start)? NULL : start;  
       }  
1584    
1585      dlen = (int)strlen((char *)here);  for (j = 0; j < utf8_table1_size; j++)
1586      if (dlen > 0 && here[dlen - 1] == '\n') return start;    if (d <= (pcre_uint32)utf8_table1[j]) break;
1587      here += dlen;  if (j != i) return -(i+1);
     }  
1588    
1589    else  /* Valid value */
     {  
     int new_buffer_size = 2*buffer_size;  
     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);  
     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);  
     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);  
1590    
1591      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)  *vptr = d;
1592        {  return i+1;
1593        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);  }
1594        exit(1);  #endif /* NOUTF || SUPPORT_PCRE16 */
       }  
1595    
     memcpy(new_buffer, buffer, buffer_size);  
     memcpy(new_pbuffer, pbuffer, buffer_size);  
1596    
     buffer_size = new_buffer_size;  
1597    
1598      start = new_buffer + (start - buffer);  #if defined SUPPORT_PCRE8 && !defined NOUTF
1599    /*************************************************
1600    *       Convert character value to UTF-8         *
1601    *************************************************/
1602    
1603    /* This function takes an integer value in the range 0 - 0x7fffffff
1604    and encodes it as a UTF-8 character in 0 to 6 bytes.
1605    
1606    Arguments:
1607      cvalue     the character value
1608      utf8bytes  pointer to buffer for result - at least 6 bytes long
1609    
1610    Returns:     number of characters placed in the buffer
1611    */
1612    
1613    static int
1614    ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1615    {
1616    register int i, j;
1617    if (cvalue > 0x7fffffffu)
1618      return -1;
1619    for (i = 0; i < utf8_table1_size; i++)
1620      if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1621    utf8bytes += i;
1622    for (j = i; j > 0; j--)
1623     {
1624     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1625     cvalue >>= 6;
1626     }
1627    *utf8bytes = utf8_table2[i] | cvalue;
1628    return i + 1;
1629    }
1630    #endif
1631    
1632    
1633    #ifdef SUPPORT_PCRE16
1634    /*************************************************
1635    *         Convert a string to 16-bit             *
1636    *************************************************/
1637    
1638    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1639    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1640    double, because values up to 0xffff use at least 1 byte in UTF-8 and exactly 2
1641    in UTF-16. Higher values use 4 bytes in UTF-8 and 4 bytes in UTF-16. The
1642    result is always left in buffer16.
1643    
1644    Note that this function does not object to surrogate values. This is
1645    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1646    for the purpose of testing that they are correctly faulted.
1647    
1648    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1649    in UTF-8 so that values greater than 255 can be handled.
1650    
1651    Arguments:
1652      data       TRUE if converting a data line; FALSE for a regex
1653      p          points to a byte string
1654      utf        true if UTF-8 (to be converted to UTF-16)
1655      len        number of bytes in the string (excluding trailing zero)
1656    
1657    Returns:     number of 16-bit data items used (excluding trailing zero)
1658                 OR -1 if a UTF-8 string is malformed
1659                 OR -2 if a value > 0x10ffff is encountered
1660                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1661    */
1662    
1663    static int
1664    to16(int data, pcre_uint8 *p, int utf, int len)
1665    {
1666    pcre_uint16 *pp;
1667    
1668    if (buffer16_size < 2*len + 2)
1669      {
1670      if (buffer16 != NULL) free(buffer16);
1671      buffer16_size = 2*len + 2;
1672      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1673      if (buffer16 == NULL)
1674        {
1675        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1676        exit(1);
1677        }
1678      }
1679    
1680    pp = buffer16;
1681    
1682    if (!utf && !data)
1683      {
1684      while (len-- > 0) *pp++ = *p++;
1685      }
1686    
1687    else
1688      {
1689      pcre_uint32 c = 0;
1690      while (len > 0)
1691        {
1692        int chlen = utf82ord(p, &c);
1693        if (chlen <= 0) return -1;
1694        if (c > 0x10ffff) return -2;
1695        p += chlen;
1696        len -= chlen;
1697        if (c < 0x10000) *pp++ = c; else
1698          {
1699          if (!utf) return -3;
1700          c -= 0x10000;
1701          *pp++ = 0xD800 | (c >> 10);
1702          *pp++ = 0xDC00 | (c & 0x3ff);
1703          }
1704        }
1705      }
1706    
1707    *pp = 0;
1708    return pp - buffer16;
1709    }
1710    #endif
1711    
1712    #ifdef SUPPORT_PCRE32
1713    /*************************************************
1714    *         Convert a string to 32-bit             *
1715    *************************************************/
1716    
1717    /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1718    8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1719    times, because every character uses at least 1 byte in UTF-8 and exactly 4 in
1720    UTF-32, so the worst case is a string of single-byte characters. The
1721    result is always left in buffer32.
1722    
1723    Surrogate values are rejected only in UTF patterns; data lines may contain them.
1724    This is deliberate; it makes it possible to construct invalid UTF-32 subject
1725    strings, for the purpose of testing that they are correctly faulted.
1726    
1727    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1728    in UTF-8 so that values greater than 255 can be handled.
1729    
1730    Arguments:
1731      data       TRUE if converting a data line; FALSE for a regex
1732      p          points to a byte string
1733      utf        true if UTF-8 (to be converted to UTF-32)
1734      len        number of bytes in the string (excluding trailing zero)
1735    
1736    Returns:     number of 32-bit data items used (excluding trailing zero)
1737                 OR -1 if a UTF-8 string is malformed
1738                 OR -2 if a value > 0x10ffff is encountered
1739                 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1740    */
1741    
1742    static int
1743    to32(int data, pcre_uint8 *p, int utf, int len)
1744    {
1745    pcre_uint32 *pp;
1746    
1747    if (buffer32_size < 4*len + 4)
1748      {
1749      if (buffer32 != NULL) free(buffer32);
1750      buffer32_size = 4*len + 4;
1751      buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1752      if (buffer32 == NULL)
1753        {
1754        fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1755        exit(1);
1756        }
1757      }
1758    
1759    pp = buffer32;
1760    
1761    if (!utf && !data)
1762      {
1763      while (len-- > 0) *pp++ = *p++;
1764      }
1765    
1766    else
1767      {
1768      pcre_uint32 c = 0;
1769      while (len > 0)
1770        {
1771        int chlen = utf82ord(p, &c);
1772        if (chlen <= 0) return -1;
1773        if (utf)
1774          {
1775          if (c > 0x10ffff) return -2;
1776          if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1777          }
1778    
1779        p += chlen;
1780        len -= chlen;
1781        *pp++ = c;
1782        }
1783      }
1784    
1785    *pp = 0;
1786    return pp - buffer32;
1787    }
1788    
1789    /* Check that a 32-bit character string is valid UTF-32.
1790    
1791    Arguments:
1792      string       points to the string
1793      length       length of string in 32-bit units (zero or negative checks nothing)
1794    
1795    Returns:       TRUE  if the string is a valid UTF-32 string
1796                   FALSE otherwise
1797    */
1798    
1799    #ifdef NEVER   /* Not used */
1800    #ifdef SUPPORT_UTF
1801    static BOOL
1802    valid_utf32(pcre_uint32 *string, int length)
1803    {
1804    register pcre_uint32 *p;
1805    register pcre_uint32 c;
1806    
1807    for (p = string; length-- > 0; p++)
1808      {
1809      c = *p;
1810      if (c > 0x10ffffu) return FALSE;                 /* Too big */
1811      if ((c & 0xfffff800u) == 0xd800u) return FALSE;  /* Surrogate */
1812      }
1813    
1814    return TRUE;
1815    }
1816    #endif /* SUPPORT_UTF */
1817    #endif /* NEVER */
1818    #endif /* SUPPORT_PCRE32 */
1819    
1820    
1821    /*************************************************
1822    *        Read or extend an input line            *
1823    *************************************************/
1824    
1825    /* Input lines are read into buffer, but both patterns and data lines can be
1826    continued over multiple input lines. In addition, if the buffer fills up, we
1827    want to automatically expand it so as to be able to handle extremely large
1828    lines that are needed for certain stress tests. When the input buffer is
1829    expanded, the companion buffer pbuffer must also be expanded likewise, and its
1830    contents, which are a copy of the input for callouts, must be
1831    preserved (for when expansion happens for a data line). This is not the
1832    best way of handling this, but hey, this is just a test program!
1833    
1834    Arguments:
1835      f            the file to read
1836      start        where in buffer to start (this *must* be within buffer)
1837      prompt       for stdin or readline()
1838    
1839    Returns:       pointer to the start of new data
1840                   could be a copy of start, or could be moved
1841                   NULL if no data read and EOF reached
1842    */
1843    
1844    static pcre_uint8 *
1845    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1846    {
1847    pcre_uint8 *here = start;
1848    
1849    for (;;)
1850      {
1851      size_t rlen = (size_t)(buffer_size - (here - buffer));
1852    
1853      if (rlen > 1000)
1854        {
1855        int dlen;
1856    
1857        /* If libreadline or libedit support is required, use readline() to read a
1858        line if the input is a terminal. Note that readline() removes the trailing
1859        newline, so we must put it back again, to be compatible with fgets(). */
1860    
1861    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1862        if (isatty(fileno(f)))
1863          {
1864          size_t len;
1865          char *s = readline(prompt);
1866          if (s == NULL) return (here == start)? NULL : start;
1867          len = strlen(s);
1868          if (len > 0) add_history(s);
1869          if (len > rlen - 1) len = rlen - 1;
1870          memcpy(here, s, len);
1871          here[len] = '\n';
1872          here[len+1] = 0;
1873          free(s);
1874          }
1875        else
1876    #endif
1877    
1878        /* Read the next line by normal means, prompting if the file is stdin. */
1879    
1880          {
1881          if (f == stdin) printf("%s", prompt);
1882          if (fgets((char *)here, rlen,  f) == NULL)
1883            return (here == start)? NULL : start;
1884          }
1885    
1886        dlen = (int)strlen((char *)here);
1887        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1888        here += dlen;
1889        }
1890    
1891      else
1892        {
1893        int new_buffer_size = 2*buffer_size;
1894        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1895        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1896    
1897        if (new_buffer == NULL || new_pbuffer == NULL)
1898          {
1899          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1900          exit(1);
1901          }
1902    
1903        memcpy(new_buffer, buffer, buffer_size);
1904        memcpy(new_pbuffer, pbuffer, buffer_size);
1905    
1906        buffer_size = new_buffer_size;
1907    
1908        start = new_buffer + (start - buffer);
1909      here = new_buffer + (here - buffer);      here = new_buffer + (here - buffer);
1910    
1911      free(buffer);      free(buffer);
     free(dbuffer);  
1912      free(pbuffer);      free(pbuffer);
1913    
1914      buffer = new_buffer;      buffer = new_buffer;
     dbuffer = new_dbuffer;  
1915      pbuffer = new_pbuffer;      pbuffer = new_pbuffer;
1916      }      }
1917    }    }
# Line 698  return NULL;  /* Control never gets here Line 1921  return NULL;  /* Control never gets here
1921    
1922    
1923    
   
   
   
   
1924  /*************************************************  /*************************************************
1925  *          Read number from string               *  *          Read number from string               *
1926  *************************************************/  *************************************************/
# Line 718  Returns:        the unsigned long Line 1937  Returns:        the unsigned long
1937  */  */
1938    
1939  static int  static int
1940  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1941  {  {
1942  int result = 0;  int result = 0;
1943  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 729  return(result); Line 1948  return(result);
1948    
1949    
1950    
   
1951  /*************************************************  /*************************************************
1952  *            Convert UTF-8 string to value       *  *             Print one character                *
1953  *************************************************/  *************************************************/
1954    
1955  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
1956    
1957  #if !defined NOUTF8  static int pchar(pcre_uint32 c, FILE *f)
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1958  {  {
1959  int c = *utf8bytes++;  int n = 0;
1960  int d = c;  if (PRINTOK(c))
1961  int i, j, s;    {
1962      if (f != NULL) fprintf(f, "%c", c);
1963      return 1;
1964      }
1965    
1966  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1967    {    {
1968    if ((d & 0x80) == 0) break;    if (use_utf)
1969    d <<= 1;      {
1970        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1971        return 6;
1972        }
1973      else
1974        {
1975        if (f != NULL) fprintf(f, "\\x%02x", c);
1976        return 4;
1977        }
1978    }    }
1979    
1980  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1981  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return n >= 0 ? n : 0;
1982    }
1983    
 /* i now has a value in the range 1-5 */  
1984    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1985    
1986  for (j = 0; j < i; j++)  #ifdef SUPPORT_PCRE8
1987    {  /*************************************************
1988    c = *utf8bytes++;  *         Print 8-bit character string           *
1989    if ((c & 0xc0) != 0x80) return -(j+1);  *************************************************/
   s -= 6;  
   d |= (c & 0x3f) << s;  
   }  
1990    
1991  /* Check that encoding was the correct unique one */  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1992    If handed a NULL file, just counts chars without printing. */
1993    
1994  for (j = 0; j < utf8_table1_size; j++)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1995    if (d <= utf8_table1[j]) break;  {
1996  if (j != i) return -(i+1);  pcre_uint32 c = 0;
1997    int yield = 0;
1998    
1999  /* Valid value */  if (length < 0)
2000      length = strlen((char *)p);
2001    
2002  *vptr = d;  while (length-- > 0)
2003  return i+1;    {
2004  }  #if !defined NOUTF
2005      if (use_utf)
2006        {
2007        int rc = utf82ord(p, &c);
2008        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
2009          {
2010          length -= rc - 1;
2011          p += rc;
2012          yield += pchar(c, f);
2013          continue;
2014          }
2015        }
2016    #endif
2017      c = *p++;
2018      yield += pchar(c, f);
2019      }
2020    
2021    return yield;
2022    }
2023  #endif  #endif
2024    
2025    
2026    
2027    #ifdef SUPPORT_PCRE16
2028  /*************************************************  /*************************************************
2029  *       Convert character value to UTF-8         *  *    Find length of 0-terminated 16-bit string   *
2030  *************************************************/  *************************************************/
2031    
2032  /* This function takes an integer value in the range 0 - 0x7fffffff  static int strlen16(PCRE_SPTR16 p)
2033  and encodes it as a UTF-8 character in 0 to 6 bytes.  {
2034    int len = 0;
2035    while (*p++ != 0) len++;
2036    return len;
2037    }
2038    #endif  /* SUPPORT_PCRE16 */
2039    
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
2040    
 Returns:     number of characters placed in the buffer  
 */  
2041    
2042  #if !defined NOUTF8  #ifdef SUPPORT_PCRE32
2043    /*************************************************
2044    *    Find length of 0-terminated 32-bit string   *
2045    *************************************************/
2046    
2047  static int  static int strlen32(PCRE_SPTR32 p)
 ord2utf8(int cvalue, uschar *utf8bytes)  
2048  {  {
2049  register int i, j;  int len = 0;
2050  for (i = 0; i < utf8_table1_size; i++)  while (*p++ != 0) len++;
2051    if (cvalue <= utf8_table1[i]) break;  return len;
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
2052  }  }
2053    #endif  /* SUPPORT_PCRE32 */
 #endif  
2054    
2055    
2056    
2057    #ifdef SUPPORT_PCRE16
2058  /*************************************************  /*************************************************
2059  *             Print character string             *  *           Print 16-bit character string        *
2060  *************************************************/  *************************************************/
2061    
2062  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2063  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
2064    
2065  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2066  {  {
 int c = 0;  
2067  int yield = 0;  int yield = 0;
2068    
2069    if (length < 0)
2070      length = strlen16(p);
2071    
2072  while (length-- > 0)  while (length-- > 0)
2073    {    {
2074  #if !defined NOUTF8    pcre_uint32 c = *p++ & 0xffff;
2075    if (use_utf8)  #if !defined NOUTF
2076      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2077      {      {
2078      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
2079        if (d >= 0xDC00 && d <= 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
2080        {        {
2081        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2082        p += rc;        length--;
2083        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
2084        }        }
2085      }      }
2086  #endif  #endif
2087      yield += pchar(c, f);
2088      }
2089    
2090     /* Not UTF-8, or malformed UTF-8  */  return yield;
2091    }
2092    #endif  /* SUPPORT_PCRE16 */
2093    
2094    c = *p++;  
2095    if (PRINTHEX(c))  
2096      {  #ifdef SUPPORT_PCRE32
2097      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
2098      yield++;  *           Print 32-bit character string        *
2099      }  *************************************************/
2100    else  
2101      {  /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2102      if (f != NULL) fprintf(f, "\\x%02x", c);  If handed a NULL file, just counts chars without printing. */
2103      yield += 4;  
2104      }  static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2105    {
2106    int yield = 0;
2107    
2108    (void)(utf);  /* Avoid compiler warning */
2109    
2110    if (length < 0)
2111      length = strlen32(p);
2112    
2113    while (length-- > 0)
2114      {
2115      pcre_uint32 c = *p++;
2116      yield += pchar(c, f);
2117    }    }
2118    
2119  return yield;  return yield;
2120  }  }
2121    #endif  /* SUPPORT_PCRE32 */
2122    
2123    
2124    
2125    #ifdef SUPPORT_PCRE8
2126    /*************************************************
2127    *     Read a capture name (8-bit) and check it   *
2128    *************************************************/
2129    
2130    static pcre_uint8 *
2131    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2132    {
2133    pcre_uint8 *npp = *pp;
2134    while (isalnum(*p)) *npp++ = *p++;
2135    *npp++ = 0;
2136    *npp = 0;
2137    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2138      {
2139      fprintf(outfile, "no parentheses with name \"");
2140      PCHARSV(*pp, 0, -1, outfile);
2141      fprintf(outfile, "\"\n");
2142      }
2143    
2144    *pp = npp;
2145    return p;
2146    }
2147    #endif  /* SUPPORT_PCRE8 */
2148    
2149    
2150    
2151    #ifdef SUPPORT_PCRE16
2152    /*************************************************
2153    *     Read a capture name (16-bit) and check it  *
2154    *************************************************/
2155    
2156    /* Note that the text being read is 8-bit. */
2157    
2158    static pcre_uint8 *
2159    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2160    {
2161    pcre_uint16 *npp = *pp;
2162    while (isalnum(*p)) *npp++ = *p++;
2163    *npp++ = 0;
2164    *npp = 0;
2165    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2166      {
2167      fprintf(outfile, "no parentheses with name \"");
2168      PCHARSV(*pp, 0, -1, outfile);
2169      fprintf(outfile, "\"\n");
2170      }
2171    *pp = npp;
2172    return p;
2173    }
2174    #endif  /* SUPPORT_PCRE16 */
2175    
2176    
2177    
2178    #ifdef SUPPORT_PCRE32
2179    /*************************************************
2180    *     Read a capture name (32-bit) and check it  *
2181    *************************************************/
2182    
2183    /* Note that the text being read is 8-bit. */
2184    
2185    static pcre_uint8 *
2186    read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2187    {
2188    pcre_uint32 *npp = *pp;
2189    while (isalnum(*p)) *npp++ = *p++;
2190    *npp++ = 0;
2191    *npp = 0;
2192    if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2193      {
2194      fprintf(outfile, "no parentheses with name \"");
2195      PCHARSV(*pp, 0, -1, outfile);
2196      fprintf(outfile, "\"\n");
2197      }
2198    *pp = npp;
2199    return p;
2200    }
2201    #endif  /* SUPPORT_PCRE32 */
2202    
2203    
2204    
# Line 916  if (callout_extra) Line 2227  if (callout_extra)
2227      else      else
2228        {        {
2229        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
2230        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
2231          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
2232        fprintf(f, "\n");        fprintf(f, "\n");
2233        }        }
# Line 929  printed lengths of the substrings. */ Line 2240  printed lengths of the substrings. */
2240    
2241  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
2242    
2243  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2244  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
2245    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
2246    
2247  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2248    
2249  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
2250    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
2251    
2252  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 974  first_callout = 0; Line 2285  first_callout = 0;
2285    
2286  if (cb->mark != last_callout_mark)  if (cb->mark != last_callout_mark)
2287    {    {
2288    fprintf(outfile, "Latest Mark: %s\n",    if (cb->mark == NULL)
2289      (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));      fprintf(outfile, "Latest Mark: <unset>\n");
2290    last_callout_mark = cb->mark;    else
2291        {
2292        fprintf(outfile, "Latest Mark: ");
2293        PCHARSV(cb->mark, 0, -1, outfile);
2294        putc('\n', outfile);
2295        }
2296      last_callout_mark = cb->mark;
2297      }
2298    
2299    if (cb->callout_data != NULL)
2300      {
2301      int callout_data = *((int *)(cb->callout_data));
2302      if (callout_data != 0)
2303        {
2304        fprintf(outfile, "Callout data = %d\n", callout_data);
2305        return callout_data;
2306        }
2307      }
2308    
2309    return (cb->callout_number != callout_fail_id)? 0 :
2310           (++callout_count >= callout_fail_count)? 1 : 0;
2311    }
2312    
2313    
2314    /*************************************************
2315    *            Local malloc functions              *
2316    *************************************************/
2317    
2318    /* Alternative malloc function, to test functionality and save the size of a
2319    compiled re, which is the first store request that pcre_compile() makes. The
2320    show_malloc variable is set only during matching. */
2321    
2322    static void *new_malloc(size_t size)
2323    {
2324    void *block = malloc(size);
2325    gotten_store = size;
2326    if (first_gotten_store == 0) first_gotten_store = size;
2327    if (show_malloc)
2328      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
2329    return block;
2330    }
2331    
2332    static void new_free(void *block)
2333    {
2334    if (show_malloc)
2335      fprintf(outfile, "free             %p\n", block);
2336    free(block);
2337    }
2338    
2339    /* For recursion malloc/free, to test stacking calls */
2340    
2341    static void *stack_malloc(size_t size)
2342    {
2343    void *block = malloc(size);
2344    if (show_malloc)
2345      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2346    return block;
2347    }
2348    
2349    static void stack_free(void *block)
2350    {
2351    if (show_malloc)
2352      fprintf(outfile, "stack_free       %p\n", block);
2353    free(block);
2354    }
2355    
2356    
2357    /*************************************************
2358    *          Call pcre_fullinfo()                  *
2359    *************************************************/
2360    
2361    /* Get one piece of information from the pcre_fullinfo() function. When only
2362    one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2363    value, but the code is defensive.
2364    
2365    Arguments:
2366      re        compiled regex
2367      study     study data
2368      option    PCRE_INFO_xxx option
2369      ptr       where to put the data
2370    
2371    Returns:    0 when OK, < 0 on error
2372    */
2373    
2374    static int
2375    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2376    {
2377    int rc;
2378    
2379    if (pcre_mode == PCRE32_MODE)
2380    #ifdef SUPPORT_PCRE32
2381      rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2382    #else
2383      rc = PCRE_ERROR_BADMODE;
2384    #endif
2385    else if (pcre_mode == PCRE16_MODE)
2386    #ifdef SUPPORT_PCRE16
2387      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2388    #else
2389      rc = PCRE_ERROR_BADMODE;
2390    #endif
2391    else
2392    #ifdef SUPPORT_PCRE8
2393      rc = pcre_fullinfo(re, study, option, ptr);
2394    #else
2395      rc = PCRE_ERROR_BADMODE;
2396    #endif
2397    
2398    if (rc < 0 && rc != PCRE_ERROR_UNSET)
2399      {
2400      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2401        pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2402      if (rc == PCRE_ERROR_BADMODE)
2403        fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2404          "%d-bit mode\n", 8 * CHAR_SIZE,
2405          8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2406      }
2407    
2408    return rc;
2409    }
2410    
2411    
2412    
2413    /*************************************************
2414    *             Swap byte functions                *
2415    *************************************************/
2416    
2417    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2418    value, respectively.
2419    
2420    Arguments:
2421      value        any number
2422    
2423    Returns:       the byte swapped value
2424    */
2425    
2426    static pcre_uint32
2427    swap_uint32(pcre_uint32 value)
2428    {
2429    return ((value & 0x000000ff) << 24) |
2430           ((value & 0x0000ff00) <<  8) |
2431           ((value & 0x00ff0000) >>  8) |
2432           (value >> 24);
2433    }
2434    
2435    static pcre_uint16
2436    swap_uint16(pcre_uint16 value)
2437    {
2438    return (value >> 8) | (value << 8);
2439    }
2440    
2441    
2442    
2443    /*************************************************
2444    *        Flip bytes in a compiled pattern        *
2445    *************************************************/
2446    
2447    /* This function is called if the 'F' option was present on a pattern that is
2448    to be written to a file. We flip the bytes of all the integer fields in the
2449    regex data block and the study block. In 16-bit mode this also flips relevant
2450    bytes in the pattern itself. This is to make it possible to test PCRE's
2451    ability to reload byte-flipped patterns, e.g. those compiled on a different
2452    architecture. */
2453    
2454    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2455    static void
2456    regexflip8_or_16(pcre *ere, pcre_extra *extra)
2457    {
2458    real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2459    #ifdef SUPPORT_PCRE16
2460    int op;
2461    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2462    int length = re->name_count * re->name_entry_size;
2463    #ifdef SUPPORT_UTF
2464    BOOL utf = (re->options & PCRE_UTF16) != 0;
2465    BOOL utf16_char = FALSE;
2466    #endif /* SUPPORT_UTF */
2467    #endif /* SUPPORT_PCRE16 */
2468    
2469    /* Always flip the bytes in the main data block and study blocks. */
2470    
2471    re->magic_number = REVERSED_MAGIC_NUMBER;
2472    re->size = swap_uint32(re->size);
2473    re->options = swap_uint32(re->options);
2474    re->flags = swap_uint32(re->flags);
2475    re->limit_match = swap_uint32(re->limit_match);
2476    re->limit_recursion = swap_uint32(re->limit_recursion);
2477    re->first_char = swap_uint16(re->first_char);
2478    re->req_char = swap_uint16(re->req_char);
2479    re->max_lookbehind = swap_uint16(re->max_lookbehind);
2480    re->top_bracket = swap_uint16(re->top_bracket);
2481    re->top_backref = swap_uint16(re->top_backref);
2482    re->name_table_offset = swap_uint16(re->name_table_offset);
2483    re->name_entry_size = swap_uint16(re->name_entry_size);
2484    re->name_count = swap_uint16(re->name_count);
2485    re->ref_count = swap_uint16(re->ref_count);
2486    
2487    if (extra != NULL)
2488      {
2489      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2490      rsd->size = swap_uint32(rsd->size);
2491      rsd->flags = swap_uint32(rsd->flags);
2492      rsd->minlength = swap_uint32(rsd->minlength);
2493    }    }
2494    
2495  if (cb->callout_data != NULL)  /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2496    in the name table, if present, and then in the pattern itself. */
2497    
2498    #ifdef SUPPORT_PCRE16
2499    if (pcre_mode != PCRE16_MODE) return;
2500    
2501    while(TRUE)
2502    {    {
2503    int callout_data = *((int *)(cb->callout_data));    /* Swap previous characters. */
2504    if (callout_data != 0)    while (length-- > 0)
2505      {      {
2506      fprintf(outfile, "Callout data = %d\n", callout_data);      *ptr = swap_uint16(*ptr);
2507      return callout_data;      ptr++;
2508      }      }
2509    }  #ifdef SUPPORT_UTF
2510      if (utf16_char)
2511        {
2512        if ((ptr[-1] & 0xfc00) == 0xd800)
2513          {
2514          /* We know that there is only one extra character in UTF-16. */
2515          *ptr = swap_uint16(*ptr);
2516          ptr++;
2517          }
2518        }
2519      utf16_char = FALSE;
2520    #endif /* SUPPORT_UTF */
2521    
2522  return (cb->callout_number != callout_fail_id)? 0 :    /* Get next opcode. */
        (++callout_count >= callout_fail_count)? 1 : 0;  
 }  
2523    
2524      length = 0;
2525      op = *ptr;
2526      *ptr++ = swap_uint16(op);
2527    
2528  /*************************************************    switch (op)
2529  *            Local malloc functions              *      {
2530  *************************************************/      case OP_END:
2531        return;
2532    
2533  /* Alternative malloc function, to test functionality and save the size of a  #ifdef SUPPORT_UTF
2534  compiled re. The show_malloc variable is set only during matching. */      case OP_CHAR:
2535        case OP_CHARI:
2536        case OP_NOT:
2537        case OP_NOTI:
2538        case OP_STAR:
2539        case OP_MINSTAR:
2540        case OP_PLUS:
2541        case OP_MINPLUS:
2542        case OP_QUERY:
2543        case OP_MINQUERY:
2544        case OP_UPTO:
2545        case OP_MINUPTO:
2546        case OP_EXACT:
2547        case OP_POSSTAR:
2548        case OP_POSPLUS:
2549        case OP_POSQUERY:
2550        case OP_POSUPTO:
2551        case OP_STARI:
2552        case OP_MINSTARI:
2553        case OP_PLUSI:
2554        case OP_MINPLUSI:
2555        case OP_QUERYI:
2556        case OP_MINQUERYI:
2557        case OP_UPTOI:
2558        case OP_MINUPTOI:
2559        case OP_EXACTI:
2560        case OP_POSSTARI:
2561        case OP_POSPLUSI:
2562        case OP_POSQUERYI:
2563        case OP_POSUPTOI:
2564        case OP_NOTSTAR:
2565        case OP_NOTMINSTAR:
2566        case OP_NOTPLUS:
2567        case OP_NOTMINPLUS:
2568        case OP_NOTQUERY:
2569        case OP_NOTMINQUERY:
2570        case OP_NOTUPTO:
2571        case OP_NOTMINUPTO:
2572        case OP_NOTEXACT:
2573        case OP_NOTPOSSTAR:
2574        case OP_NOTPOSPLUS:
2575        case OP_NOTPOSQUERY:
2576        case OP_NOTPOSUPTO:
2577        case OP_NOTSTARI:
2578        case OP_NOTMINSTARI:
2579        case OP_NOTPLUSI:
2580        case OP_NOTMINPLUSI:
2581        case OP_NOTQUERYI:
2582        case OP_NOTMINQUERYI:
2583        case OP_NOTUPTOI:
2584        case OP_NOTMINUPTOI:
2585        case OP_NOTEXACTI:
2586        case OP_NOTPOSSTARI:
2587        case OP_NOTPOSPLUSI:
2588        case OP_NOTPOSQUERYI:
2589        case OP_NOTPOSUPTOI:
2590        if (utf) utf16_char = TRUE;
2591    #endif
2592        /* Fall through. */
2593    
2594  static void *new_malloc(size_t size)      default:
2595  {      length = OP_lengths16[op] - 1;
2596  void *block = malloc(size);      break;
2597  gotten_store = size;  
2598  if (show_malloc)      case OP_CLASS:
2599    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);      case OP_NCLASS:
2600  return block;      /* Skip the character bit map. */
2601  }      ptr += 32/sizeof(pcre_uint16);
2602        length = 0;
2603        break;
2604    
2605        case OP_XCLASS:
2606        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2607        if (LINK_SIZE > 1)
2608          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2609            - (1 + LINK_SIZE + 1));
2610        else
2611          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2612    
2613  static void new_free(void *block)      /* Reverse the size of the XCLASS instance. */
2614  {      *ptr = swap_uint16(*ptr);
2615  if (show_malloc)      ptr++;
2616    fprintf(outfile, "free             %p\n", block);      if (LINK_SIZE > 1)
2617  free(block);        {
2618          *ptr = swap_uint16(*ptr);
2619          ptr++;
2620          }
2621    
2622        op = *ptr;
2623        *ptr = swap_uint16(op);
2624        ptr++;
2625        if ((op & XCL_MAP) != 0)
2626          {
2627          /* Skip the character bit map. */
2628          ptr += 32/sizeof(pcre_uint16);
2629          length -= 32/sizeof(pcre_uint16);
2630          }
2631        break;
2632        }
2633      }
2634    /* Control should never reach here in 16 bit mode. */
2635    #endif /* SUPPORT_PCRE16 */
2636  }  }
2637    #endif /* SUPPORT_PCRE[8|16] */
2638    
 /* For recursion malloc/free, to test stacking calls */  
2639    
 static void *stack_malloc(size_t size)  
 {  
 void *block = malloc(size);  
 if (show_malloc)  
   fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);  
 return block;  
 }  
2640    
2641  static void stack_free(void *block)  #if defined SUPPORT_PCRE32
2642    static void
2643    regexflip_32(pcre *ere, pcre_extra *extra)
2644  {  {
2645  if (show_malloc)  real_pcre32 *re = (real_pcre32 *)ere;
2646    fprintf(outfile, "stack_free       %p\n", block);  int op;
2647  free(block);  pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2648  }  int length = re->name_count * re->name_entry_size;
2649    
2650    /* Always flip the bytes in the main data block and study blocks. */
2651    
2652    re->magic_number = REVERSED_MAGIC_NUMBER;
2653    re->size = swap_uint32(re->size);
2654    re->options = swap_uint32(re->options);
2655    re->flags = swap_uint32(re->flags);
2656    re->limit_match = swap_uint32(re->limit_match);
2657    re->limit_recursion = swap_uint32(re->limit_recursion);
2658    re->first_char = swap_uint32(re->first_char);
2659    re->req_char = swap_uint32(re->req_char);
2660    re->max_lookbehind = swap_uint16(re->max_lookbehind);
2661    re->top_bracket = swap_uint16(re->top_bracket);
2662    re->top_backref = swap_uint16(re->top_backref);
2663    re->name_table_offset = swap_uint16(re->name_table_offset);
2664    re->name_entry_size = swap_uint16(re->name_entry_size);
2665    re->name_count = swap_uint16(re->name_count);
2666    re->ref_count = swap_uint16(re->ref_count);
2667    
2668    if (extra != NULL)
2669      {
2670      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2671      rsd->size = swap_uint32(rsd->size);
2672      rsd->flags = swap_uint32(rsd->flags);
2673      rsd->minlength = swap_uint32(rsd->minlength);
2674      }
2675    
2676  /*************************************************  /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2677  *          Call pcre_fullinfo()                  *  the pattern itself. */
2678  *************************************************/  
2679    while(TRUE)
2680      {
2681      /* Swap previous characters. */
2682      while (length-- > 0)
2683        {
2684        *ptr = swap_uint32(*ptr);
2685        ptr++;
2686        }
2687    
2688  /* Get one piece of information from the pcre_fullinfo() function */    /* Get next opcode. */
2689    
2690  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)    length = 0;
2691  {    op = *ptr;
2692  int rc;    *ptr++ = swap_uint32(op);
2693  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
2694    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);    switch (op)
2695        {
2696        case OP_END:
2697        return;
2698    
2699        default:
2700        length = OP_lengths32[op] - 1;
2701        break;
2702    
2703        case OP_CLASS:
2704        case OP_NCLASS:
2705        /* Skip the character bit map. */
2706        ptr += 32/sizeof(pcre_uint32);
2707        length = 0;
2708        break;
2709    
2710        case OP_XCLASS:
2711        /* LINK_SIZE can only be 1 in 32-bit mode. */
2712        length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2713    
2714        /* Reverse the size of the XCLASS instance. */
2715        *ptr = swap_uint32(*ptr);
2716        ptr++;
2717    
2718        op = *ptr;
2719        *ptr = swap_uint32(op);
2720        ptr++;
2721        if ((op & XCL_MAP) != 0)
2722          {
2723          /* Skip the character bit map. */
2724          ptr += 32/sizeof(pcre_uint32);
2725          length -= 32/sizeof(pcre_uint32);
2726          }
2727        break;
2728        }
2729      }
2730    /* Control should never reach here in 32 bit mode. */
2731  }  }
2732    
2733    #endif /* SUPPORT_PCRE32 */
2734    
2735    
 /*************************************************  
 *         Byte flipping function                 *  
 *************************************************/  
2736    
2737  static unsigned long int  static void
2738  byteflip(unsigned long int value, int n)  regexflip(pcre *ere, pcre_extra *extra)
2739  {  {
2740  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  #if defined SUPPORT_PCRE32
2741  return ((value & 0x000000ff) << 24) |    if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2742         ((value & 0x0000ff00) <<  8) |      regexflip_32(ere, extra);
2743         ((value & 0x00ff0000) >>  8) |  #endif
2744         ((value & 0xff000000) >> 24);  #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2745      if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2746        regexflip8_or_16(ere, extra);
2747    #endif
2748  }  }
2749    
2750    
2751    
   
2752  /*************************************************  /*************************************************
2753  *        Check match or recursion limit          *  *        Check match or recursion limit          *
2754  *************************************************/  *************************************************/
2755    
2756  static int  static int
2757  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2758    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2759    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2760  {  {
# Line 1087  for (;;) Line 2769  for (;;)
2769    {    {
2770    *limit = mid;    *limit = mid;
2771    
2772    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2773      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2774    
2775    if (count == errnumber)    if (count == errnumber)
# Line 1132  Returns:    < 0, = 0, or > 0, according Line 2814  Returns:    < 0, = 0, or > 0, according
2814  */  */
2815    
2816  static int  static int
2817  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2818  {  {
2819  while (n--)  while (n--)
2820    {    {
# Line 1159  Returns:      appropriate PCRE_NEWLINE_x Line 2841  Returns:      appropriate PCRE_NEWLINE_x
2841  */  */
2842    
2843  static int  static int
2844  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2845  {  {
2846  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2847  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2848  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2849  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2850  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2851  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2852  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2853  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2854  return 0;  return 0;
2855  }  }
# Line 1183  usage(void) Line 2865  usage(void)
2865  {  {
2866  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2867  printf("Input and output default to stdin and stdout.\n");  printf("Input and output default to stdin and stdout.\n");
2868  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2869  printf("If input is a terminal, readline() is used to read from it.\n");  printf("If input is a terminal, readline() is used to read from it.\n");
2870  #else  #else
2871  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2872  #endif  #endif
2873  printf("\nOptions:\n");  printf("\nOptions:\n");
2874  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2875    printf("  -16      use the 16-bit library\n");
2876    #endif
2877    #ifdef SUPPORT_PCRE32
2878    printf("  -32      use the 32-bit library\n");
2879    #endif
2880    printf("  -b       show compiled code\n");
2881  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2882    printf("  -C arg   show a specific compile-time option\n");
2883    printf("           and exit with its value. The arg can be:\n");
2884    printf("     linksize     internal link size [2, 3, 4]\n");
2885    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2886    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2887    printf("     pcre32       32 bit library support enabled [0, 1]\n");
2888    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2889    printf("     ucp          Unicode Properties supported [0, 1]\n");
2890    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2891    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2892  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2893  #if !defined NODFA  #if !defined NODFA
2894  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 1207  printf("  -q       quiet: do not output Line 2905  printf("  -q       quiet: do not output
2905  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2906  printf("  -s       force each pattern to be studied at basic level\n"  printf("  -s       force each pattern to be studied at basic level\n"
2907         "  -s+      force each pattern to be studied, using JIT if available\n"         "  -s+      force each pattern to be studied, using JIT if available\n"
2908           "  -s++     ditto, verifying when JIT was actually used\n"
2909           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2910           "             where 1 <= n <= 7 selects JIT options\n"
2911           "  -s++n    ditto, verifying when JIT was actually used\n"
2912         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2913  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2914  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 1226  options, followed by a set of test data, Line 2928  options, followed by a set of test data,
2928  int main(int argc, char **argv)  int main(int argc, char **argv)
2929  {  {
2930  FILE *infile = stdin;  FILE *infile = stdin;
2931    const char *version;
2932  int options = 0;  int options = 0;
2933  int study_options = 0;  int study_options = 0;
2934  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 1240  int quiet = 0; Line 2943  int quiet = 0;
2943  int size_offsets = 45;  int size_offsets = 45;
2944  int size_offsets_max;  int size_offsets_max;
2945  int *offsets = NULL;  int *offsets = NULL;
 #if !defined NOPOSIX  
 int posix = 0;  
 #endif  
2946  int debug = 0;  int debug = 0;
2947  int done = 0;  int done = 0;
2948  int all_use_dfa = 0;  int all_use_dfa = 0;
2949    int verify_jit = 0;
2950  int yield = 0;  int yield = 0;
2951  int stack_size;  int stack_size;
2952    pcre_uint8 *dbuffer = NULL;
2953    size_t dbuffer_size = 1u << 14;
2954    
2955  pcre_jit_stack *jit_stack = NULL;  #if !defined NOPOSIX
2956    int posix = 0;
2957    #endif
2958    #if !defined NODFA
2959    int *dfa_workspace = NULL;
2960    #endif
2961    
2962    pcre_jit_stack *jit_stack = NULL;
2963    
2964  /* These vectors store, end-to-end, a list of captured substring names. Assume  /* These vectors store, end-to-end, a list of zero-terminated captured
2965  that 1024 is plenty long enough for the few names we'll be testing. */  substring names, each list itself being terminated by an empty name. Assume
2966    that 1024 is plenty long enough for the few names we'll be testing. It is
2967    easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2968    for the actual memory, to ensure alignment. */
2969    
2970    pcre_uint32 copynames[1024];
2971    pcre_uint32 getnames[1024];
2972    
2973    #ifdef SUPPORT_PCRE32
2974    pcre_uint32 *cn32ptr;
2975    pcre_uint32 *gn32ptr;
2976    #endif
2977    
2978  uschar copynames[1024];  #ifdef SUPPORT_PCRE16
2979  uschar getnames[1024];  pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2980    pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2981    pcre_uint16 *cn16ptr;
2982    pcre_uint16 *gn16ptr;
2983    #endif
2984    
2985  uschar *copynamesptr;  #ifdef SUPPORT_PCRE8
2986  uschar *getnamesptr;  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2987    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2988    pcre_uint8 *cn8ptr;
2989    pcre_uint8 *gn8ptr;
2990    #endif
2991    
2992  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that valgrind will check their misuse when
2993  when I am debugging. They grow automatically when very long lines are read. */  debugging. They grow automatically when very long lines are read. The 16-
2994    and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2995    
2996  buffer = (unsigned char *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
2997  dbuffer = (unsigned char *)malloc(buffer_size);  pbuffer = (pcre_uint8 *)malloc(buffer_size);
 pbuffer = (unsigned char *)malloc(buffer_size);  
2998    
2999  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
3000    
# Line 1281  it set 0x8000, but then I was advised th Line 3009  it set 0x8000, but then I was advised th
3009  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
3010  #endif  #endif
3011    
3012    /* Get the version number: both pcre_version() and pcre16_version() give the
3013    same answer. We just need to ensure that we call one that is available. */
3014    
3015    #if defined SUPPORT_PCRE8
3016    version = pcre_version();
3017    #elif defined SUPPORT_PCRE16
3018    version = pcre16_version();
3019    #elif defined SUPPORT_PCRE32
3020    version = pcre32_version();
3021    #endif
3022    
3023  /* Scan options */  /* Scan options */
3024    
3025  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
3026    {    {
3027    unsigned char *endptr;    pcre_uint8 *endptr;
3028      char *arg = argv[op];
3029    
3030    if (strcmp(argv[op], "-m") == 0) showstore = 1;    if (strcmp(arg, "-m") == 0) showstore = 1;
3031    else if (strcmp(argv[op], "-s") == 0) force_study = 0;    else if (strcmp(arg, "-s") == 0) force_study = 0;
3032    else if (strcmp(argv[op], "-s+") == 0)  
3033      else if (strncmp(arg, "-s+", 3) == 0)
3034      {      {
3035        arg += 3;
3036        if (*arg == '+') { arg++; verify_jit = TRUE; }
3037      force_study = 1;      force_study = 1;
3038      force_study_options = PCRE_STUDY_JIT_COMPILE;      if (*arg == 0)
3039          force_study_options = jit_study_bits[6];
3040        else if (*arg >= '1' && *arg <= '7')
3041          force_study_options = jit_study_bits[*arg - '1'];
3042        else goto BAD_ARG;
3043        }
3044      else if (strcmp(arg, "-8") == 0)
3045        {
3046    #ifdef SUPPORT_PCRE8
3047        pcre_mode = PCRE8_MODE;
3048    #else
3049        printf("** This version of PCRE was built without 8-bit support\n");
3050        exit(1);
3051    #endif
3052        }
3053      else if (strcmp(arg, "-16") == 0)
3054        {
3055    #ifdef SUPPORT_PCRE16
3056        pcre_mode = PCRE16_MODE;
3057    #else
3058        printf("** This version of PCRE was built without 16-bit support\n");
3059        exit(1);
3060    #endif
3061        }
3062      else if (strcmp(arg, "-32") == 0)
3063        {
3064    #ifdef SUPPORT_PCRE32
3065        pcre_mode = PCRE32_MODE;
3066    #else
3067        printf("** This version of PCRE was built without 32-bit support\n");
3068        exit(1);
3069    #endif
3070      }      }
3071    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(arg, "-q") == 0) quiet = 1;
3072    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(arg, "-b") == 0) debug = 1;
3073    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(arg, "-i") == 0) showinfo = 1;
3074    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3075    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;    else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3076  #if !defined NODFA  #if !defined NODFA
3077    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3078  #endif  #endif
3079    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3080        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3081          *endptr == 0))          *endptr == 0))
3082      {      {
3083      op++;      op++;
3084      argc--;      argc--;
3085      }      }
3086    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
3087      {      {
3088      int both = argv[op][2] == 0;      int both = arg[2] == 0;
3089      int temp;      int temp;
3090      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3091                       *endptr == 0))                       *endptr == 0))
3092        {        {
3093        timeitm = temp;        timeitm = temp;
# Line 1323  while (argc > 1 && argv[op][0] == '-') Line 3097  while (argc > 1 && argv[op][0] == '-')
3097      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
3098      if (both) timeit = timeitm;      if (both) timeit = timeitm;
3099      }      }
3100    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3101        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3102          *endptr == 0))          *endptr == 0))
3103      {      {
3104  #if defined(_WIN32) || defined(WIN32) || defined(__minix)  #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3105      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
3106      exit(1);      exit(1);
3107  #else  #else
# Line 1346  while (argc > 1 && argv[op][0] == '-') Line 3120  while (argc > 1 && argv[op][0] == '-')
3120  #endif  #endif
3121      }      }
3122  #if !defined NOPOSIX  #if !defined NOPOSIX
3123    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
3124  #endif  #endif
3125    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
3126      {      {
3127      int rc;      int rc;
3128      unsigned long int lrc;      unsigned long int lrc;
3129      printf("PCRE version %s\n", pcre_version());  
3130        if (argc > 2)
3131          {
3132          if (strcmp(argv[op + 1], "linksize") == 0)
3133            {
3134            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3135            printf("%d\n", rc);
3136            yield = rc;
3137    
3138    #ifdef __VMS
3139            vms_setsymbol("LINKSIZE",0,yield );
3140    #endif
3141            }
3142          else if (strcmp(argv[op + 1], "pcre8") == 0)
3143            {
3144    #ifdef SUPPORT_PCRE8
3145            printf("1\n");
3146            yield = 1;
3147    #else
3148            printf("0\n");
3149            yield = 0;
3150    #endif
3151    #ifdef __VMS
3152            vms_setsymbol("PCRE8",0,yield );
3153    #endif
3154            }
3155          else if (strcmp(argv[op + 1], "pcre16") == 0)
3156            {
3157    #ifdef SUPPORT_PCRE16
3158            printf("1\n");
3159            yield = 1;
3160    #else
3161            printf("0\n");
3162            yield = 0;
3163    #endif
3164    #ifdef __VMS
3165            vms_setsymbol("PCRE16",0,yield );
3166    #endif
3167            }
3168          else if (strcmp(argv[op + 1], "pcre32") == 0)
3169            {
3170    #ifdef SUPPORT_PCRE32
3171            printf("1\n");
3172            yield = 1;
3173    #else
3174            printf("0\n");
3175            yield = 0;
3176    #endif
3177    #ifdef __VMS
3178            vms_setsymbol("PCRE32",0,yield );
3179    #endif
3180            }
3181          else if (strcmp(argv[op + 1], "utf") == 0)
3182            {
3183    #ifdef SUPPORT_PCRE8
3184            if (pcre_mode == PCRE8_MODE)
3185              (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3186    #endif
3187    #ifdef SUPPORT_PCRE16
3188            if (pcre_mode == PCRE16_MODE)
3189              (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3190    #endif
3191    #ifdef SUPPORT_PCRE32
3192            if (pcre_mode == PCRE32_MODE)
3193              (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3194    #endif
3195            printf("%d\n", rc);
3196            yield = rc;
3197    #ifdef __VMS
3198            vms_setsymbol("UTF",0,yield );
3199    #endif
3200            }
3201          else if (strcmp(argv[op + 1], "ucp") == 0)
3202            {
3203            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3204            printf("%d\n", rc);
3205            yield = rc;
3206            }
3207          else if (strcmp(argv[op + 1], "jit") == 0)
3208            {
3209            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3210            printf("%d\n", rc);
3211            yield = rc;
3212            }
3213          else if (strcmp(argv[op + 1], "newline") == 0)
3214            {
3215            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3216            print_newline_config(rc, TRUE);
3217            }
3218          else if (strcmp(argv[op + 1], "ebcdic") == 0)
3219            {
3220    #ifdef EBCDIC
3221            printf("1\n");
3222            yield = 1;
3223    #else
3224            printf("0\n");
3225    #endif
3226            }
3227          else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3228            {
3229    #ifdef EBCDIC
3230            printf("0x%02x\n", CHAR_LF);
3231    #else
3232            printf("0\n");
3233    #endif
3234            }
3235          else
3236            {
3237            printf("Unknown -C option: %s\n", argv[op + 1]);
3238            }
3239          goto EXIT;
3240          }
3241    
3242        /* No argument for -C: output all configuration information. */
3243    
3244        printf("PCRE version %s\n", version);
3245      printf("Compiled with\n");      printf("Compiled with\n");
3246    
3247    #ifdef EBCDIC
3248        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3249    #endif
3250    
3251    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3252    are set, either both UTFs are supported or both are not supported. */
3253    
3254    #ifdef SUPPORT_PCRE8
3255        printf("  8-bit support\n");
3256      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3257      printf("  %sUTF-8 support\n", rc? "" : "No ");        printf ("  %sUTF-8 support\n", rc ? "" : "No ");
3258      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #endif
3259    #ifdef SUPPORT_PCRE16
3260        printf("  16-bit support\n");
3261        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3262        printf ("  %sUTF-16 support\n", rc ? "" : "No ");
3263    #endif
3264    #ifdef SUPPORT_PCRE32
3265        printf("  32-bit support\n");
3266        (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3267        printf ("  %sUTF-32 support\n", rc ? "" : "No ");
3268    #endif
3269    
3270        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3271      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
3272      (void)pcre_config(PCRE_CONFIG_JIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3273      if (rc)      if (rc)
3274        printf("  Just-in-time compiler support\n");        {
3275          const char *arch;
3276          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3277          printf("  Just-in-time compiler support: %s\n", arch);
3278          }
3279      else      else
3280        printf("  No just-in-time compiler support\n");        printf("  No just-in-time compiler support\n");
3281      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3282      /* Note that these values are always the ASCII values, even      print_newline_config(rc, FALSE);
3283      in EBCDIC environments. CR is 13 and NL is 10. */      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
     printf("  Newline sequence is %s\n", (rc == 13)? "CR" :  
       (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :  
       (rc == -2)? "ANYCRLF" :  
       (rc == -1)? "ANY" : "???");  
     (void)pcre_config(PCRE_CONFIG_BSR, &rc);  
3284      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3285                                       "all Unicode newlines");                                       "all Unicode newlines");
3286      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3287      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
3288      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3289      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
3290      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3291      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
3292      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3293      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
3294      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3295      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
3296        if (showstore)
3297          {
3298          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3299          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3300          }
3301        printf("\n");
3302      goto EXIT;      goto EXIT;
3303      }      }
3304    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
3305             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
3306      {      {
3307      usage();      usage();
3308      goto EXIT;      goto EXIT;
3309      }      }
3310    else    else
3311      {      {
3312      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
3313        printf("** Unknown or malformed option %s\n", arg);
3314      usage();      usage();
3315      yield = 1;      yield = 1;
3316      goto EXIT;      goto EXIT;
# Line 1440  if (argc > 2) Line 3357  if (argc > 2)
3357    
3358  /* Set alternative malloc function */  /* Set alternative malloc function */
3359    
3360    #ifdef SUPPORT_PCRE8
3361  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
3362  pcre_free = new_free;  pcre_free = new_free;
3363  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
3364  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
3365    #endif
3366    
3367    #ifdef SUPPORT_PCRE16
3368    pcre16_malloc = new_malloc;
3369    pcre16_free = new_free;
3370    pcre16_stack_malloc = stack_malloc;
3371    pcre16_stack_free = stack_free;
3372    #endif
3373    
3374    #ifdef SUPPORT_PCRE32
3375    pcre32_malloc = new_malloc;
3376    pcre32_free = new_free;
3377    pcre32_stack_malloc = stack_malloc;
3378    pcre32_stack_free = stack_free;
3379    #endif
3380    
3381  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
3382    
3383  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3384    
3385  /* Main loop */  /* Main loop */
3386    
# Line 1462  while (!done) Line 3395  while (!done)
3395  #endif  #endif
3396    
3397    const char *error;    const char *error;
3398    unsigned char *markptr;    pcre_uint8 *markptr;
3399    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
3400    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
3401    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
3402      unsigned long int get_options;
3403    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
3404    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
3405    int do_allcaps = 0;    int do_allcaps = 0;
# Line 1481  while (!done) Line 3415  while (!done)
3415    int do_flip = 0;    int do_flip = 0;
3416    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
3417    
3418    use_utf8 = 0;  #if !defined NODFA
3419      int dfa_matched = 0;
3420    #endif
3421    
3422      use_utf = 0;
3423    debug_lengths = 1;    debug_lengths = 1;
3424    
3425    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1496  while (!done) Line 3434  while (!done)
3434    
3435    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3436      {      {
3437      unsigned long int magic, get_options;      pcre_uint32 magic;
3438      uschar sbuf[8];      pcre_uint8 sbuf[8];
3439      FILE *f;      FILE *f;
3440    
3441      p++;      p++;
3442        if (*p == '!')
3443          {
3444          do_debug = TRUE;
3445          do_showinfo = TRUE;
3446          p++;
3447          }
3448    
3449      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
3450      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
3451      *pp = 0;      *pp = 0;
# Line 1512  while (!done) Line 3457  while (!done)
3457        continue;        continue;
3458        }        }
3459    
3460        first_gotten_store = 0;
3461      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3462    
3463      true_size =      true_size =
# Line 1519  while (!done) Line 3465  while (!done)
3465      true_study_size =      true_study_size =
3466        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3467    
3468      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
3469      regex_gotten_store = gotten_store;      if (re == NULL)
3470          {
3471          printf("** Failed to get %d bytes of memory for pcre object\n",
3472            (int)true_size);
3473          yield = 1;
3474          goto EXIT;
3475          }
3476        regex_gotten_store = first_gotten_store;
3477    
3478      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3479    
3480      magic = ((real_pcre *)re)->magic_number;      magic = REAL_PCRE_MAGIC(re);
3481      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
3482        {        {
3483        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
3484          {          {
3485          do_flip = 1;          do_flip = 1;
3486          }          }
3487        else        else
3488          {          {
3489          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3490            new_free(re);
3491          fclose(f);          fclose(f);
3492          continue;          continue;
3493          }          }
3494        }        }
3495    
3496        /* We hide the byte-invert info for little and big endian tests. */
3497      fprintf(outfile, "Compiled pattern%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3498        do_flip? " (byte-inverted)" : "", p);        do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
   
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
3499    
3500      /* Now see if there is any following study data. */      /* Now see if there is any following study data. */
3501    
# Line 1563  while (!done) Line 3513  while (!done)
3513          {          {
3514          FAIL_READ:          FAIL_READ:
3515          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
3516          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
3517          if (re != NULL) new_free(re);            {
3518              PCRE_FREE_STUDY(extra);
3519              }
3520            new_free(re);
3521          fclose(f);          fclose(f);
3522          continue;          continue;
3523          }          }
# Line 1573  while (!done) Line 3526  while (!done)
3526        }        }
3527      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
3528    
3529        /* Flip the necessary bytes. */
3530        if (do_flip)
3531          {
3532          int rc;
3533          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3534          if (rc == PCRE_ERROR_BADMODE)
3535            {
3536            pcre_uint32 flags_in_host_byte_order;
3537            if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3538              flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3539            else
3540              flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
3541            /* Simulate the result of the function call below. */
3542            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3543              pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3544              PCRE_INFO_OPTIONS);
3545            fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3546              "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3547            new_free(re);
3548            fclose(f);
3549            continue;
3550            }
3551          }
3552    
3553        /* Need to know if UTF-8 for printing data strings. */
3554    
3555        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3556          {
3557          new_free(re);
3558          fclose(f);
3559          continue;
3560          }
3561        use_utf = (get_options & PCRE_UTF8) != 0;
3562    
3563      fclose(f);      fclose(f);
3564      goto SHOW_INFO;      goto SHOW_INFO;
3565      }      }
3566    
3567    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
3568    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
3569    
3570    delimiter = *p++;    delimiter = *p++;
3571    
# Line 1629  while (!done) Line 3616  while (!done)
3616    /* Look for options after final delimiter */    /* Look for options after final delimiter */
3617    
3618    options = 0;    options = 0;
3619      study_options = force_study_options;
3620    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
3621    
3622    while (*pp != 0)    while (*pp != 0)
# Line 1665  while (!done) Line 3653  while (!done)
3653  #endif  #endif
3654    
3655        case 'S':        case 'S':
3656        if (do_study == 0)        do_study = 1;
3657          for (;;)
3658          {          {
3659          do_study = 1;          switch (*pp++)
         if (*pp == '+')  
3660            {            {
3661            study_options |= PCRE_STUDY_JIT_COMPILE;            case 'S':
3662            pp++;            do_study = 0;
3663              no_force_study = 1;
3664              break;
3665    
3666              case '!':
3667              study_options |= PCRE_STUDY_EXTRA_NEEDED;
3668              break;
3669    
3670              case '+':
3671              if (*pp == '+')
3672                {
3673                verify_jit = TRUE;
3674                pp++;
3675                }
3676              if (*pp >= '1' && *pp <= '7')
3677                study_options |= jit_study_bits[*pp++ - '1'];
3678              else
3679                study_options |= jit_study_bits[6];
3680              break;
3681    
3682              case '-':
3683              study_options &= ~PCRE_STUDY_ALLJIT;
3684              break;
3685    
3686              default:
3687              pp--;
3688              goto ENDLOOP;
3689            }            }
3690          }          }
3691        else        ENDLOOP:
         {  
         do_study = 0;  
         no_force_study = 1;  
         }  
3692        break;        break;
3693    
3694        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
# Line 1686  while (!done) Line 3696  while (!done)
3696        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
3697        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3698        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
3699        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
3700          case '9': options |= PCRE_NEVER_UTF; break;
3701        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
3702    
3703        case 'T':        case 'T':
# Line 1720  while (!done) Line 3731  while (!done)
3731          goto SKIP_DATA;          goto SKIP_DATA;
3732          }          }
3733        locale_set = 1;        locale_set = 1;
3734        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
3735        pp = ppp;        pp = ppp;
3736        break;        break;
3737    
# Line 1733  while (!done) Line 3744  while (!done)
3744    
3745        case '<':        case '<':
3746          {          {
3747          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3748            {            {
3749            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
3750            pp += 3;            pp += 3;
# Line 1761  while (!done) Line 3772  while (!done)
3772    
3773    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
3774    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
3775    local character tables. */    local character tables. Neither does it have 16-bit support. */
3776    
3777  #if !defined NOPOSIX  #if !defined NOPOSIX
3778    if (posix || do_posix)    if (posix || do_posix)
# Line 1777  while (!done) Line 3788  while (!done)
3788      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3789      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3790    
3791        first_gotten_store = 0;
3792      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
3793    
3794      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1796  while (!done) Line 3808  while (!done)
3808  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
3809    
3810      {      {
3811      unsigned long int get_options;      /* In 16- or 32-bit mode, convert the input. */
3812    
3813    #ifdef SUPPORT_PCRE16
3814        if (pcre_mode == PCRE16_MODE)
3815          {
3816          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3817            {
3818            case -1:
3819            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3820              "converted to UTF-16\n");
3821            goto SKIP_DATA;
3822    
3823            case -2:
3824            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3825              "cannot be converted to UTF-16\n");
3826            goto SKIP_DATA;
3827    
3828            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3829            fprintf(outfile, "**Failed: character value greater than 0xffff "
3830              "cannot be converted to 16-bit in non-UTF mode\n");
3831            goto SKIP_DATA;
3832    
3833            default:
3834            break;
3835            }
3836          p = (pcre_uint8 *)buffer16;
3837          }
3838    #endif
3839    
3840    #ifdef SUPPORT_PCRE32
3841        if (pcre_mode == PCRE32_MODE)
3842          {
3843          switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3844            {
3845            case -1:
3846            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3847              "converted to UTF-32\n");
3848            goto SKIP_DATA;
3849    
3850            case -2:
3851            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3852              "cannot be converted to UTF-32\n");
3853            goto SKIP_DATA;
3854    
3855            case -3:
3856            fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3857            goto SKIP_DATA;
3858    
3859            default:
3860            break;
3861            }
3862          p = (pcre_uint8 *)buffer32;
3863          }
3864    #endif
3865    
3866        /* Compile many times when timing */
3867    
3868      if (timeit > 0)      if (timeit > 0)
3869        {        {
# Line 1805  while (!done) Line 3872  while (!done)
3872        clock_t start_time = clock();        clock_t start_time = clock();
3873        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
3874          {          {
3875          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3876          if (re != NULL) free(re);          if (re != NULL) free(re);
3877          }          }
3878        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1814  while (!done) Line 3881  while (!done)
3881            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
3882        }        }
3883    
3884      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
3885        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3886    
3887      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3888      if non-interactive. */      if non-interactive. */
# Line 1843  while (!done) Line 3911  while (!done)
3911    
3912      /* Compilation succeeded. It is now possible to set the UTF-8 option from      /* Compilation succeeded. It is now possible to set the UTF-8 option from
3913      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
3914      lines. */      lines. */
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;  
   
     /* Print information if required. There are now two info-returning  
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
3915    
3916      if (log_store)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3917        fprintf(outfile, "Memory allocation (code space): %d\n",        goto SKIP_DATA;
3918          (int)(gotten_store -      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3919    
3920      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3921      and remember the store that was got. */      and remember the store that was got. */
3922    
3923      true_size = ((real_pcre *)re)->size;      true_size = REAL_PCRE_SIZE(re);
3924      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3925    
3926        /* Output code size information if requested */
3927    
3928        if (log_store)
3929          {
3930          int name_count, name_entry_size, real_pcre_size;
3931    
3932          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3933          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3934          real_pcre_size = 0;
3935    #ifdef SUPPORT_PCRE8
3936          if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3937            real_pcre_size = sizeof(real_pcre);
3938    #endif
3939    #ifdef SUPPORT_PCRE16
3940          if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3941            real_pcre_size = sizeof(real_pcre16);
3942    #endif
3943    #ifdef SUPPORT_PCRE32
3944          if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3945            real_pcre_size = sizeof(real_pcre32);
3946    #endif
3947          fprintf(outfile, "Memory allocation (code space): %d\n",
3948            (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3949          }
3950    
3951      /* If -s or /S was present, study the regex to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
3952      help with the matching, unless the pattern has the SS option, which      help with the matching, unless the pattern has the SS option, which
# Line 1877  while (!done) Line 3961  while (!done)
3961          clock_t time_taken;          clock_t time_taken;
3962          clock_t start_time = clock();          clock_t start_time = clock();
3963          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3964            extra = pcre_study(re, study_options | force_study_options, &error);            {
3965              PCRE_STUDY(extra, re, study_options, &error);
3966              }
3967          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3968          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
3969              {
3970              PCRE_FREE_STUDY(extra);
3971              }
3972          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3973            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3974              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3975          }          }
3976        extra = pcre_study(re, study_options | force_study_options, &error);        PCRE_STUDY(extra, re, study_options, &error);
3977        if (error != NULL)        if (error != NULL)
3978          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3979        else if (extra != NULL)        else if (extra != NULL)
3980            {
3981          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3982            if (log_store)
3983              {
3984              size_t jitsize;
3985              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3986                  jitsize != 0)
3987                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3988              }
3989            }
3990        }        }
3991    
3992      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1904  while (!done) Line 4002  while (!done)
4002        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
4003        }        }
4004    
4005      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
4006    
4007      SHOW_INFO:      SHOW_INFO:
4008    
4009      if (do_debug)      if (do_debug)
4010        {        {
4011        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
4012        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
4013        }        }
4014    
4015      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1956  while (!done) Line 4017  while (!done)
4017      if (do_showinfo)      if (do_showinfo)
4018        {        {
4019        unsigned long int all_options;        unsigned long int all_options;
4020  #if !defined NOINFOCHECK        pcre_uint32 first_char, need_char;
4021        int old_first_char, old_options, old_count;        pcre_uint32 match_limit, recursion_limit;
4022  #endif        int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4023        int count, backrefmax, first_char, need_char, okpartial, jchanged,          hascrorlf, maxlookbehind;
         hascrorlf;  
4024        int nameentrysize, namecount;        int nameentrysize, namecount;
4025        const uschar *nametable;        const pcre_uint8 *nametable;
4026    
4027        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4028        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4029        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4030        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4031        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4032        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4033        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4034        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4035        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4036        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4037        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4038              new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4039  #if !defined NOINFOCHECK            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4040        old_count = pcre_info(re, &old_options, &old_first_char);            new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4041        if (count < 0) fprintf(outfile,            != 0)
4042          "Error %d from pcre_info()\n", count);          goto SKIP_DATA;
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
4043    
4044        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
4045          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
4046          (int)size, (int)regex_gotten_store);          (int)size, (int)regex_gotten_store);
4047    
4048        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
4049    
4050        if (backrefmax > 0)        if (backrefmax > 0)
4051          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
4052    
4053          if (maxlookbehind > 0)
4054            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4055    
4056          if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
4057            fprintf(outfile, "Match limit = %u\n", match_limit);
4058    
4059          if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
4060            fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
4061    
4062        if (namecount > 0)        if (namecount > 0)
4063          {          {
4064          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
4065          while (namecount-- > 0)          while (namecount-- > 0)
4066            {            {
4067            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,            int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4068              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int length = (int)STRLEN(nametable + imm2_size);
4069              GET2(nametable, 0));            fprintf(outfile, "  ");
4070            nametable += nameentrysize;            PCHARSV(nametable, imm2_size, length, outfile);
4071              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4072    #ifdef SUPPORT_PCRE32
4073              if (pcre_mode == PCRE32_MODE)
4074                fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4075    #endif
4076    #ifdef SUPPORT_PCRE16
4077              if (pcre_mode == PCRE16_MODE)
4078                fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4079    #endif
4080    #ifdef SUPPORT_PCRE8
4081              if (pcre_mode == PCRE8_MODE)
4082                fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4083    #endif
4084              nametable += nameentrysize * CHAR_SIZE;
4085            }            }
4086          }          }
4087    
4088        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4089        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4090    
4091        all_options = ((real_pcre *)re)->options;        all_options = REAL_PCRE_OPTIONS(re);
4092        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
4093    
4094        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
4095          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4096            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4097            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4098            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 2036  while (!done) Line 4105  while (!done)
4105            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4106            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4107            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4108            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4109            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4110            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4111            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4112            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
4113              ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
4114    
4115        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4116    
# Line 2070  while (!done) Line 4140  while (!done)
4140          break;          break;
4141          }          }
4142    
4143        if (first_char == -1)        if (first_char_set == 2)
4144          {          {
4145          fprintf(outfile, "First char at start or follows newline\n");          fprintf(outfile, "First char at start or follows newline\n");
4146          }          }
4147        else if (first_char < 0)        else if (first_char_set == 1)
4148          {          {
4149          fprintf(outfile, "No first char\n");          const char *caseless =
4150              ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4151              "" : " (caseless)";
4152    
4153            if (PRINTOK(first_char))
4154              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4155            else
4156              {
4157              fprintf(outfile, "First char = ");
4158              pchar(first_char, outfile);
4159              fprintf(outfile, "%s\n", caseless);
4160              }
4161          }          }
4162        else        else
4163          {          {
4164          int ch = first_char & 255;          fprintf(outfile, "No first char\n");
         const char *caseless = ((first_char & REQ_CASELESS) == 0)?  
           "" : " (caseless)";  
         if (PRINTHEX(ch))  
           fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);  
         else  
           fprintf(outfile, "First char = %d%s\n", ch, caseless);  
4165          }          }
4166    
4167        if (need_char < 0)        if (need_char_set == 0)
4168          {          {
4169          fprintf(outfile, "No need char\n");          fprintf(outfile, "No need char\n");
4170          }          }
4171        else        else
4172          {          {
4173          int ch = need_char & 255;          const char *caseless =
4174          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4175            "" : " (caseless)";            "" : " (caseless)";
4176          if (PRINTHEX(ch))  
4177            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
4178              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4179          else          else
4180            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
4181              fprintf(outfile, "Need char = ");
4182              pchar(need_char, outfile);
4183              fprintf(outfile, "%s\n", caseless);
4184              }
4185          }          }
4186    
4187        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
# Line 2118  while (!done) Line 4198  while (!done)
4198            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
4199          else          else
4200            {            {
4201            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
4202            int minlength;            int minlength;
4203    
4204            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4205            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4206    
4207            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
4208              {              {
4209              int i;              if (start_bits == NULL)
4210              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
4211              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
4212                {                {
4213                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
4214                  int c = 24;
4215                  fprintf(outfile, "Starting byte set: ");
4216                  for (i = 0; i < 256; i++)
4217                  {                  {
4218                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;