/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 675 by ph10, Sat Aug 27 10:18:46 2011 UTC revision 1150 by zherczeg, Sun Oct 21 06:35:52 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of all of the 8-bit, 16-bit, and
40    32-bit PCRE libraries in a single program. This is different from the modules
41    such as pcre_compile.c in the library itself, which are compiled separately for
42    each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43    twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44    make use of any of the macros from pcre_internal.h that depend on
45    COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46    SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47    supported library functions. */
48    
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 58  POSSIBILITY OF SUCH DAMAGE.
58  #include <locale.h>  #include <locale.h>
59  #include <errno.h>  #include <errno.h>
60    
61  #ifdef SUPPORT_LIBREADLINE  /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
68  #include <unistd.h>  #include <unistd.h>
69  #endif  #endif
70    #if defined(SUPPORT_LIBREADLINE)
71  #include <readline/readline.h>  #include <readline/readline.h>
72  #include <readline/history.h>  #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80  #endif  #endif
   
81    
82  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
83  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 90  input mode under Windows. */ Line 112  input mode under Windows. */
112  #else  #else
113  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
114  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119  #define INPUT_MODE   "rb"  #define INPUT_MODE   "rb"
120  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
121  #endif  #endif
122    #endif
123    
/* PRIV(x) expands to the bare identifier x, with nothing prepended or appended.
NOTE(review): presumably this is what gives the library sources #included
further down their unprefixed symbol names - confirm against pcre_internal.h. */
#define PRIV(symbol) symbol
125    
126  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
127  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 107  appropriately for an application, not fo Line 135  appropriately for an application, not fo
135  #include "pcre.h"  #include "pcre.h"
136  #include "pcre_internal.h"  #include "pcre_internal.h"
137    
138    /* The pcre_printint() function, which prints the internal form of a compiled
139    regex, is held in a separate file so that (a) it can be compiled in either
140    8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
141    when that is compiled in debug mode. */
142    
143    #ifdef SUPPORT_PCRE8
144    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
145    #endif
146    #ifdef SUPPORT_PCRE16
147    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
148    #endif
149    #ifdef SUPPORT_PCRE32
150    void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151    #endif
152    
153  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
154  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source files here, changing the names of the
155  external symbols to prevent clashes. */  external symbols to prevent clashes. */
156    
157  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
 #define _pcre_ucp_typerange    ucp_typerange  
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utf8_char_sizes  utf8_char_sizes  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
158    
159  #include "pcre_tables.c"  #include "pcre_tables.c"
160    #include "pcre_ucd.c"
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
161    
162  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
163  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
164  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
165  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
166  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
167    
/* PRINTABLE(ch) is non-zero when the character code ch is in the range that is
output as-is: 64 to 254 inclusive when EBCDIC is defined, 32 to 126 inclusive
otherwise. The argument may be evaluated twice. */
#ifndef EBCDIC
#define PRINTABLE(ch) (32 <= (ch) && (ch) < 127)
#else
#define PRINTABLE(ch) (64 <= (ch) && (ch) < 255)
#endif
173    
174  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
175    
176    /* Posix support is disabled in 16 or 32 bit only mode. */
177    #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
178    #define NOPOSIX
179    #endif
180    
181  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
182  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 150  Makefile. */ Line 186  Makefile. */
186  #include "pcreposix.h"  #include "pcreposix.h"
187  #endif  #endif
188    
189  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
190  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
191  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
192  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
193  UTF8 support if PCRE is built without it. */  
194    #ifndef SUPPORT_UTF
195  #ifndef SUPPORT_UTF8  #ifndef NOUTF
196  #ifndef NOUTF8  #define NOUTF
197  #define NOUTF8  #endif
198    #endif
199    
200    /* To make the code a bit tidier for 8/16/32-bit support, we define macros
201    for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
202    only from one place and is handled differently). I couldn't dream up any way of
203    using a single macro to do this in a generic way, because of the many different
204    argument requirements. We know that at least one of SUPPORT_PCRE8,
205    SUPPORT_PCRE16 and SUPPORT_PCRE32 must be set. First define macros for each individual mode; then
206    use these in the definitions of generic macros.
207    
208    **** Special note about the PCHARSxxx macros: the address of the string to be
209    printed is always given as two arguments: a base address followed by an offset.
210    The base address is cast to the correct data size for 8, 16 or 32 bit data; the
211    offset is in units of this size. If the string were given as base+offset in one
212    argument, the casting might be incorrectly applied. */
213    
214    #ifdef SUPPORT_PCRE8
215    
/* ----- 8-bit wrappers -----
Each macro below forwards its arguments to one call, casting string arguments
to char pointers where the expansion shows a cast. Macros whose first parameter
is a result name (lv, p, re, rc, count, n, extra) expand to an assignment to
that name; the others expand to the bare call. None of them adds a trailing
semicolon. */

/* String output via pchars(); the text starts at base address p plus offset. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + offset, len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + offset, len, f)

/* Only cn8 is used here; cn16 and cn32 are accepted and ignored. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
  p = read_capture_name8(p, cn8, re)

#define STRLEN8(p) ((int)strlen((char *)p))

#define SET_PCRE_CALLOUT8(callout) pcre_callout = callout

/* Compile, study and byte-order conversion. */

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#define PCRE_FREE_STUDY8(extra) pcre_free_study(extra)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

/* Matching. */

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

/* Substring extraction; size is handed to the library unchanged. */

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

/* The second parameter (rc) is not used by the expansion; the compiled pattern
is taken from a variable named re that must be in scope at the point of use. */

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_FREE_SUBSTRING8(substring) pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) pcre_free_substring_list(listptr)

/* JIT stack handling. */

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) pcre_jit_stack_free(stack)

/* The 8-bit table builder has no "8" in its name; give it one. */

#define pcre8_maketables pcre_maketables
293    
294    #endif /* SUPPORT_PCRE8 */
295    
296    /* -----------------------------------------------------------*/
297    
298    #ifdef SUPPORT_PCRE16
299    
/* ----- 16-bit wrappers -----
Each macro forwards to the pcre16_xxx() counterpart of an 8-bit call (or to
pchars16()/read_capture_name16()/strlen16()), casting the generic pointers used
by the rest of the program (pcre *, pcre_extra *, untyped buffers) to the
16-bit types. Results that come back as 16-bit types are cast to the generic
ones. Macros whose first parameter is a result name expand to an assignment to
that name. None of them adds a trailing semicolon. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)

/* Only cn16 is used here; cn8 and cn32 are accepted and ignored. */
#define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
    tables)

/* In the two copy macros, size is halved before being passed on together with
the buffer cast to PCRE_UCHAR16 *. NOTE(review): this looks like a conversion
from a byte count to a count of 16-bit units - confirm against the callers. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
    len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* The second parameter (rc) is not used by the expansion; the compiled pattern
is taken from a variable named re that must be in scope at the point of use.
It is cast to pcre16 * in the same way as in every other macro of this group. */
#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
    tables)

/* pcre16_printint() is declared to take a pcre *, so no cast is applied. */
#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)
382    
383    #endif /* SUPPORT_PCRE16 */
384    
385    /* -----------------------------------------------------------*/
386    
387    #ifdef SUPPORT_PCRE32
388    
/* ----- 32-bit wrappers -----
Each macro forwards to the pcre32_xxx() counterpart of an 8-bit call (or to
pchars32()/read_capture_name32()/strlen32()), casting the generic pointers used
by the rest of the program (pcre *, pcre_extra *, untyped buffers) to the
32-bit types. Results that come back as 32-bit types are cast to the generic
ones. Macros whose first parameter is a result name expand to an assignment to
that name. None of them adds a trailing semicolon. */

/* Unlike the 8- and 16-bit versions, the two output macros pass use_utf to
pchars32(). It is not a macro parameter, so a variable of that name must be in
scope wherever they are used. */

#define PCHARS32(lv, p, offset, len, f) \
  lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)

#define PCHARSV32(p, offset, len, f)                \
  (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)

/* Only cn32 is used here; cn8 and cn16 are accepted and ignored. */
#define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
  p = read_capture_name32(p, cn32, re)

#define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))

#define SET_PCRE_CALLOUT32(callout) \
  pcre32_callout = (int (*)(pcre32_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
  pcre32_assign_jit_stack((pcre32_extra *)extra, \
    (pcre32_jit_callback)callback, userdata)

#define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
    tables)

/* In the two copy macros the buffer is reinterpreted as PCRE_UCHAR32 *, so the
capacity handed to the library is size/4. The 8-bit macros pass size unchanged
and the 16-bit ones pass size/2, which indicates that size counts bytes; with a
divisor of 2 here the library could be told the buffer holds twice as many
32-bit units as it really does.
NOTE(review): assumes callers pass a byte count such as sizeof(buffer) -
confirm at the call sites. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/4)

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, size/4)

#define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
    (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
    len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY32(extra) \
  pcre32_free_study((pcre32_extra *)extra)

#define PCRE_FREE_SUBSTRING32(substring) \
  pcre32_free_substring((PCRE_SPTR32)substring)

#define PCRE_FREE_SUBSTRING_LIST32(listptr) \
  pcre32_free_substring_list((PCRE_SPTR32 *)listptr)

#define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)

/* The second parameter (rc) is not used by the expansion; the compiled pattern
is taken from a variable named re that must be in scope at the point of use.
It is cast to pcre32 * in the same way as in every other macro of this group. */
#define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)ptr)

#define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_SPTR32 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
  rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
    (PCRE_SPTR32 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
  rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
    tables)

/* pcre32_printint() is declared to take a pcre *, so no cast is applied. */
#define PCRE_PRINTINT32(re, outfile, debug_lengths) \
  pcre32_printint(re, outfile, debug_lengths)

#define PCRE_STUDY32(extra, re, options, error) \
  extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
  (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE32(stack) \
  pcre32_jit_stack_free((pcre32_jit_stack *)stack)
471    
472    #endif /* SUPPORT_PCRE32 */
473    
474    
475    /* ----- More than one mode is supported; a runtime test is needed, except for
476    pcre_config(), and the JIT stack functions, when it doesn't matter which
477    available version is called. ----- */
478    
/* Identifiers for the 8-, 16- and 32-bit modes. The numeric values are
significant: CHAR_SIZE is computed as (1 << pcre_mode). */
enum {
  PCRE8_MODE  = 0,
  PCRE16_MODE = 1,
  PCRE32_MODE = 2
};
484    
485    #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
486         defined (SUPPORT_PCRE32)) >= 2
487    
/* Evaluates to 1, 2 or 4 when pcre_mode is PCRE8_MODE, PCRE16_MODE or
PCRE32_MODE (0, 1 and 2) respectively. NOTE(review): presumably the size in
bytes of one code unit in the current mode - confirm at the points of use. */
#define CHAR_SIZE (1 << (pcre_mode))
489    
490    /* There doesn't seem to be an easy way of writing these macros that can cope
491    with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
492    cases separately. */
493    
494    /* ----- All three modes supported ----- */
495    
496    #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
497    
/* Private helpers for this section. Both select on the run-time value of
pcre_mode: PCRE32_MODE picks the first argument, PCRE16_MODE the second, and
any other value the third (8-bit) one.

PCRETEST_BY_MODE3 expands to an if / else if / else statement chain with no
trailing semicolon; PCRETEST_PICK3 expands to a parenthesized conditional
expression. Arguments are macro-expanded before being substituted, so commas
inside the per-mode expansions do not disturb the argument count. */

#define PCRETEST_BY_MODE3(do32, do16, do8) \
  if (pcre_mode == PCRE32_MODE) \
    do32; \
  else if (pcre_mode == PCRE16_MODE) \
    do16; \
  else \
    do8

#define PCRETEST_PICK3(e32, e16, e8) \
  (pcre_mode == PCRE32_MODE ? e32 : pcre_mode == PCRE16_MODE ? e16 : e8)

/* Generic (mode-independent) names. Each one hands its arguments unchanged to
the 32-, 16- or 8-bit macro of the same name. */

#define PCHARS(lv, p, offset, len, f) \
  PCRETEST_BY_MODE3( \
    PCHARS32(lv, p, offset, len, f), \
    PCHARS16(lv, p, offset, len, f), \
    PCHARS8(lv, p, offset, len, f))

#define PCHARSV(p, offset, len, f) \
  PCRETEST_BY_MODE3( \
    PCHARSV32(p, offset, len, f), \
    PCHARSV16(p, offset, len, f), \
    PCHARSV8(p, offset, len, f))

#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  PCRETEST_BY_MODE3( \
    READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re), \
    READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re), \
    READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re))

#define SET_PCRE_CALLOUT(callout) \
  PCRETEST_BY_MODE3( \
    SET_PCRE_CALLOUT32(callout), \
    SET_PCRE_CALLOUT16(callout), \
    SET_PCRE_CALLOUT8(callout))

#define STRLEN(p) PCRETEST_PICK3(STRLEN32(p), STRLEN16(p), STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  PCRETEST_BY_MODE3( \
    PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata), \
    PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata), \
    PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata))

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  PCRETEST_BY_MODE3( \
    PCRE_COMPILE32(re, pat, options, error, erroffset, tables), \
    PCRE_COMPILE16(re, pat, options, error, erroffset, tables), \
    PCRE_COMPILE8(re, pat, options, error, erroffset, tables))

/* No run-time selection here: the 8-bit function is always used. */
#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  PCRETEST_BY_MODE3( \
    PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size), \
    PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size), \
    PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size))

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  PCRETEST_BY_MODE3( \
    PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size), \
    PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size), \
    PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size))

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  PCRETEST_BY_MODE3( \
    PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace), \
    PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace), \
    PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace))

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  PCRETEST_BY_MODE3( \
    PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets), \
    PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets), \
    PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets))

#define PCRE_FREE_STUDY(extra) \
  PCRETEST_BY_MODE3( \
    PCRE_FREE_STUDY32(extra), \
    PCRE_FREE_STUDY16(extra), \
    PCRE_FREE_STUDY8(extra))

#define PCRE_FREE_SUBSTRING(substring) \
  PCRETEST_BY_MODE3( \
    PCRE_FREE_SUBSTRING32(substring), \
    PCRE_FREE_SUBSTRING16(substring), \
    PCRE_FREE_SUBSTRING8(substring))

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  PCRETEST_BY_MODE3( \
    PCRE_FREE_SUBSTRING_LIST32(listptr), \
    PCRE_FREE_SUBSTRING_LIST16(listptr), \
    PCRE_FREE_SUBSTRING_LIST8(listptr))

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  PCRETEST_BY_MODE3( \
    PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr), \
    PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr), \
    PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr))

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  PCRETEST_BY_MODE3( \
    PCRE_GET_STRINGNUMBER32(n, rc, ptr), \
    PCRE_GET_STRINGNUMBER16(n, rc, ptr), \
    PCRE_GET_STRINGNUMBER8(n, rc, ptr))

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  PCRETEST_BY_MODE3( \
    PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr), \
    PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr), \
    PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr))

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  PCRETEST_BY_MODE3( \
    PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr), \
    PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr), \
    PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr))

/* An expression, not a statement: yields the value of the selected call. */
#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  PCRETEST_PICK3( \
    PCRE_JIT_STACK_ALLOC32(startsize, maxsize), \
    PCRE_JIT_STACK_ALLOC16(startsize, maxsize), \
    PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  PCRETEST_BY_MODE3( \
    PCRE_JIT_STACK_FREE32(stack), \
    PCRE_JIT_STACK_FREE16(stack), \
    PCRE_JIT_STACK_FREE8(stack))

/* An expression: calls the table builder of the current mode. */
#define PCRE_MAKETABLES \
  PCRETEST_PICK3(pcre32_maketables(), pcre16_maketables(), pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  PCRETEST_BY_MODE3( \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables), \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables), \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables))

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  PCRETEST_BY_MODE3( \
    PCRE_PRINTINT32(re, outfile, debug_lengths), \
    PCRE_PRINTINT16(re, outfile, debug_lengths), \
    PCRE_PRINTINT8(re, outfile, debug_lengths))

#define PCRE_STUDY(extra, re, options, error) \
  PCRETEST_BY_MODE3( \
    PCRE_STUDY32(extra, re, options, error), \
    PCRE_STUDY16(extra, re, options, error), \
    PCRE_STUDY8(extra, re, options, error))
695    
696    
697    /* ----- Two out of three modes are supported ----- */
698    
699    #else
700    
701    /* We can use some macro trickery to make a single set of definitions work in
702    the three different cases. */
703    
704    /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
705    
706    #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
707    #define BITONE 32
708    #define BITTWO 16
709    
710    /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
711    
712    #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
713    #define BITONE 32
714    #define BITTWO 8
715    
716    /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
717    
718    #else
719    #define BITONE 16
720    #define BITTWO 8
721  #endif  #endif
722    
/* Token-pasting helpers. G() goes through glue() so that its arguments (for
example BITONE or BITTWO) are macro-expanded to their values before the ##
operator joins them; using ## directly would paste the unexpanded names. */
723    #define glue(a,b) a##b
724    #define G(a,b) glue(a,b)
725    
726    
727    /* ----- Common macros for two-mode cases ----- */
728    
729    #define PCHARS(lv, p, offset, len, f) \
730      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
731        G(PCHARS,BITONE)(lv, p, offset, len, f); \
732      else \
733        G(PCHARS,BITTWO)(lv, p, offset, len, f)
734    
735    #define PCHARSV(p, offset, len, f) \
736      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737        G(PCHARSV,BITONE)(p, offset, len, f); \
738      else \
739        G(PCHARSV,BITTWO)(p, offset, len, f)
740    
741    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
742      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743        G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
744      else \
745        G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
746    
747    #define SET_PCRE_CALLOUT(callout) \
748      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749        G(SET_PCRE_CALLOUT,BITONE)(callout); \
750      else \
751        G(SET_PCRE_CALLOUT,BITTWO)(callout)
752    
753    #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
754      G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
755    
756    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
757      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
758        G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
759      else \
760        G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
761    
762    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
763      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764        G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
765      else \
766        G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
767    
768    #define PCRE_CONFIG G(G(pcre,BITONE),_config)
769    
770    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
771        namesptr, cbuffer, size) \
772      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
773        G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
774          namesptr, cbuffer, size); \
775      else \
776        G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
777          namesptr, cbuffer, size)
778    
779    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
780      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
781        G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
782      else \
783        G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
784    
785    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
786        offsets, size_offsets, workspace, size_workspace) \
787      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
788        G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
789          offsets, size_offsets, workspace, size_workspace); \
790      else \
791        G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
792          offsets, size_offsets, workspace, size_workspace)
793    
794    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
795        offsets, size_offsets) \
796      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
797        G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
798          offsets, size_offsets); \
799      else \
800        G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
801          offsets, size_offsets)
802    
803    #define PCRE_FREE_STUDY(extra) \
804      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
805        G(PCRE_FREE_STUDY,BITONE)(extra); \
806      else \
807        G(PCRE_FREE_STUDY,BITTWO)(extra)
808    
809    #define PCRE_FREE_SUBSTRING(substring) \
810      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
811        G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
812      else \
813        G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
814    
815    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
816      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817        G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
818      else \
819        G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
820    
821    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
822        getnamesptr, subsptr) \
823      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
824        G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
825          getnamesptr, subsptr); \
826      else \
827        G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
828          getnamesptr, subsptr)
829    
830    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
831      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
832        G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
833      else \
834        G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
835    
836    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
837      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
838        G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
839      else \
840        G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
841    
842    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
843      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
844        G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
845      else \
846        G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
847    
/* Allocate a JIT stack using the library variant selected at run time by
pcre_mode: the BITONE variant if pcre_mode matches it, otherwise the BITTWO
variant. The expansion is an expression, so it is wrapped in parentheses as a
whole; without them, any operator written next to the macro would bind only to
the final operand of the conditional. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
852    
853    #define PCRE_JIT_STACK_FREE(stack) \
854      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
855        G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
856      else \
857        G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
858    
/* Build character tables using the library variant selected at run time by
pcre_mode: the BITONE variant if pcre_mode matches it, otherwise the BITTWO
variant. The whole conditional is parenthesized so that the macro behaves as a
single operand wherever it is used in a larger expression. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
862    
863    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
864      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
865        G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
866      else \
867        G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
868    
869    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
870      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
871        G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
872      else \
873        G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
874    
875    #define PCRE_STUDY(extra, re, options, error) \
876      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
877        G(PCRE_STUDY,BITONE)(extra, re, options, error); \
878      else \
879        G(PCRE_STUDY,BITTWO)(extra, re, options, error)
880    
881    #endif  /* Two out of three modes */
882    
883    /* ----- End of cases where more than one mode is supported ----- */
884    
885    
886    /* ----- Only 8-bit mode is supported ----- */
887    
888    #elif defined SUPPORT_PCRE8
889    #define CHAR_SIZE                 1
890    #define PCHARS                    PCHARS8
891    #define PCHARSV                   PCHARSV8
892    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
893    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
894    #define STRLEN                    STRLEN8
895    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
896    #define PCRE_COMPILE              PCRE_COMPILE8
897    #define PCRE_CONFIG               pcre_config
898    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
899    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
900    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
901    #define PCRE_EXEC                 PCRE_EXEC8
902    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
903    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
904    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
905    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
906    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
907    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
908    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
909    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
910    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
911    #define PCRE_MAKETABLES           pcre_maketables()
912    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
913    #define PCRE_PRINTINT             PCRE_PRINTINT8
914    #define PCRE_STUDY                PCRE_STUDY8
915    
916    /* ----- Only 16-bit mode is supported ----- */
917    
918    #elif defined SUPPORT_PCRE16
919    #define CHAR_SIZE                 2
920    #define PCHARS                    PCHARS16
921    #define PCHARSV                   PCHARSV16
922    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
923    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
924    #define STRLEN                    STRLEN16
925    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
926    #define PCRE_COMPILE              PCRE_COMPILE16
927    #define PCRE_CONFIG               pcre16_config
928    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
929    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
930    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
931    #define PCRE_EXEC                 PCRE_EXEC16
932    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
933    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
934    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
935    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
936    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
937    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
938    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
939    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
940    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
941    #define PCRE_MAKETABLES           pcre16_maketables()
942    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
943    #define PCRE_PRINTINT             PCRE_PRINTINT16
944    #define PCRE_STUDY                PCRE_STUDY16
945    
946    /* ----- Only 32-bit mode is supported ----- */
947    
948    #elif defined SUPPORT_PCRE32
949    #define CHAR_SIZE                 4
950    #define PCHARS                    PCHARS32
951    #define PCHARSV                   PCHARSV32
952    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME32
953    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT32
954    #define STRLEN                    STRLEN32
955    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK32
956    #define PCRE_COMPILE              PCRE_COMPILE32
957    #define PCRE_CONFIG               pcre32_config
958    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
959    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING32
960    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC32
961    #define PCRE_EXEC                 PCRE_EXEC32
962    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY32
963    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING32
964    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST32
965    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING32
966    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER32
967    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING32
968    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST32
969    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC32
970    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE32
971    #define PCRE_MAKETABLES           pcre32_maketables()
972    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
973    #define PCRE_PRINTINT             PCRE_PRINTINT32
974    #define PCRE_STUDY                PCRE_STUDY32
975    
976  #endif  #endif
977    
978    /* ----- End of mode-specific function call macros ----- */
979    
980    
981  /* Other parameters */  /* Other parameters */
982    
# Line 173  UTF8 support if PCRE is built without it Line 988  UTF8 support if PCRE is built without it
988  #endif  #endif
989  #endif  #endif
990    
991    #if !defined NODFA
992    #define DFA_WS_DIMENSION 1000
993    #endif
994    
995  /* This is the default loop count for timing. */  /* This is the default loop count for timing. */
996    
997  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
# Line 187  static int callout_fail_count; Line 1006  static int callout_fail_count;
1006  static int callout_fail_id;  static int callout_fail_id;
1007  static int debug_lengths;  static int debug_lengths;
1008  static int first_callout;  static int first_callout;
1009    static int jit_was_used;
1010  static int locale_set = 0;  static int locale_set = 0;
1011  static int show_malloc;  static int show_malloc;
1012  static int use_utf8;  static int use_utf;
1013  static size_t gotten_store;  static size_t gotten_store;
1014    static size_t first_gotten_store = 0;
1015  static const unsigned char *last_callout_mark = NULL;  static const unsigned char *last_callout_mark = NULL;
1016    
1017  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
1018    
1019  static int buffer_size = 50000;  static int buffer_size = 50000;
1020  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
1021  static uschar *dbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
1022  static uschar *pbuffer = NULL;  
1023    /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1024    
1025    #ifdef COMPILE_PCRE16
1026    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1027    #endif
1028    
1029    #ifdef COMPILE_PCRE32
1030    #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1031    #endif
1032    
1033    /* We need buffers for building 16/32-bit strings, and the tables of operator
1034    lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1035    pattern for saving/reloading testing. Luckily, the data for these tables is
1036    defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1037    are used in the tables) are adjusted appropriately for the 16/32-bit world.
1038    LINK_SIZE is also used later in this program. */
1039    
1040    #ifdef SUPPORT_PCRE16
1041    #undef IMM2_SIZE
1042    #define IMM2_SIZE 1
1043    
1044    #if LINK_SIZE == 2
1045    #undef LINK_SIZE
1046    #define LINK_SIZE 1
1047    #elif LINK_SIZE == 3 || LINK_SIZE == 4
1048    #undef LINK_SIZE
1049    #define LINK_SIZE 2
1050    #else
1051    #error LINK_SIZE must be either 2, 3, or 4
1052    #endif
1053    
1054    static int buffer16_size = 0;
1055    static pcre_uint16 *buffer16 = NULL;
1056    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1057    #endif  /* SUPPORT_PCRE16 */
1058    
1059    #ifdef SUPPORT_PCRE32
1060    #undef IMM2_SIZE
1061    #define IMM2_SIZE 1
1062    #undef LINK_SIZE
1063    #define LINK_SIZE 1
1064    
1065    static int buffer32_size = 0;
1066    static pcre_uint32 *buffer32 = NULL;
1067    static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1068    #endif  /* SUPPORT_PCRE32 */
1069    
1070    /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1071    support, it can be changed by an option. If there is no 8-bit support, there
1072    must be 16- or 32-bit support, so default to 16-bit if present, else 32-bit. */
1073    
1074    #if defined SUPPORT_PCRE8
1075    static int pcre_mode = PCRE8_MODE;
1076    #elif defined SUPPORT_PCRE16
1077    static int pcre_mode = PCRE16_MODE;
1078    #elif defined SUPPORT_PCRE32
1079    static int pcre_mode = PCRE32_MODE;
1080    #endif
1081    
1082    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1083    
/* The entries enumerate every non-empty combination of the three JIT study
options, in the order of a 3-bit mask: 1 = normal JIT compile, 2 = partial-soft,
4 = partial-hard, so the k-th entry (counting from 1) is the combination whose
mask value is k.
NOTE(review): presumably indexed by n-1 for the digit n described above - confirm
against the option-parsing code, which is not visible here. */
1084    static int jit_study_bits[] =
1085      {
1086      PCRE_STUDY_JIT_COMPILE,
1087      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1088      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1089      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1090      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1091      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1092      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1093        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1094    };
1095    
1096    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1097      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1098    
1099  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
1100    
# Line 213  static const char *errtexts[] = { Line 1109  static const char *errtexts[] = {
1109    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
1110    "match limit exceeded",    "match limit exceeded",
1111    "callout error code",    "callout error code",
1112    NULL,  /* BADUTF8 is handled specially */    NULL,  /* BADUTF8/16 is handled specially */
1113    "bad UTF-8 offset",    NULL,  /* BADUTF8/16 offset is handled specially */
1114    NULL,  /* PARTIAL is handled specially */    NULL,  /* PARTIAL is handled specially */
1115    "not used - internal error",    "not used - internal error",
1116    "internal error - pattern overwritten?",    "internal error - pattern overwritten?",
# Line 228  static const char *errtexts[] = { Line 1124  static const char *errtexts[] = {
1124    "not used - internal error",    "not used - internal error",
1125    "invalid combination of newline options",    "invalid combination of newline options",
1126    "bad offset value",    "bad offset value",
1127    NULL,  /* SHORTUTF8 is handled specially */    NULL,  /* SHORTUTF8/16 is handled specially */
1128    "nested recursion at the same subject position"    "nested recursion at the same subject position",
1129      "JIT stack limit reached",
1130      "pattern compiled in wrong mode: 8-bit/16-bit error",
1131      "pattern compiled with other endianness",
1132      "invalid data in workspace for DFA restart"
1133  };  };
1134    
1135    
# Line 245  the L (locale) option also adjusts the t Line 1145  the L (locale) option also adjusts the t
1145  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
1146  only ASCII characters. */  only ASCII characters. */
1147    
1148  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
1149    
1150  /* This table is a lower casing table. */  /* This table is a lower casing table. */
1151    
# Line 418  graph, print, punct, and cntrl. Other cl Line 1318  graph, print, punct, and cntrl. Other cl
1318  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
1319  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
1320    
1321  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
1322  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
1323  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
1324  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 581  return sys_errlist[n]; Line 1481  return sys_errlist[n];
1481  #endif /* HAVE_STRERROR */  #endif /* HAVE_STRERROR */
1482    
1483    
1484    
1485    /*************************************************
1486    *       Print newline configuration              *
1487    *************************************************/
1488    
1489    /*
1490    Arguments:
1491      rc         the return code from PCRE_CONFIG_NEWLINE
1492      isc        TRUE if called from "-C newline"
1493    Returns:     nothing
1494    */
1495    
1496    static void
1497    print_newline_config(int rc, BOOL isc)
1498    {
1499    const char *s = NULL;
1500    if (!isc) printf("  Newline sequence is ");
1501    switch(rc)
1502      {
1503      case CHAR_CR: s = "CR"; break;
1504      case CHAR_LF: s = "LF"; break;
1505      case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1506      case -1: s = "ANY"; break;
1507      case -2: s = "ANYCRLF"; break;
1508    
1509      default:
1510      printf("a non-standard value: 0x%04x\n", rc);
1511      return;
1512      }
1513    
1514    printf("%s\n", s);
1515    }
1516    
1517    
1518    
1519  /*************************************************  /*************************************************
1520  *         JIT memory callback                    *  *         JIT memory callback                    *
1521  *************************************************/  *************************************************/
1522    
/* Records in jit_was_used that the callback has been invoked, then returns its
argument unchanged, cast to a JIT stack pointer. */
1523  static pcre_jit_stack* jit_callback(void *arg)  static pcre_jit_stack* jit_callback(void *arg)
1524  {  {
1525    jit_was_used = TRUE;
1526  return (pcre_jit_stack *)arg;  return (pcre_jit_stack *)arg;
1527  }  }
1528    
1529    
1530    #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1531  /*************************************************  /*************************************************
1532  *        Read or extend an input line            *  *            Convert UTF-8 string to value       *
1533  *************************************************/  *************************************************/
1534    
1535  /* Input lines are read into buffer, but both patterns and data lines can be  /* This function takes one or more bytes that represents a UTF-8 character,
1536  continued over multiple input lines. In addition, if the buffer fills up, we  and returns the value of the character.
 want to automatically expand it so as to be able to handle extremely large  
 lines that are needed for certain stress tests. When the input buffer is  
 expanded, the other two buffers must also be expanded likewise, and the  
 contents of pbuffer, which are a copy of the input for callouts, must be  
 preserved (for when expansion happens for a data line). This is not the most  
 optimal way of handling this, but hey, this is just a test program!  
1537    
1538  Arguments:  Argument:
1539    f            the file to read    utf8bytes   a pointer to the byte vector
1540    start        where in buffer to start (this *must* be within buffer)    vptr        a pointer to an int to receive the value
   prompt       for stdin or readline()  
1541    
1542  Returns:       pointer to the start of new data  Returns:      >  0 => the number of bytes consumed
1543                 could be a copy of start, or could be moved                -6 to 0 => malformed UTF-8 character at offset = (-return)
                NULL if no data read and EOF reached  
1544  */  */
1545    
1546  static uschar *  static int
1547  extend_inputline(FILE *f, uschar *start, const char *prompt)  utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1548  {  {
1549  uschar *here = start;  pcre_uint32 c = *utf8bytes++;
1550    pcre_uint32 d = c;
1551    int i, j, s;
1552    
1553  for (;;)  for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1554    {    {
1555    int rlen = (int)(buffer_size - (here - buffer));    if ((d & 0x80) == 0) break;
1556      d <<= 1;
1557    if (rlen > 1000)    }
     {  
     int dlen;  
1558    
1559      /* If libreadline support is required, use readline() to read a line if the  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1560      input is a terminal. Note that readline() removes the trailing newline, so  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
     we must put it back again, to be compatible with fgets(). */  
1561    
1562  #ifdef SUPPORT_LIBREADLINE  /* i now has a value in the range 1-5 */
     if (isatty(fileno(f)))  
       {  
       size_t len;  
       char *s = readline(prompt);  
       if (s == NULL) return (here == start)? NULL : start;  
       len = strlen(s);  
       if (len > 0) add_history(s);  
       if (len > rlen - 1) len = rlen - 1;  
       memcpy(here, s, len);  
       here[len] = '\n';  
       here[len+1] = 0;  
       free(s);  
       }  
     else  
 #endif  
1563    
1564      /* Read the next line by normal means, prompting if the file is stdin. */  s = 6*i;
1565    d = (c & utf8_table3[i]) << s;
1566    
1567        {  for (j = 0; j < i; j++)
1568        if (f == stdin) printf("%s", prompt);    {
1569        if (fgets((char *)here, rlen,  f) == NULL)    c = *utf8bytes++;
1570          return (here == start)? NULL : start;    if ((c & 0xc0) != 0x80) return -(j+1);
1571        }    s -= 6;
1572      d |= (c & 0x3f) << s;
1573      }
1574    
1575      dlen = (int)strlen((char *)here);  /* Check that encoding was the correct unique one */
     if (dlen > 0 && here[dlen - 1] == '\n') return start;  
     here += dlen;  
     }  
1576    
1577    else  for (j = 0; j < utf8_table1_size; j++)
1578      {    if (d <= (pcre_uint32)utf8_table1[j]) break;
1579      int new_buffer_size = 2*buffer_size;  if (j != i) return -(i+1);
     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);  
     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);  
     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);  
1580    
1581      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)  /* Valid value */
       {  
       fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);  
       exit(1);  
       }  
1582    
1583      memcpy(new_buffer, buffer, buffer_size);  *vptr = d;
1584      memcpy(new_pbuffer, pbuffer, buffer_size);  return i+1;
1585    }
1586    #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1587    
     buffer_size = new_buffer_size;  
1588    
     start = new_buffer + (start - buffer);  
     here = new_buffer + (here - buffer);  
1589    
1590      free(buffer);  #if defined SUPPORT_PCRE8 && !defined NOUTF
1591      free(dbuffer);  /*************************************************
1592      free(pbuffer);  *       Convert character value to UTF-8         *
1593    *************************************************/
1594    
1595      buffer = new_buffer;  /* This function takes an integer value in the range 0 - 0x7fffffff
1596      dbuffer = new_dbuffer;  and encodes it as a UTF-8 character in 0 to 6 bytes.
1597      pbuffer = new_pbuffer;  
1598    Arguments:
1599      cvalue     the character value
1600      utf8bytes  pointer to buffer for result - at least 6 bytes long
1601    
1602    Returns:     number of bytes placed in the buffer, or -1 if cvalue > 0x7fffffff
1603    */
1604    
1605    static int
1606    ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1607    {
1608    register int i, j;
1609    if (cvalue > 0x7fffffffu)
1610      return -1;
1611    for (i = 0; i < utf8_table1_size; i++)
1612      if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1613    utf8bytes += i;
1614    for (j = i; j > 0; j--)
1615     {
1616     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1617     cvalue >>= 6;
1618     }
1619    *utf8bytes = utf8_table2[i] | cvalue;
1620    return i + 1;
1621    }
1622    #endif
1623    
1624    
1625    #ifdef SUPPORT_PCRE16
1626    /*************************************************
1627    *         Convert a string to 16-bit             *
1628    *************************************************/
1629    
1630    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1631    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1632    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1633    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1634    result is always left in buffer16.
1635    
1636    Note that this function does not object to surrogate values. This is
1637    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1638    for the purpose of testing that they are correctly faulted.
1639    
1640    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1641    in UTF-8 so that values greater than 255 can be handled.
1642    
1643    Arguments:
1644      data       TRUE if converting a data line; FALSE for a regex
1645      p          points to a byte string
1646      utf        true if UTF-8 (to be converted to UTF-16)
1647      len        number of bytes in the string (excluding trailing zero)
1648    
1649    Returns:     number of 16-bit data items used (excluding trailing zero)
1650                 OR -1 if a UTF-8 string is malformed
1651                 OR -2 if a value > 0x10ffff is encountered
1652                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1653    */
1654    
1655    static int
1656    to16(int data, pcre_uint8 *p, int utf, int len)
1657    {
1658    pcre_uint16 *pp;
1659    
1660    if (buffer16_size < 2*len + 2)
1661      {
1662      if (buffer16 != NULL) free(buffer16);
1663      buffer16_size = 2*len + 2;
1664      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1665      if (buffer16 == NULL)
1666        {
1667        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1668        exit(1);
1669      }      }
1670    }    }
1671    
1672  return NULL;  /* Control never gets here */  pp = buffer16;
1673    
1674    if (!utf && !data)
1675      {
1676      while (len-- > 0) *pp++ = *p++;
1677      }
1678    
1679    else
1680      {
1681      pcre_uint32 c = 0;
1682      while (len > 0)
1683        {
1684        int chlen = utf82ord(p, &c);
1685        if (chlen <= 0) return -1;
1686        if (c > 0x10ffff) return -2;
1687        p += chlen;
1688        len -= chlen;
1689        if (c < 0x10000) *pp++ = c; else
1690          {
1691          if (!utf) return -3;
1692          c -= 0x10000;
1693          *pp++ = 0xD800 | (c >> 10);
1694          *pp++ = 0xDC00 | (c & 0x3ff);
1695          }
1696        }
1697      }
1698    
1699    *pp = 0;
1700    return pp - buffer16;
1701  }  }
1702    #endif
1703    
1704    #ifdef SUPPORT_PCRE32
1705    /*************************************************
1706    *         Convert a string to 32-bit             *
1707    *************************************************/
1708    
1709    /* In non-UTF mode, the space needed for a 32-bit string is exactly four times
1710    the 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than
1711    four times the 8-bit size, because every character occupies at least one byte
1712    in UTF-8 and exactly 4 bytes in UTF-32. The result is always left in
1713    buffer32.
1714    
1715    Note that, for data lines, this function does not object to surrogate values.
1716    This is deliberate; it makes it possible to construct UTF-32 strings that are
1717    invalid, for the purpose of testing that they are correctly faulted. A
1717    surrogate in a UTF pattern (data FALSE) is rejected with -3.
1718    
1719    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1720    in UTF-8 so that values greater than 255 can be handled.
1721    
1722    Arguments:
1723      data       TRUE if converting a data line; FALSE for a regex
1724      p          points to a byte string
1725      utf        true if UTF-8 (to be converted to UTF-32)
1726      len        number of bytes in the string (excluding trailing zero)
1727    
1728    Returns:     number of 32-bit data items used (excluding trailing zero)
1729                 OR -1 if a UTF-8 string is malformed
1730                 OR -2 if a value > 0x10ffff is encountered
1731                 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1732    */
1733    
1734    static int
1735    to32(int data, pcre_uint8 *p, int utf, int len)
1736    {
1737    pcre_uint32 *pp;
1738    
1739    if (buffer32_size < 4*len + 4)
1740      {
1741      if (buffer32 != NULL) free(buffer32);
1742      buffer32_size = 4*len + 4;
1743      buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1744      if (buffer32 == NULL)
1745        {
1746        fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1747        exit(1);
1748        }
1749      }
1750    
1751    pp = buffer32;
1752    
1753    if (!utf && !data)
1754      {
1755      while (len-- > 0) *pp++ = *p++;
1756      }
1757    
1758    else
1759      {
1760      pcre_uint32 c = 0;
1761      while (len > 0)
1762        {
1763        int chlen = utf82ord(p, &c);
1764        if (chlen <= 0) return -1;
1765        if (utf)
1766          {
1767          if (c > 0x10ffff) return -2;
1768          if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1769          }
1770    
1771        p += chlen;
1772        len -= chlen;
1773        *pp++ = c;
1774        }
1775      }
1776    
1777    *pp = 0;
1778    return pp - buffer32;
1779    }
1780    
1781    /* Check that a 32-bit character string is valid UTF-32.
1782    
1783    Arguments:
1784      string       points to the string
1785      length       length of string, or -1 if the string is zero-terminated
1786    
1787    Returns:       TRUE  if the string is a valid UTF-32 string
1788                   FALSE otherwise
1789    */
1790    
1791    #ifdef SUPPORT_UTF
1792    static BOOL
1793    valid_utf32(pcre_uint32 *string, int length)
1794    {
1795    register pcre_uint32 *p;
1796    register pcre_uint32 c;
1797    
1798    for (p = string; length-- > 0; p++)
1799      {
1800      c = *p;
1801    
1802      if (c > 0x10ffffu)
1803        return FALSE;
1804    
1805      /* A surrogate */
1806      if ((c & 0xfffff800u) == 0xd800u)
1807        return FALSE;
1808    
1809      /* Non-character */
1810      if ((c & 0xfffeu) == 0xfffeu || (c >= 0xfdd0u && c <= 0xfdefu))
1811        return FALSE;
1812      }
1813    
1814    return TRUE;
1815    }
1816    #endif /* SUPPORT_UTF */
1817    
1818    #endif
1819    
1820    /*************************************************
1821    *        Read or extend an input line            *
1822    *************************************************/
1823    
1824    /* Input lines are read into buffer, but both patterns and data lines can be
1825    continued over multiple input lines. In addition, if the buffer fills up, we
1826    want to automatically expand it so as to be able to handle extremely large
1827    lines that are needed for certain stress tests. When the input buffer is
1828    expanded, pbuffer must be expanded to the same size as well, and the
1829    contents of pbuffer, which are a copy of the input for callouts, must be
1830    preserved (for when expansion happens for a data line). This is not the
1831    best way of handling this, but hey, this is just a test program!
1832    
1833    Arguments:
1834      f            the file to read
1835      start        where in buffer to start (this *must* be within buffer)
1836      prompt       for stdin or readline()
1837    
1838    Returns:       pointer to the start of new data
1839                   could be a copy of start, or could be moved
1840                   NULL if no data read and EOF reached
1841    */
1842    
1843    static pcre_uint8 *
1844    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1845    {
1846    pcre_uint8 *here = start;
1847    
1848    for (;;)
1849      {
1850      size_t rlen = (size_t)(buffer_size - (here - buffer));
1851    
1852      if (rlen > 1000)
1853        {
1854        int dlen;
1855    
1856        /* If libreadline or libedit support is required, use readline() to read a
1857        line if the input is a terminal. Note that readline() removes the trailing
1858        newline, so we must put it back again, to be compatible with fgets(). */
1859    
1860    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1861        if (isatty(fileno(f)))
1862          {
1863          size_t len;
1864          char *s = readline(prompt);
1865          if (s == NULL) return (here == start)? NULL : start;
1866          len = strlen(s);
1867          if (len > 0) add_history(s);
1868          if (len > rlen - 1) len = rlen - 1;
1869          memcpy(here, s, len);
1870          here[len] = '\n';
1871          here[len+1] = 0;
1872          free(s);
1873          }
1874        else
1875    #endif
1876    
1877        /* Read the next line by normal means, prompting if the file is stdin. */
1878    
1879          {
1880          if (f == stdin) printf("%s", prompt);
1881          if (fgets((char *)here, rlen,  f) == NULL)
1882            return (here == start)? NULL : start;
1883          }
1884    
1885        dlen = (int)strlen((char *)here);
1886        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1887        here += dlen;
1888        }
1889    
1890      else
1891        {
1892        int new_buffer_size = 2*buffer_size;
1893        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1894        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1895    
1896        if (new_buffer == NULL || new_pbuffer == NULL)
1897          {
1898          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1899          exit(1);
1900          }
1901    
1902        memcpy(new_buffer, buffer, buffer_size);
1903        memcpy(new_pbuffer, pbuffer, buffer_size);
1904    
1905        buffer_size = new_buffer_size;
1906    
1907        start = new_buffer + (start - buffer);
1908        here = new_buffer + (here - buffer);
1909    
1910        free(buffer);
1911        free(pbuffer);
1912    
1913        buffer = new_buffer;
1914        pbuffer = new_pbuffer;
1915        }
1916      }
1917    
1918    return NULL;  /* Control never gets here */
1919    }
1920    
1921    
1922    
# Line 717  Returns:        the unsigned long Line 1936  Returns:        the unsigned long
1936  */  */
1937    
1938  static int  static int
1939  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1940  {  {
1941  int result = 0;  int result = 0;
1942  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 728  return(result); Line 1947  return(result);
1947    
1948    
1949    
   
1950  /*************************************************  /*************************************************
1951  *            Convert UTF-8 string to value       *  *             Print one character                *
1952  *************************************************/  *************************************************/
1953    
1954  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
1955    
1956  static int  static int pchar(pcre_uint32 c, FILE *f)
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1957  {  {
1958  int c = *utf8bytes++;  int n = 0;
1959  int d = c;  if (PRINTOK(c))
1960  int i, j, s;    {
1961      if (f != NULL) fprintf(f, "%c", c);
1962      return 1;
1963      }
1964    
1965  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1966    {    {
1967    if ((d & 0x80) == 0) break;    if (use_utf)
1968    d <<= 1;      {
1969        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1970        return 6;
1971        }
1972      else
1973        {
1974        if (f != NULL) fprintf(f, "\\x%02x", c);
1975        return 4;
1976        }
1977    }    }
1978    
1979  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1980  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return n >= 0 ? n : 0;
1981    }
1982    
 /* i now has a value in the range 1-5 */  
1983    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1984    
1985  for (j = 0; j < i; j++)  #ifdef SUPPORT_PCRE8
1986    {  /*************************************************
1987    c = *utf8bytes++;  *         Print 8-bit character string           *
1988    if ((c & 0xc0) != 0x80) return -(j+1);  *************************************************/
   s -= 6;  
   d |= (c & 0x3f) << s;  
   }  
1989    
1990  /* Check that encoding was the correct unique one */  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1991    If handed a NULL file, just counts chars without printing. */
1992    
1993  for (j = 0; j < utf8_table1_size; j++)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1994    if (d <= utf8_table1[j]) break;  {
1995  if (j != i) return -(i+1);  pcre_uint32 c = 0;
1996    int yield = 0;
1997    
1998  /* Valid value */  if (length < 0)
1999      length = strlen((char *)p);
2000    
2001  *vptr = d;  while (length-- > 0)
2002  return i+1;    {
2003  }  #if !defined NOUTF
2004      if (use_utf)
2005        {
2006        int rc = utf82ord(p, &c);
2007        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
2008          {
2009          length -= rc - 1;
2010          p += rc;
2011          yield += pchar(c, f);
2012          continue;
2013          }
2014        }
2015    #endif
2016      c = *p++;
2017      yield += pchar(c, f);
2018      }
2019    
2020    return yield;
2021    }
2022  #endif  #endif
2023    
2024    
2025    
2026    #ifdef SUPPORT_PCRE16
2027  /*************************************************  /*************************************************
2028  *       Convert character value to UTF-8         *  *    Find length of 0-terminated 16-bit string   *
2029  *************************************************/  *************************************************/
2030    
2031  /* This function takes an integer value in the range 0 - 0x7fffffff  static int strlen16(PCRE_SPTR16 p)
2032  and encodes it as a UTF-8 character in 0 to 6 bytes.  {
2033    int len = 0;
2034    while (*p++ != 0) len++;
2035    return len;
2036    }
2037    #endif  /* SUPPORT_PCRE16 */
2038    
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
2039    
 Returns:     number of characters placed in the buffer  
 */  
2040    
2041  #if !defined NOUTF8  #ifdef SUPPORT_PCRE32
2042    /*************************************************
2043    *    Find length of 0-terminated 32-bit string   *
2044    *************************************************/
2045    
2046  static int  static int strlen32(PCRE_SPTR32 p)
 ord2utf8(int cvalue, uschar *utf8bytes)  
2047  {  {
2048  register int i, j;  int len = 0;
2049  for (i = 0; i < utf8_table1_size; i++)  while (*p++ != 0) len++;
2050    if (cvalue <= utf8_table1[i]) break;  return len;
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
2051  }  }
2052    #endif  /* SUPPORT_PCRE32 */
 #endif  
2053    
2054    
2055    
2056    #ifdef SUPPORT_PCRE16
2057  /*************************************************  /*************************************************
2058  *             Print character string             *  *           Print 16-bit character string        *
2059  *************************************************/  *************************************************/
2060    
2061  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2062  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
2063    
2064  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2065  {  {
 int c = 0;  
2066  int yield = 0;  int yield = 0;
2067    
2068    if (length < 0)
2069      length = strlen16(p);
2070    
2071  while (length-- > 0)  while (length-- > 0)
2072    {    {
2073  #if !defined NOUTF8    pcre_uint32 c = *p++ & 0xffff;
2074    if (use_utf8)  #if !defined NOUTF
2075      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2076      {      {
2077      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
2078        if (d >= 0xDC00 && d < 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
2079        {        {
2080        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2081        p += rc;        length--;
2082        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
2083        }        }
2084      }      }
2085  #endif  #endif
2086      yield += pchar(c, f);
2087      }
2088    
2089     /* Not UTF-8, or malformed UTF-8  */  return yield;
2090    }
2091    #endif  /* SUPPORT_PCRE16 */
2092    
2093    c = *p++;  
2094    if (PRINTHEX(c))  
2095      {  #ifdef SUPPORT_PCRE32
2096      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
2097      yield++;  *           Print 32-bit character string        *
2098      }  *************************************************/
2099    else  
2100      {  /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2101      if (f != NULL) fprintf(f, "\\x%02x", c);  If handed a NULL file, just counts chars without printing. */
2102      yield += 4;  
2103      }  #define UTF32_MASK (0x1fffffu)
2104    
2105    static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2106    {
2107    int yield = 0;
2108    
2109    if (length < 0)
2110      length = strlen32(p);
2111    
2112    while (length-- > 0)
2113      {
2114      pcre_uint32 c = *p++;
2115      if (utf) c &= UTF32_MASK;
2116      yield += pchar(c, f);
2117    }    }
2118    
2119  return yield;  return yield;
2120  }  }
2121    #endif  /* SUPPORT_PCRE32 */
2122    
2123    
2124    
2125    #ifdef SUPPORT_PCRE8
2126    /*************************************************
2127    *     Read a capture name (8-bit) and check it   *
2128    *************************************************/
2129    
2130    static pcre_uint8 *
2131    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2132    {
2133    pcre_uint8 *npp = *pp;
2134    while (isalnum(*p)) *npp++ = *p++;
2135    *npp++ = 0;
2136    *npp = 0;
2137    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2138      {
2139      fprintf(outfile, "no parentheses with name \"");
2140      PCHARSV(*pp, 0, -1, outfile);
2141      fprintf(outfile, "\"\n");
2142      }
2143    
2144    *pp = npp;
2145    return p;
2146    }
2147    #endif  /* SUPPORT_PCRE8 */
2148    
2149    
2150    
2151    #ifdef SUPPORT_PCRE16
2152    /*************************************************
2153    *     Read a capture name (16-bit) and check it  *
2154    *************************************************/
2155    
2156    /* Note that the text being read is 8-bit. */
2157    
2158    static pcre_uint8 *
2159    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2160    {
2161    pcre_uint16 *npp = *pp;
2162    while (isalnum(*p)) *npp++ = *p++;
2163    *npp++ = 0;
2164    *npp = 0;
2165    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2166      {
2167      fprintf(outfile, "no parentheses with name \"");
2168      PCHARSV(*pp, 0, -1, outfile);
2169      fprintf(outfile, "\"\n");
2170      }
2171    *pp = npp;
2172    return p;
2173    }
2174    #endif  /* SUPPORT_PCRE16 */
2175    
2176    
2177    
2178    #ifdef SUPPORT_PCRE32
2179    /*************************************************
2180    *     Read a capture name (32-bit) and check it  *
2181    *************************************************/
2182    
2183    /* Note that the text being read is 8-bit. */
2184    
2185    static pcre_uint8 *
2186    read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2187    {
2188    pcre_uint32 *npp = *pp;
2189    while (isalnum(*p)) *npp++ = *p++;
2190    *npp++ = 0;
2191    *npp = 0;
2192    if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2193      {
2194      fprintf(outfile, "no parentheses with name \"");
2195      PCHARSV(*pp, 0, -1, outfile);
2196      fprintf(outfile, "\"\n");
2197      }
2198    *pp = npp;
2199    return p;
2200    }
2201    #endif  /* SUPPORT_PCRE32 */
2202    
2203    
2204    
# Line 915  if (callout_extra) Line 2227  if (callout_extra)
2227      else      else
2228        {        {
2229        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
2230        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
2231          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
2232        fprintf(f, "\n");        fprintf(f, "\n");
2233        }        }
# Line 928  printed lengths of the substrings. */ Line 2240  printed lengths of the substrings. */
2240    
2241  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
2242    
2243  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2244  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
2245    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
2246    
2247  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2248    
2249  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
2250    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
2251    
2252  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 973  first_callout = 0; Line 2285  first_callout = 0;
2285    
2286  if (cb->mark != last_callout_mark)  if (cb->mark != last_callout_mark)
2287    {    {
2288    fprintf(outfile, "Latest Mark: %s\n",    if (cb->mark == NULL)
2289      (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));      fprintf(outfile, "Latest Mark: <unset>\n");
2290      else
2291        {
2292        fprintf(outfile, "Latest Mark: ");
2293        PCHARSV(cb->mark, 0, -1, outfile);
2294        putc('\n', outfile);
2295        }
2296    last_callout_mark = cb->mark;    last_callout_mark = cb->mark;
2297    }    }
2298    
2299  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
2300      {
2301      int callout_data = *((int *)(cb->callout_data));
2302      if (callout_data != 0)
2303        {
2304        fprintf(outfile, "Callout data = %d\n", callout_data);
2305        return callout_data;
2306        }
2307      }
2308    
2309    return (cb->callout_number != callout_fail_id)? 0 :
2310           (++callout_count >= callout_fail_count)? 1 : 0;
2311    }
2312    
2313    
2314    /*************************************************
2315    *            Local malloc functions              *
2316    *************************************************/
2317    
2318    /* Alternative malloc function, to test functionality and save the size of a
2319    compiled re, which is the first store request that pcre_compile() makes. The
2320    show_malloc variable is set only during matching. */
2321    
2322    static void *new_malloc(size_t size)
2323    {
2324    void *block = malloc(size);
2325    gotten_store = size;
2326    if (first_gotten_store == 0) first_gotten_store = size;
2327    if (show_malloc)
2328      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
2329    return block;
2330    }
2331    
2332    static void new_free(void *block)
2333    {
2334    if (show_malloc)
2335      fprintf(outfile, "free             %p\n", block);
2336    free(block);
2337    }
2338    
2339    /* For recursion malloc/free, to test stacking calls */
2340    
2341    static void *stack_malloc(size_t size)
2342    {
2343    void *block = malloc(size);
2344    if (show_malloc)
2345      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2346    return block;
2347    }
2348    
2349    static void stack_free(void *block)
2350    {
2351    if (show_malloc)
2352      fprintf(outfile, "stack_free       %p\n", block);
2353    free(block);
2354    }
2355    
2356    
2357    /*************************************************
2358    *          Call pcre_fullinfo()                  *
2359    *************************************************/
2360    
2361    /* Get one piece of information from the pcre_fullinfo() function. When only
2362    one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2363    value, but the code is defensive.
2364    
2365    Arguments:
2366      re        compiled regex
2367      study     study data
2368      option    PCRE_INFO_xxx option
2369      ptr       where to put the data
2370    
2371    Returns:    0 when OK, < 0 on error
2372    */
2373    
2374    static int
2375    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2376    {
2377    int rc;
2378    
2379    if (pcre_mode == PCRE32_MODE)
2380    #ifdef SUPPORT_PCRE32
2381      rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2382    #else
2383      rc = PCRE_ERROR_BADMODE;
2384    #endif
2385    else if (pcre_mode == PCRE16_MODE)
2386    #ifdef SUPPORT_PCRE16
2387      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2388    #else
2389      rc = PCRE_ERROR_BADMODE;
2390    #endif
2391    else
2392    #ifdef SUPPORT_PCRE8
2393      rc = pcre_fullinfo(re, study, option, ptr);
2394    #else
2395      rc = PCRE_ERROR_BADMODE;
2396    #endif
2397    
2398    if (rc < 0)
2399      {
2400      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2401        pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2402      if (rc == PCRE_ERROR_BADMODE)
2403        fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2404          "%d-bit mode\n", 8 * CHAR_SIZE,
2405          8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2406      }
2407    
2408    return rc;
2409    }
2410    
2411    
2412    
2413    /*************************************************
2414    *             Swap byte functions                *
2415    *************************************************/
2416    
2417    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2418    value, respectively.
2419    
2420    Arguments:
2421      value        any number
2422    
2423    Returns:       the byte swapped value
2424    */
2425    
2426    static pcre_uint32
2427    swap_uint32(pcre_uint32 value)
2428    {
2429    return ((value & 0x000000ff) << 24) |
2430           ((value & 0x0000ff00) <<  8) |
2431           ((value & 0x00ff0000) >>  8) |
2432           (value >> 24);
2433    }
2434    
2435    static pcre_uint16
2436    swap_uint16(pcre_uint16 value)
2437    {
2438    return (value >> 8) | (value << 8);
2439    }
2440    
2441    
2442    
2443    /*************************************************
2444    *        Flip bytes in a compiled pattern        *
2445    *************************************************/
2446    
2447    /* This function is called if the 'F' option was present on a pattern that is
2448    to be written to a file. We flip the bytes of all the integer fields in the
2449    regex data block and the study block. In 16-bit mode this also flips relevant
2450    bytes in the pattern itself. This is to make it possible to test PCRE's
2451    ability to reload byte-flipped patterns, e.g. those compiled on a different
2452    architecture. */
2453    
2454    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2455    static void
2456    regexflip8_or_16(pcre *ere, pcre_extra *extra)
2457    {
2458    real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2459    #ifdef SUPPORT_PCRE16
2460    int op;
2461    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2462    int length = re->name_count * re->name_entry_size;
2463    #ifdef SUPPORT_UTF
2464    BOOL utf = (re->options & PCRE_UTF16) != 0;
2465    BOOL utf16_char = FALSE;
2466    #endif /* SUPPORT_UTF */
2467    #endif /* SUPPORT_PCRE16 */
2468    
2469    /* Always flip the bytes in the main data block and study blocks. */
2470    
2471    re->magic_number = REVERSED_MAGIC_NUMBER;
2472    re->size = swap_uint32(re->size);
2473    re->options = swap_uint32(re->options);
2474    re->flags = swap_uint16(re->flags);
2475    re->top_bracket = swap_uint16(re->top_bracket);
2476    re->top_backref = swap_uint16(re->top_backref);
2477    re->first_char = swap_uint16(re->first_char);
2478    re->req_char = swap_uint16(re->req_char);
2479    re->name_table_offset = swap_uint16(re->name_table_offset);
2480    re->name_entry_size = swap_uint16(re->name_entry_size);
2481    re->name_count = swap_uint16(re->name_count);
2482    
2483    if (extra != NULL)
2484      {
2485      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2486      rsd->size = swap_uint32(rsd->size);
2487      rsd->flags = swap_uint32(rsd->flags);
2488      rsd->minlength = swap_uint32(rsd->minlength);
2489      }
2490    
2491    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2492    in the name table, if present, and then in the pattern itself. */
2493    
2494    #ifdef SUPPORT_PCRE16
2495    if (pcre_mode != PCRE16_MODE) return;
2496    
2497    while(TRUE)
2498    {    {
2499    int callout_data = *((int *)(cb->callout_data));    /* Swap previous characters. */
2500    if (callout_data != 0)    while (length-- > 0)
2501      {      {
2502      fprintf(outfile, "Callout data = %d\n", callout_data);      *ptr = swap_uint16(*ptr);
2503      return callout_data;      ptr++;
2504      }      }
2505    }  #ifdef SUPPORT_UTF
2506      if (utf16_char)
2507  return (cb->callout_number != callout_fail_id)? 0 :      {
2508         (++callout_count >= callout_fail_count)? 1 : 0;      if ((ptr[-1] & 0xfc00) == 0xd800)
2509  }        {
2510          /* We know that there is only one extra character in UTF-16. */
2511          *ptr = swap_uint16(*ptr);
2512          ptr++;
2513          }
2514        }
2515      utf16_char = FALSE;
2516    #endif /* SUPPORT_UTF */
2517    
2518      /* Get next opcode. */
2519    
2520  /*************************************************    length = 0;
2521  *            Local malloc functions              *    op = *ptr;
2522  *************************************************/    *ptr++ = swap_uint16(op);
2523    
2524  /* Alternative malloc function, to test functionality and save the size of a    switch (op)
2525  compiled re. The show_malloc variable is set only during matching. */      {
2526        case OP_END:
2527        return;
2528    
2529  static void *new_malloc(size_t size)  #ifdef SUPPORT_UTF
2530  {      case OP_CHAR:
2531  void *block = malloc(size);      case OP_CHARI:
2532  gotten_store = size;      case OP_NOT:
2533  if (show_malloc)      case OP_NOTI:
2534    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);      case OP_STAR:
2535  return block;      case OP_MINSTAR:
2536  }      case OP_PLUS:
2537        case OP_MINPLUS:
2538        case OP_QUERY:
2539        case OP_MINQUERY:
2540        case OP_UPTO:
2541        case OP_MINUPTO:
2542        case OP_EXACT:
2543        case OP_POSSTAR:
2544        case OP_POSPLUS:
2545        case OP_POSQUERY:
2546        case OP_POSUPTO:
2547        case OP_STARI:
2548        case OP_MINSTARI:
2549        case OP_PLUSI:
2550        case OP_MINPLUSI:
2551        case OP_QUERYI:
2552        case OP_MINQUERYI:
2553        case OP_UPTOI:
2554        case OP_MINUPTOI:
2555        case OP_EXACTI:
2556        case OP_POSSTARI:
2557        case OP_POSPLUSI:
2558        case OP_POSQUERYI:
2559        case OP_POSUPTOI:
2560        case OP_NOTSTAR:
2561        case OP_NOTMINSTAR:
2562        case OP_NOTPLUS:
2563        case OP_NOTMINPLUS:
2564        case OP_NOTQUERY:
2565        case OP_NOTMINQUERY:
2566        case OP_NOTUPTO:
2567        case OP_NOTMINUPTO:
2568        case OP_NOTEXACT:
2569        case OP_NOTPOSSTAR:
2570        case OP_NOTPOSPLUS:
2571        case OP_NOTPOSQUERY:
2572        case OP_NOTPOSUPTO:
2573        case OP_NOTSTARI:
2574        case OP_NOTMINSTARI:
2575        case OP_NOTPLUSI:
2576        case OP_NOTMINPLUSI:
2577        case OP_NOTQUERYI:
2578        case OP_NOTMINQUERYI:
2579        case OP_NOTUPTOI:
2580        case OP_NOTMINUPTOI:
2581        case OP_NOTEXACTI:
2582        case OP_NOTPOSSTARI:
2583        case OP_NOTPOSPLUSI:
2584        case OP_NOTPOSQUERYI:
2585        case OP_NOTPOSUPTOI:
2586        if (utf) utf16_char = TRUE;
2587    #endif
2588        /* Fall through. */
2589    
2590  static void new_free(void *block)      default:
2591  {      length = OP_lengths16[op] - 1;
2592  if (show_malloc)      break;
2593    fprintf(outfile, "free             %p\n", block);  
2594  free(block);      case OP_CLASS:
2595  }      case OP_NCLASS:
2596        /* Skip the character bit map. */
2597        ptr += 32/sizeof(pcre_uint16);
2598        length = 0;
2599        break;
2600    
2601        case OP_XCLASS:
2602        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2603        if (LINK_SIZE > 1)
2604          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2605            - (1 + LINK_SIZE + 1));
2606        else
2607          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2608    
2609  /* For recursion malloc/free, to test stacking calls */      /* Reverse the size of the XCLASS instance. */
2610        *ptr = swap_uint16(*ptr);
2611        ptr++;
2612        if (LINK_SIZE > 1)
2613          {
2614          *ptr = swap_uint16(*ptr);
2615          ptr++;
2616          }
2617    
2618  static void *stack_malloc(size_t size)      op = *ptr;
2619  {      *ptr = swap_uint16(op);
2620  void *block = malloc(size);      ptr++;
2621  if (show_malloc)      if ((op & XCL_MAP) != 0)
2622    fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);        {
2623  return block;        /* Skip the character bit map. */
2624          ptr += 32/sizeof(pcre_uint16);
2625          length -= 32/sizeof(pcre_uint16);
2626          }
2627        break;
2628        }
2629      }
2630    /* Control should never reach here in 16 bit mode. */
2631    #endif /* SUPPORT_PCRE16 */
2632  }  }
2633    #endif /* SUPPORT_PCRE[8|16] */
2634    
 static void stack_free(void *block)  
 {  
 if (show_malloc)  
   fprintf(outfile, "stack_free       %p\n", block);  
 free(block);  
 }  
2635    
2636    
/* NOTE: the lines below come from a two-column (old revision | new revision)
diff listing. The right-hand column holds regexflip_32(); the left-hand column
holds unrelated code from the older revision (the pcre_fullinfo() banner,
new_info() and check_jit_arch()).

regexflip_32() byte-swaps, in place, a compiled pattern that is accessed as a
real_pcre32 block, and also its study data when extra is not NULL.
  ere    the compiled pattern, cast to real_pcre32
  extra  study block, may be NULL; when it is not NULL the size, flags and
         minlength fields of its study_data are swapped
Nothing is returned. The name-table position and length are read from the
header before the header fields themselves are swapped. */
2637  /*************************************************  #if defined SUPPORT_PCRE32
2638  *          Call pcre_fullinfo()                  *  static void
2639  *************************************************/  regexflip_32(pcre *ere, pcre_extra *extra)
2640    {
2641    real_pcre32 *re = (real_pcre32 *)ere;
2642    int op;
2643    pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2644    int length = re->name_count * re->name_entry_size;
2645    
2646    /* Always flip the bytes in the main data block and study blocks. */
2647    
2648    re->magic_number = REVERSED_MAGIC_NUMBER;
2649    re->size = swap_uint32(re->size);
2650    re->options = swap_uint32(re->options);
2651    re->flags = swap_uint16(re->flags);
2652    re->top_bracket = swap_uint16(re->top_bracket);
2653    re->top_backref = swap_uint16(re->top_backref);
2654    re->first_char = swap_uint32(re->first_char);
2655    re->req_char = swap_uint32(re->req_char);
2656    re->name_table_offset = swap_uint16(re->name_table_offset);
2657    re->name_entry_size = swap_uint16(re->name_entry_size);
2658    re->name_count = swap_uint16(re->name_count);
2659    
2660  /* Get one piece of information from the pcre_fullinfo() function */  if (extra != NULL)
2661      {
2662      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2663      rsd->size = swap_uint32(rsd->size);
2664      rsd->flags = swap_uint32(rsd->flags);
2665      rsd->minlength = swap_uint32(rsd->minlength);
2666      }
2667    
2668  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2669  {  the pattern itself. */
 int rc;  
 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
   fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  
 }  
2670    
/* Walk the name table and then the pattern code. Each pass first swaps the
"length" units left over from the previous item, then reads the next opcode
(before swapping it) to decide how many units follow it. Class bit maps are
stepped over without being swapped. The loop is left only at OP_END. */
2671    while(TRUE)
2672      {
2673      /* Swap previous characters. */
2674      while (length-- > 0)
2675        {
2676        *ptr = swap_uint32(*ptr);
2677        ptr++;
2678        }
2679    
2680      /* Get next opcode. */
2681    
2682  /*************************************************    length = 0;
2683  *      Check for supported JIT architecture      *    op = *ptr;
2684  *************************************************/    *ptr++ = swap_uint32(op);
2685    
2686  /* If it won't JIT-compile a very simple regex, return FALSE. */    switch (op)
2687        {
2688        case OP_END:
2689        return;
2690    
2691  static int check_jit_arch(void)      default:
2692  {      length = OP_lengths32[op] - 1;
2693  const char *error;      break;
2694  int erroffset, rc;  
2695  pcre *re = pcre_compile("abc", 0, &error, &erroffset, NULL);      case OP_CLASS:
2696  pcre_extra *extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);      case OP_NCLASS:
2697  rc = extra != NULL && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&      /* Skip the character bit map. */
2698    extra->executable_jit != NULL;      ptr += 32/sizeof(pcre_uint32);
2699  pcre_free_study(extra);      length = 0;
2700  free(re);      break;
2701  return rc;  
2702        case OP_XCLASS:
2703        /* LINK_SIZE can only be 1 in 32-bit mode. */
2704        length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2705    
2706        /* Reverse the size of the XCLASS instance. */
2707        *ptr = swap_uint32(*ptr);
2708        ptr++;
2709    
2710        op = *ptr;
2711        *ptr = swap_uint32(op);
2712        ptr++;
2713        if ((op & XCL_MAP) != 0)
2714          {
2715          /* Skip the character bit map. */
2716          ptr += 32/sizeof(pcre_uint32);
2717          length -= 32/sizeof(pcre_uint32);
2718          }
2719        break;
2720        }
2721      }
2722    /* Control should never reach here in 32 bit mode. */
2723  }  }
2724    
2725    #endif /* SUPPORT_PCRE32 */
2726    
 /*************************************************  
 *         Byte flipping function                 *  
 *************************************************/  
2727    
/* NOTE: two-column (old revision | new revision) diff listing.
Left column: byteflip() from the old revision, which reverses the byte order
of a 2-byte value when n == 2 and of a 4-byte value for any other n.
Right column: regexflip() from the new revision, which tests the mode bits
given by REAL_PCRE_FLAGS(ere) and passes the pattern and study block to
regexflip_32() when PCRE_MODE32 is set, and to regexflip8_or_16() when
PCRE_MODE8 or PCRE_MODE16 is set. Each call is compiled in only when the
matching SUPPORT_PCRE macro is defined. */
2728  static unsigned long int  
2729  byteflip(unsigned long int value, int n)  static void
2730    regexflip(pcre *ere, pcre_extra *extra)
2731  {  {
2732  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  #if defined SUPPORT_PCRE32
2733  return ((value & 0x000000ff) << 24) |    if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2734         ((value & 0x0000ff00) <<  8) |      regexflip_32(ere, extra);
2735         ((value & 0x00ff0000) >>  8) |  #endif
2736         ((value & 0xff000000) >> 24);  #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2737      if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2738        regexflip8_or_16(ere, extra);
2739    #endif
2740  }  }
2741    
2742    
2743    
   
2744  /*************************************************  /*************************************************
2745  *        Check match or recursion limit          *  *        Check match or recursion limit          *
2746  *************************************************/  *************************************************/
2747    
2748  static int  static int
2749  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2750    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2751    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2752  {  {
# Line 1106  for (;;) Line 2761  for (;;)
2761    {    {
2762    *limit = mid;    *limit = mid;
2763    
2764    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2765      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2766    
2767    if (count == errnumber)    if (count == errnumber)
# Line 1151  Returns:    < 0, = 0, or > 0, according Line 2806  Returns:    < 0, = 0, or > 0, according
2806  */  */
2807    
2808  static int  static int
2809  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2810  {  {
2811  while (n--)  while (n--)
2812    {    {
# Line 1178  Returns:      appropriate PCRE_NEWLINE_x Line 2833  Returns:      appropriate PCRE_NEWLINE_x
2833  */  */
2834    
/* Both columns show check_newline() (old | new revision); they differ only in
the cast type, uschar versus pcre_uint8. The text at p is compared, using
strncmpic() (presumably case-independent, going by its name), against "cr>",
"lf>", "crlf>", "anycrlf>", "any>", "bsr_anycrlf>" and "bsr_unicode>" in that
order, and the PCRE_NEWLINE_xxx or PCRE_BSR_xxx value for the first match is
returned. If nothing matches, an "Unknown newline type" message is written to
f and 0 is returned. */
2835  static int  static int
2836  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2837  {  {
2838  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2839  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2840  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2841  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2842  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2843  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2844  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2845  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2846  return 0;  return 0;
2847  }  }
# Line 1202  usage(void) Line 2857  usage(void)
2857  {  {
2858  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2859  printf("Input and output default to stdin and stdout.\n");  printf("Input and output default to stdin and stdout.\n");
2860  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2861  printf("If input is a terminal, readline() is used to read from it.\n");  printf("If input is a terminal, readline() is used to read from it.\n");
2862  #else  #else
2863  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2864  #endif  #endif
2865  printf("\nOptions:\n");  printf("\nOptions:\n");
2866  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2867    printf("  -16      use the 16-bit library\n");
2868    #endif
2869    #ifdef SUPPORT_PCRE32
2870    printf("  -32      use the 32-bit library\n");
2871    #endif
2872    printf("  -b       show compiled code\n");
2873  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2874    printf("  -C arg   show a specific compile-time option\n");
2875    printf("           and exit with its value. The arg can be:\n");
2876    printf("     linksize     internal link size [2, 3, 4]\n");
2877    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2878    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2879    printf("     pcre32       32 bit library support enabled [0, 1]\n");
2880    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2881    printf("     ucp          Unicode Properties supported [0, 1]\n");
2882    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2883    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2884  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2885  #if !defined NODFA  #if !defined NODFA
2886  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 1226  printf("  -q       quiet: do not output Line 2897  printf("  -q       quiet: do not output
2897  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2898  printf("  -s       force each pattern to be studied at basic level\n"  printf("  -s       force each pattern to be studied at basic level\n"
2899         "  -s+      force each pattern to be studied, using JIT if available\n"         "  -s+      force each pattern to be studied, using JIT if available\n"
2900           "  -s++     ditto, verifying when JIT was actually used\n"
2901           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2902           "             where 1 <= n <= 7 selects JIT options\n"
2903           "  -s++n    ditto, verifying when JIT was actually used\n"
2904         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2905  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2906  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 1245  options, followed by a set of test data, Line 2920  options, followed by a set of test data,
2920  int main(int argc, char **argv)  int main(int argc, char **argv)
2921  {  {
2922  FILE *infile = stdin;  FILE *infile = stdin;
2923    const char *version;
2924  int options = 0;  int options = 0;
2925  int study_options = 0;  int study_options = 0;
2926  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 1259  int quiet = 0; Line 2935  int quiet = 0;
2935  int size_offsets = 45;  int size_offsets = 45;
2936  int size_offsets_max;  int size_offsets_max;
2937  int *offsets = NULL;  int *offsets = NULL;
 #if !defined NOPOSIX  
 int posix = 0;  
 #endif  
2938  int debug = 0;  int debug = 0;
2939  int done = 0;  int done = 0;
2940  int all_use_dfa = 0;  int all_use_dfa = 0;
2941    int verify_jit = 0;
2942  int yield = 0;  int yield = 0;
2943    #ifdef SUPPORT_PCRE32
2944    int mask_utf32 = 0;
2945    #endif
2946  int stack_size;  int stack_size;
2947    pcre_uint8 *dbuffer = NULL;
2948    size_t dbuffer_size = 1u << 14;
2949    
2950  pcre_jit_stack *jit_stack = NULL;  #if !defined NOPOSIX
2951    int posix = 0;
2952    #endif
2953    #if !defined NODFA
2954    int *dfa_workspace = NULL;
2955    #endif
2956    
2957    pcre_jit_stack *jit_stack = NULL;
2958    
2959  /* These vectors store, end-to-end, a list of captured substring names. Assume  /* These vectors store, end-to-end, a list of zero-terminated captured
2960  that 1024 is plenty long enough for the few names we'll be testing. */  substring names, each list itself being terminated by an empty name. Assume
2961    that 1024 is plenty long enough for the few names we'll be testing. It is
2962    easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2963    for the actual memory, to ensure alignment. */
2964    
2965    pcre_uint32 copynames[1024];
2966    pcre_uint32 getnames[1024];
2967    
2968    #ifdef SUPPORT_PCRE32
2969    pcre_uint32 *cn32ptr;
2970    pcre_uint32 *gn32ptr;
2971    #endif
2972    
2973  uschar copynames[1024];  #ifdef SUPPORT_PCRE16
2974  uschar getnames[1024];  pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2975    pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2976    pcre_uint16 *cn16ptr;
2977    pcre_uint16 *gn16ptr;
2978    #endif
2979    
2980  uschar *copynamesptr;  #ifdef SUPPORT_PCRE8
2981  uschar *getnamesptr;  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2982    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2983    pcre_uint8 *cn8ptr;
2984    pcre_uint8 *gn8ptr;
2985    #endif
2986    
2987  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that valgrind will check their misuse when
2988  when I am debugging. They grow automatically when very long lines are read. */  debugging. They grow automatically when very long lines are read. The 16-
2989    and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2990    
2991  buffer = (unsigned char *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
2992  dbuffer = (unsigned char *)malloc(buffer_size);  pbuffer = (pcre_uint8 *)malloc(buffer_size);
 pbuffer = (unsigned char *)malloc(buffer_size);  
2993    
2994  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2995    
# Line 1300  it set 0x8000, but then I was advised th Line 3004  it set 0x8000, but then I was advised th
3004  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
3005  #endif  #endif
3006    
3007    /* Get the version number: pcre_version(), pcre16_version() and pcre32_version()
3008    all give the same answer. We just need to ensure that we call one that is available. */
3009    
3010    #if defined SUPPORT_PCRE8
3011    version = pcre_version();
3012    #elif defined SUPPORT_PCRE16
3013    version = pcre16_version();
3014    #elif defined SUPPORT_PCRE32
3015    version = pcre32_version();
3016    #endif
3017    
3018  /* Scan options */  /* Scan options */
3019    
3020  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
3021    {    {
3022    unsigned char *endptr;    pcre_uint8 *endptr;
3023      char *arg = argv[op];
3024    
3025      if (strcmp(arg, "-m") == 0) showstore = 1;
3026      else if (strcmp(arg, "-s") == 0) force_study = 0;
3027    
3028    if (strcmp(argv[op], "-m") == 0) showstore = 1;    else if (strncmp(arg, "-s+", 3) == 0)
   else if (strcmp(argv[op], "-s") == 0) force_study = 0;  
   else if (strcmp(argv[op], "-s+") == 0)  
3029      {      {
3030        arg += 3;
3031        if (*arg == '+') { arg++; verify_jit = TRUE; }
3032      force_study = 1;      force_study = 1;
3033      force_study_options = PCRE_STUDY_JIT_COMPILE;      if (*arg == 0)
3034      }        force_study_options = jit_study_bits[6];
3035    else if (strcmp(argv[op], "-q") == 0) quiet = 1;      else if (*arg >= '1' && *arg <= '7')
3036    else if (strcmp(argv[op], "-b") == 0) debug = 1;        force_study_options = jit_study_bits[*arg - '1'];
3037    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      else goto BAD_ARG;
3038    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      }
3039    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;    else if (strcmp(arg, "-8") == 0)
3040        {
3041    #ifdef SUPPORT_PCRE8
3042        pcre_mode = PCRE8_MODE;
3043    #else
3044        printf("** This version of PCRE was built without 8-bit support\n");
3045        exit(1);
3046    #endif
3047        }
3048      else if (strcmp(arg, "-16") == 0)
3049        {
3050    #ifdef SUPPORT_PCRE16
3051        pcre_mode = PCRE16_MODE;
3052    #else
3053        printf("** This version of PCRE was built without 16-bit support\n");
3054        exit(1);
3055    #endif
3056        }
3057      else if (strcmp(arg, "-32") == 0 || strcmp(arg, "-32+") == 0)
3058        {
3059    #ifdef SUPPORT_PCRE32
3060        pcre_mode = PCRE32_MODE;
3061        mask_utf32 = (strcmp(arg, "-32+") == 0);
3062    #else
3063        printf("** This version of PCRE was built without 32-bit support\n");
3064        exit(1);
3065    #endif
3066        }
3067      else if (strcmp(arg, "-q") == 0) quiet = 1;
3068      else if (strcmp(arg, "-b") == 0) debug = 1;
3069      else if (strcmp(arg, "-i") == 0) showinfo = 1;
3070      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3071      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3072  #if !defined NODFA  #if !defined NODFA
3073    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3074  #endif  #endif
3075    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3076        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3077          *endptr == 0))          *endptr == 0))
3078      {      {
3079      op++;      op++;
3080      argc--;      argc--;
3081      }      }
3082    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
3083      {      {
3084      int both = argv[op][2] == 0;      int both = arg[2] == 0;
3085      int temp;      int temp;
3086      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3087                       *endptr == 0))                       *endptr == 0))
3088        {        {
3089        timeitm = temp;        timeitm = temp;
# Line 1342  while (argc > 1 && argv[op][0] == '-') Line 3093  while (argc > 1 && argv[op][0] == '-')
3093      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
3094      if (both) timeit = timeitm;      if (both) timeit = timeitm;
3095      }      }
3096    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3097        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3098          *endptr == 0))          *endptr == 0))
3099      {      {
3100  #if defined(_WIN32) || defined(WIN32) || defined(__minix)  #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
3101      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
3102      exit(1);      exit(1);
3103  #else  #else
# Line 1365  while (argc > 1 && argv[op][0] == '-') Line 3116  while (argc > 1 && argv[op][0] == '-')
3116  #endif  #endif
3117      }      }
3118  #if !defined NOPOSIX  #if !defined NOPOSIX
3119    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
3120  #endif  #endif
3121    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
3122      {      {
3123      int rc;      int rc;
3124      unsigned long int lrc;      unsigned long int lrc;
3125      printf("PCRE version %s\n", pcre_version());  
3126        if (argc > 2)
3127          {
3128          if (strcmp(argv[op + 1], "linksize") == 0)
3129            {
3130            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3131            printf("%d\n", rc);
3132            yield = rc;
3133            }
3134          else if (strcmp(argv[op + 1], "pcre8") == 0)
3135            {
3136    #ifdef SUPPORT_PCRE8
3137            printf("1\n");
3138            yield = 1;
3139    #else
3140            printf("0\n");
3141            yield = 0;
3142    #endif
3143            }
3144          else if (strcmp(argv[op + 1], "pcre16") == 0)
3145            {
3146    #ifdef SUPPORT_PCRE16
3147            printf("1\n");
3148            yield = 1;
3149    #else
3150            printf("0\n");
3151            yield = 0;
3152    #endif
3153            }
3154          else if (strcmp(argv[op + 1], "pcre32") == 0)
3155            {
3156    #ifdef SUPPORT_PCRE32
3157            printf("1\n");
3158            yield = 1;
3159    #else
3160            printf("0\n");
3161            yield = 0;
3162    #endif
3163            goto EXIT;
3164            }
3165          if (strcmp(argv[op + 1], "utf") == 0)
3166            {
3167    #ifdef SUPPORT_PCRE8
3168            if (pcre_mode == PCRE8_MODE)
3169              (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3170    #endif
3171    #ifdef SUPPORT_PCRE16
3172            if (pcre_mode == PCRE16_MODE)
3173              (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3174    #endif
3175    #ifdef SUPPORT_PCRE32
3176            if (pcre_mode == PCRE32_MODE)
3177              (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3178    #endif
3179            printf("%d\n", rc);
3180            yield = rc;
3181            goto EXIT;
3182            }
3183          else if (strcmp(argv[op + 1], "ucp") == 0)
3184            {
3185            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3186            printf("%d\n", rc);
3187            yield = rc;
3188            }
3189          else if (strcmp(argv[op + 1], "jit") == 0)
3190            {
3191            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3192            printf("%d\n", rc);
3193            yield = rc;
3194            }
3195          else if (strcmp(argv[op + 1], "newline") == 0)
3196            {
3197            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3198            print_newline_config(rc, TRUE);
3199            }
3200          else if (strcmp(argv[op + 1], "ebcdic") == 0)
3201            {
3202    #ifdef EBCDIC
3203            printf("1\n");
3204            yield = 1;
3205    #else
3206            printf("0\n");
3207    #endif
3208            }
3209          else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3210            {
3211    #ifdef EBCDIC
3212            printf("0x%02x\n", CHAR_LF);
3213    #else
3214            printf("0\n");
3215    #endif
3216            }
3217          else
3218            {
3219            printf("Unknown -C option: %s\n", argv[op + 1]);
3220            }
3221          goto EXIT;
3222          }
3223    
3224        /* No argument for -C: output all configuration information. */
3225    
3226        printf("PCRE version %s\n", version);
3227      printf("Compiled with\n");      printf("Compiled with\n");
3228    
3229    #ifdef EBCDIC
3230        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3231    #endif
3232    
3233    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3234    are set, either both UTFs are supported or both are not supported. */
3235    
3236    #ifdef SUPPORT_PCRE8
3237        printf("  8-bit support\n");
3238      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3239      printf("  %sUTF-8 support\n", rc? "" : "No ");        printf ("  %sUTF-8 support\n", rc ? "" : "No ");
3240      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #endif
3241    #ifdef SUPPORT_PCRE16
3242        printf("  16-bit support\n");
3243        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3244        printf ("  %sUTF-16 support\n", rc ? "" : "No ");
3245    #endif
3246    #ifdef SUPPORT_PCRE32
3247        printf("  32-bit support\n");
3248        (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3249        printf ("  %sUTF-32 support\n", rc ? "" : "No ");
3250    #endif
3251    
3252        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3253      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
3254      (void)pcre_config(PCRE_CONFIG_JIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3255      if (rc)      if (rc)
3256        printf("  Just-in-time compiler support%s\n", check_jit_arch()?        {
3257          "" : " (but this architecture is unsupported)");        const char *arch;
3258          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3259          printf("  Just-in-time compiler support: %s\n", arch);
3260          }
3261      else      else
3262        printf("  No just-in-time compiler support\n");        printf("  No just-in-time compiler support\n");
3263      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3264      /* Note that these values are always the ASCII values, even      print_newline_config(rc, FALSE);
3265      in EBCDIC environments. CR is 13 and NL is 10. */      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
     printf("  Newline sequence is %s\n", (rc == 13)? "CR" :  
       (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :  
       (rc == -2)? "ANYCRLF" :  
       (rc == -1)? "ANY" : "???");  
     (void)pcre_config(PCRE_CONFIG_BSR, &rc);  
3266      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3267                                       "all Unicode newlines");                                       "all Unicode newlines");
3268      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3269      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
3270      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3271      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
3272      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3273      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
3274      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3275      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
3276      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3277      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
3278        if (showstore)
3279          {
3280          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3281          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3282          }
3283        printf("\n");
3284      goto EXIT;      goto EXIT;
3285      }      }
3286    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
3287             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
3288      {      {
3289      usage();      usage();
3290      goto EXIT;      goto EXIT;
3291      }      }
3292    else    else
3293      {      {
3294      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
3295        printf("** Unknown or malformed option %s\n", arg);
3296      usage();      usage();
3297      yield = 1;      yield = 1;
3298      goto EXIT;      goto EXIT;
# Line 1460  if (argc > 2) Line 3339  if (argc > 2)
3339    
3340  /* Set alternative malloc function */  /* Set alternative malloc function */
3341    
3342    #ifdef SUPPORT_PCRE8
3343  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
3344  pcre_free = new_free;  pcre_free = new_free;
3345  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
3346  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
3347    #endif
3348    
3349    #ifdef SUPPORT_PCRE16
3350    pcre16_malloc = new_malloc;
3351    pcre16_free = new_free;
3352    pcre16_stack_malloc = stack_malloc;
3353    pcre16_stack_free = stack_free;
3354    #endif
3355    
3356    #ifdef SUPPORT_PCRE32
3357    pcre32_malloc = new_malloc;
3358    pcre32_free = new_free;
3359    pcre32_stack_malloc = stack_malloc;
3360    pcre32_stack_free = stack_free;
3361    #endif
3362    
3363  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
3364    
3365  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3366    
3367  /* Main loop */  /* Main loop */
3368    
# Line 1482  while (!done) Line 3377  while (!done)
3377  #endif  #endif
3378    
3379    const char *error;    const char *error;
3380    unsigned char *markptr;    pcre_uint8 *markptr;
3381    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
3382    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
3383    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
3384      unsigned long int get_options;
3385    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
3386    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
3387    int do_allcaps = 0;    int do_allcaps = 0;
# Line 1501  while (!done) Line 3397  while (!done)
3397    int do_flip = 0;    int do_flip = 0;
3398    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
3399    
3400    use_utf8 = 0;  #if !defined NODFA
3401      int dfa_matched = 0;
3402    #endif
3403    
3404      use_utf = 0;
3405    debug_lengths = 1;    debug_lengths = 1;
3406    
3407    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1516  while (!done) Line 3416  while (!done)
3416    
3417    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3418      {      {
3419      unsigned long int magic, get_options;      pcre_uint32 magic;
3420      uschar sbuf[8];      pcre_uint8 sbuf[8];
3421      FILE *f;      FILE *f;
3422    
3423      p++;      p++;
3424        if (*p == '!')
3425          {
3426          do_debug = TRUE;
3427          do_showinfo = TRUE;
3428          p++;
3429          }
3430    
3431      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
3432      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
3433      *pp = 0;      *pp = 0;
# Line 1532  while (!done) Line 3439  while (!done)
3439        continue;        continue;
3440        }        }
3441    
3442        first_gotten_store = 0;
3443      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3444    
3445      true_size =      true_size =
# Line 1539  while (!done) Line 3447  while (!done)
3447      true_study_size =      true_study_size =
3448        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3449    
3450      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
3451      regex_gotten_store = gotten_store;      if (re == NULL)
3452          {
3453          printf("** Failed to get %d bytes of memory for pcre object\n",
3454            (int)true_size);
3455          yield = 1;
3456          goto EXIT;
3457          }
3458        regex_gotten_store = first_gotten_store;
3459    
3460      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3461    
3462      magic = ((real_pcre *)re)->magic_number;      magic = REAL_PCRE_MAGIC(re);
3463      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
3464        {        {
3465        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
3466          {          {
3467          do_flip = 1;          do_flip = 1;
3468          }          }
3469        else        else
3470          {          {
3471          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3472            new_free(re);
3473          fclose(f);          fclose(f);
3474          continue;          continue;
3475          }          }
3476        }        }
3477    
3478        /* We hide the byte-invert info for little and big endian tests. */
3479      fprintf(outfile, "Compiled pattern%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3480        do_flip? " (byte-inverted)" : "", p);        do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
   
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
3481    
3482      /* Now see if there is any following study data. */      /* Now see if there is any following study data. */
3483    
# Line 1583  while (!done) Line 3495  while (!done)
3495          {          {
3496          FAIL_READ:          FAIL_READ:
3497          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
3498          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
3499          if (re != NULL) new_free(re);            {
3500              PCRE_FREE_STUDY(extra);
3501              }
3502            new_free(re);
3503          fclose(f);          fclose(f);
3504          continue;          continue;
3505          }          }
# Line 1593  while (!done) Line 3508  while (!done)
3508        }        }
3509      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
3510    
3511        /* Flip the necessary bytes. */
3512        if (do_flip)
3513          {
3514          int rc;
3515          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3516          if (rc == PCRE_ERROR_BADMODE)
3517            {
3518            pcre_uint16 flags_in_host_byte_order;
3519            if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3520              flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3521            else
3522              flags_in_host_byte_order = swap_uint16(REAL_PCRE_FLAGS(re));
3523            /* Simulate the result of the function call below. */
3524            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3525              pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3526              PCRE_INFO_OPTIONS);
3527            fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3528              "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3529            new_free(re);
3530            fclose(f);
3531            continue;
3532            }
3533          }
3534    
3535        /* Need to know if UTF-8 for printing data strings. */
3536    
3537        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3538          {
3539          new_free(re);
3540          fclose(f);
3541          continue;
3542          }
3543        use_utf = (get_options & PCRE_UTF8) != 0;
3544    
3545      fclose(f);      fclose(f);
3546      goto SHOW_INFO;      goto SHOW_INFO;
3547      }      }
3548    
3549    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
3550    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
3551    
3552    delimiter = *p++;    delimiter = *p++;
3553    
# Line 1649  while (!done) Line 3598  while (!done)
3598    /* Look for options after final delimiter */    /* Look for options after final delimiter */
3599    
3600    options = 0;    options = 0;
3601      study_options = force_study_options;
3602    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
3603    
3604    while (*pp != 0)    while (*pp != 0)
# Line 1685  while (!done) Line 3635  while (!done)
3635  #endif  #endif
3636    
3637        case 'S':        case 'S':
3638        if (do_study == 0)        do_study = 1;
3639          for (;;)
3640          {          {
3641          do_study = 1;          switch (*pp++)
         if (*pp == '+')  
3642            {            {
3643            study_options |= PCRE_STUDY_JIT_COMPILE;            case 'S':
3644            pp++;            do_study = 0;
3645            }            no_force_study = 1;
3646          }            break;
3647        else  
3648          {            case '!':
3649          do_study = 0;            study_options |= PCRE_STUDY_EXTRA_NEEDED;
3650          no_force_study = 1;            break;
3651    
3652              case '+':
3653              if (*pp == '+')
3654                {
3655                verify_jit = TRUE;
3656                pp++;
3657                }
3658              if (*pp >= '1' && *pp <= '7')
3659                study_options |= jit_study_bits[*pp++ - '1'];
3660              else
3661                study_options |= jit_study_bits[6];
3662              break;
3663    
3664              case '-':
3665              study_options &= ~PCRE_STUDY_ALLJIT;
3666              break;
3667    
3668              default:
3669              pp--;
3670              goto ENDLOOP;
3671              }
3672          }          }
3673          ENDLOOP:
3674        break;        break;
3675    
3676        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
# Line 1706  while (!done) Line 3678  while (!done)
3678        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
3679        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3680        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
3681        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
3682        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
3683    
3684        case 'T':        case 'T':
# Line 1740  while (!done) Line 3712  while (!done)
3712          goto SKIP_DATA;          goto SKIP_DATA;
3713          }          }
3714        locale_set = 1;        locale_set = 1;
3715        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
3716        pp = ppp;        pp = ppp;
3717        break;        break;
3718    
# Line 1753  while (!done) Line 3725  while (!done)
3725    
3726        case '<':        case '<':
3727          {          {
3728          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3729            {            {
3730            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
3731            pp += 3;            pp += 3;
# Line 1781  while (!done) Line 3753  while (!done)
3753    
3754    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
3755    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
3756    local character tables. */    local character tables. Neither does it have 16-bit support. */
3757    
3758  #if !defined NOPOSIX  #if !defined NOPOSIX
3759    if (posix || do_posix)    if (posix || do_posix)
# Line 1797  while (!done) Line 3769  while (!done)
3769      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3770      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3771    
3772        first_gotten_store = 0;
3773      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
3774    
3775      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1816  while (!done) Line 3789  while (!done)
3789  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
3790    
3791      {      {
3792      unsigned long int get_options;      /* In 16- or 32-bit mode, convert the input. */
3793    
3794    #ifdef SUPPORT_PCRE16
3795        if (pcre_mode == PCRE16_MODE)
3796          {
3797          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3798            {
3799            case -1:
3800            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3801              "converted to UTF-16\n");
3802            goto SKIP_DATA;
3803    
3804            case -2:
3805            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3806              "cannot be converted to UTF-16\n");
3807            goto SKIP_DATA;
3808    
3809            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3810            fprintf(outfile, "**Failed: character value greater than 0xffff "
3811              "cannot be converted to 16-bit in non-UTF mode\n");
3812            goto SKIP_DATA;
3813    
3814            default:
3815            break;
3816            }
3817          p = (pcre_uint8 *)buffer16;
3818          }
3819    #endif
3820    
3821    #ifdef SUPPORT_PCRE32
3822        if (pcre_mode == PCRE32_MODE)
3823          {
3824          switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3825            {
3826            case -1:
3827            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3828              "converted to UTF-32\n");
3829            goto SKIP_DATA;
3830    
3831            case -2:
3832            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3833              "cannot be converted to UTF-32\n");
3834            goto SKIP_DATA;
3835    
3836            case -3:
3837            fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3838            goto SKIP_DATA;
3839    
3840            default:
3841            break;
3842            }
3843          p = (pcre_uint8 *)buffer32;
3844          }
3845    #endif
3846    
3847        /* Compile many times when timing */
3848    
3849      if (timeit > 0)      if (timeit > 0)
3850        {        {
# Line 1825  while (!done) Line 3853  while (!done)
3853        clock_t start_time = clock();        clock_t start_time = clock();
3854        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
3855          {          {
3856          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3857          if (re != NULL) free(re);          if (re != NULL) free(re);
3858          }          }
3859        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1834  while (!done) Line 3862  while (!done)
3862            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
3863        }        }
3864    
3865      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
3866        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3867    
3868      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3869      if non-interactive. */      if non-interactive. */
# Line 1865  while (!done) Line 3894  while (!done)
3894      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
3895      lines. */      lines. */
3896    
3897      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3898      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;        goto SKIP_DATA;
3899        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
     /* Print information if required. There are now two info-returning  
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
   
     if (log_store)  
       fprintf(outfile, "Memory allocation (code space): %d\n",  
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3900    
3901      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3902      and remember the store that was got. */      and remember the store that was got. */
3903    
3904      true_size = ((real_pcre *)re)->size;      true_size = REAL_PCRE_SIZE(re);
3905      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3906    
3907        /* Output code size information if requested */
3908    
3909        if (log_store)
3910          {
3911          int name_count, name_entry_size, real_pcre_size;
3912    
3913          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3914          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3915          real_pcre_size = 0;
3916    #ifdef SUPPORT_PCRE8
3917          if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3918            real_pcre_size = sizeof(real_pcre);
3919    #endif
3920    #ifdef SUPPORT_PCRE16
3921          if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3922            real_pcre_size = sizeof(real_pcre16);
3923    #endif
3924    #ifdef SUPPORT_PCRE32
3925          if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3926            real_pcre_size = sizeof(real_pcre32);
3927    #endif
3928          fprintf(outfile, "Memory allocation (code space): %d\n",
3929            (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3930          }
3931    
3932      /* If -s or /S was present, study the regex to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
3933      help with the matching, unless the pattern has the SS option, which      help with the matching, unless the pattern has the SS option, which
# Line 1897  while (!done) Line 3942  while (!done)
3942          clock_t time_taken;          clock_t time_taken;
3943          clock_t start_time = clock();          clock_t start_time = clock();
3944          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3945            extra = pcre_study(re, study_options | force_study_options, &error);            {
3946              PCRE_STUDY(extra, re, study_options, &error);
3947              }
3948          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3949          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
3950              {
3951              PCRE_FREE_STUDY(extra);
3952              }
3953          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3954            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3955              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3956          }          }
3957        extra = pcre_study(re, study_options | force_study_options, &error);        PCRE_STUDY(extra, re, study_options, &error);
3958        if (error != NULL)        if (error != NULL)
3959          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3960        else if (extra != NULL)        else if (extra != NULL)
3961            {
3962          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3963            if (log_store)
3964              {
3965              size_t jitsize;
3966              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3967                  jitsize != 0)
3968                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3969              }
3970            }
3971        }        }
3972    
3973      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1924  while (!done) Line 3983  while (!done)
3983        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
3984        }        }
3985    
3986      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
3987    
3988      SHOW_INFO:      SHOW_INFO:
3989    
3990      if (do_debug)      if (do_debug)
3991        {        {
3992        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3993        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3994        }        }
3995    
3996      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1976  while (!done) Line 3998  while (!done)
3998      if (do_showinfo)      if (do_showinfo)
3999        {        {
4000        unsigned long int all_options;        unsigned long int all_options;
4001  #if !defined NOINFOCHECK        pcre_uint32 first_char, need_char;
4002        int old_first_char, old_options, old_count;        int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4003  #endif          hascrorlf, maxlookbehind;
       int count, backrefmax, first_char, need_char, okpartial, jchanged,  
         hascrorlf;  
4004        int nameentrysize, namecount;        int nameentrysize, namecount;
4005        const uschar *nametable;        const pcre_uint8 *nametable;
4006    
4007        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4008        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4009        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4010        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4011        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4012        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4013        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4014        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4015        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4016        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4017        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4018              new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4019  #if !defined NOINFOCHECK            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4020        old_count = pcre_info(re, &old_options, &old_first_char);            new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4021        if (count < 0) fprintf(outfile,            != 0)
4022          "Error %d from pcre_info()\n", count);          goto SKIP_DATA;
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
4023    
4024        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
4025          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 2029  while (!done) Line 4034  while (!done)
4034          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
4035          while (namecount-- > 0)          while (namecount-- > 0)
4036            {            {
4037            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,            int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4038              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int length = (int)STRLEN(nametable + imm2_size);
4039              GET2(nametable, 0));            fprintf(outfile, "  ");
4040            nametable += nameentrysize;            PCHARSV(nametable, imm2_size, length, outfile);
4041              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4042    #ifdef SUPPORT_PCRE32
4043              if (pcre_mode == PCRE32_MODE)
4044                fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4045    #endif
4046    #ifdef SUPPORT_PCRE16
4047              if (pcre_mode == PCRE16_MODE)
4048                fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4049    #endif
4050    #ifdef SUPPORT_PCRE8
4051              if (pcre_mode == PCRE8_MODE)
4052                fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4053    #endif
4054              nametable += nameentrysize * CHAR_SIZE;
4055            }            }
4056          }          }
4057    
4058        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4059        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4060    
4061        all_options = ((real_pcre *)re)->options;        all_options = REAL_PCRE_OPTIONS(re);
4062        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
4063    
4064        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
4065          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 2056  while (!done) Line 4075  while (!done)
4075            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4076            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4077            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4078            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4079            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4080            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4081            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4082            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
4083    
# Line 2090  while (!done) Line 4109  while (!done)
4109          break;          break;
4110          }          }
4111    
4112        if (first_char == -1)        if (first_char_set == 2)
4113          {          {
4114          fprintf(outfile, "First char at start or follows newline\n");          fprintf(outfile, "First char at start or follows newline\n");
4115          }          }
4116        else if (first_char < 0)        else if (first_char_set == 1)
4117          {          {
4118          fprintf(outfile, "No first char\n");          const char *caseless =
4119              ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4120              "" : " (caseless)";
4121    
4122            if (PRINTOK(first_char))
4123              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4124            else
4125              {
4126              fprintf(outfile, "First char = ");
4127              pchar(first_char, outfile);
4128              fprintf(outfile, "%s\n", caseless);
4129              }
4130          }          }
4131        else        else
4132          {          {
4133          int ch = first_char & 255;          fprintf(outfile, "No first char\n");
         const char *caseless = ((first_char & REQ_CASELESS) == 0)?  
           "" : " (caseless)";  
         if (PRINTHEX(ch))  
           fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);  
         else  
           fprintf(outfile, "First char = %d%s\n", ch, caseless);  
4134          }          }
4135    
4136        if (need_char < 0)        if (need_char_set == 0)
4137          {          {
4138          fprintf(outfile, "No need char\n");          fprintf(outfile, "No need char\n");
4139          }          }
4140        else        else
4141          {          {
4142          int ch = need_char & 255;          const char *caseless =
4143          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4144            "" : " (caseless)";            "" : " (caseless)";
4145          if (PRINTHEX(ch))  
4146            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
4147              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4148          else          else
4149            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
4150              fprintf(outfile, "Need char = ");
4151              pchar(need_char, outfile);
4152              fprintf(outfile, "%s\n", caseless);
4153              }
4154          }          }
4155    
4156          if (maxlookbehind > 0)
4157            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4158    
4159        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
4160        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
4161        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
# Line 2138  while (!done) Line 4170  while (!done)
4170            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
4171          else          else
4172            {            {
4173            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
4174            int minlength;            int minlength;
4175    
4176            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4177            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4178    
4179            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
4180              {              {
4181              int i;              if (start_bits == NULL)
4182              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
4183              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
4184                {                {
4185                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
4186                  int c = 24;
4187                  fprintf(outfile, "Starting byte set: ");
4188                  for (i = 0; i < 256; i++)
4189                  {                  {
4190                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
4191                    {                    {
4192                    fprintf(outfile, "%c ", i);                    if (c > 75)
4193                    c += 2;                      {
4194                    }                      fprintf(outfile, "\n  ");
4195                  else                      c = 2;
4196                    {                      }
4197                    fprintf(outfile, "\\x%02x ", i);                    if (PRINTOK(i) && i != ' ')
4198                    c += 5;                      {
4199                        fprintf(outfile, "%c ", i);
4200                        c += 2;
4201                        }
4202                      else
4203                        {
4204                        fprintf(outfile, "\\x%02x ", i);
4205                        c += 5;
4206                        }
4207                    }                    }
4208                  }                  }
4209                  fprintf(outfile, "\n");
4210                }                }
             fprintf(outfile, "\n");  
4211              }              }
4212            }            }
4213    
4214          /* Show this only if the JIT was set by /S, not by -s. */          /* Show this only if the JIT was set by /S, not by -s. */
4215    
4216          if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)          if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4217                (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4218            {            {
4219            int jit;            int jit;
4220            new_info(re, extra, PCRE_INFO_JIT, &jit);            if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4221            if (jit)              {
4222              fprintf(outfile, "JIT study was successful\n");              if (jit)
4223            else                fprintf(outfile, "JIT study was successful\n");
4224  #ifdef SUPPORT_JIT              else
4225              fprintf(outfile, "JIT study was not successful\n");  #ifdef SUPPORT_JIT
4226                  fprintf(outfile, "JIT study was not successful\n");
4227  #else  #else
4228              fprintf(outfile, "JIT support is not available in this version of PCRE\n");                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4229  #endif  #endif
4230            }              }
4231              }
4232          }          }
4233        }        }
4234    
# Line 2208  while (!done) Line 4245  while (!done)
4245          }          }
4246        else        else
4247          {          {
4248          uschar sbuf[8];          pcre_uint8 sbuf[8];
4249          sbuf[0] = (uschar)((true_size >> 24) & 255);  
4250          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
4251          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4252          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4253            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
4254          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
4255          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4256          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4257          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
4258            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4259    
4260          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
4261              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 2245  while (!done) Line 4283  while (!done)
4283          }          }
4284    
4285        new_free(re);        new_free(re);
4286        if (extra != NULL) pcre_free_study(extra);        if (extra != NULL)
4287            {
4288            PCRE_FREE_STUDY(extra);
4289            }
4290        if (locale_set)        if (locale_set)
4291          {          {
4292          new_free((void *)tables);          new_free((void *)tables);
# Line 2260  while (!done) Line 4301  while (!done)
4301    
4302    for (;;)    for (;;)
4303      {      {
4304      uschar *q;  #ifdef SUPPORT_PCRE8
4305      uschar *bptr;      pcre_uint8 *q8;
4306    #endif