/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 510 by ph10, Sat Mar 27 17:45:29 2010 UTC revision 1055 by chpe, Tue Oct 16 15:53:30 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 58  POSSIBILITY OF SUCH DAMAGE.
58  #include <locale.h>  #include <locale.h>
59  #include <errno.h>  #include <errno.h>
60    
61  #ifdef SUPPORT_LIBREADLINE  /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
68  #include <unistd.h>  #include <unistd.h>
69  #endif  #endif
70    #if defined(SUPPORT_LIBREADLINE)
71  #include <readline/readline.h>  #include <readline/readline.h>
72  #include <readline/history.h>  #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80  #endif  #endif
   
81    
82  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
83  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 79  input mode under Windows. */ Line 101  input mode under Windows. */
101  #define fileno _fileno  #define fileno _fileno
102  #endif  #endif
103    
104    /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106    #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
112  #else  #else
113  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
114  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119  #define INPUT_MODE   "rb"  #define INPUT_MODE   "rb"
120  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
121  #endif  #endif
122    #endif
123    
124    #define PRIV(name) name
125    
126  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
127  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 97  here before pcre_internal.h so that the Line 133  here before pcre_internal.h so that the
133  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
134    
135  #include "pcre.h"  #include "pcre.h"
136    
137    #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138    /* Configure internal macros to 32 bit mode. */
139    #define COMPILE_PCRE32
140    #endif
141    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142    /* Configure internal macros to 16 bit mode. */
143    #define COMPILE_PCRE16
144    #endif
145    #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146    /* Configure internal macros to 8 bit mode. */
147    #define COMPILE_PCRE8
148    #endif
149    
150  #include "pcre_internal.h"  #include "pcre_internal.h"
151    
152    /* The pcre_printint() function, which prints the internal form of a compiled
153    regex, is held in a separate file so that (a) it can be compiled in either
154    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
155    when that is compiled in debug mode. */
156    
157    #ifdef SUPPORT_PCRE8
158    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159    #endif
160    #ifdef SUPPORT_PCRE16
161    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162    #endif
163    #ifdef SUPPORT_PCRE32
164    void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165    #endif
166    
167  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
168  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source files here, changing the names of the
169  external symbols to prevent clashes. */  external symbols to prevent clashes. */
170    
171  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
172    
173  #include "pcre_tables.c"  #include "pcre_tables.c"
174    #include "pcre_ucd.c"
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
175    
176  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
177  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
178  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
179  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
180  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
181    
182  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #ifdef EBCDIC
183    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
184    #else
185    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
186    #endif
187    
188    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189    
190    /* Posix support is disabled in 16 or 32 bit only mode. */
191    #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192    #define NOPOSIX
193    #endif
194    
195  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
196  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 140  Makefile. */ Line 200  Makefile. */
200  #include "pcreposix.h"  #include "pcreposix.h"
201  #endif  #endif
202    
203  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
204  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
207  UTF8 support if PCRE is built without it. */  
208    #ifndef SUPPORT_UTF
209  #ifndef SUPPORT_UTF8  #ifndef NOUTF
210  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
211  #endif  #endif
212  #endif  #endif
213    
214    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
215    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
216    only from one place and is handled differently). I couldn't dream up any way of
217    using a single macro to do this in a generic way, because of the many different
218    argument requirements. We know that at least one of SUPPORT_PCRE8 and
219    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
220    use these in the definitions of generic macros.
221    
222    **** Special note about the PCHARSxxx macros: the address of the string to be
223    printed is always given as two arguments: a base address followed by an offset.
224    The base address is cast to the correct data size for 8 or 16 bit data; the
225    offset is in units of this size. If the string were given as base+offset in one
226    argument, the casting might be incorrectly applied. */
227    
228    #ifdef SUPPORT_PCRE8
229    
230    #define PCHARS8(lv, p, offset, len, f) \
231      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
232    
233    #define PCHARSV8(p, offset, len, f) \
234      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235    
236    #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237      p = read_capture_name8(p, cn8, re)
238    
239    #define STRLEN8(p) ((int)strlen((char *)p))
240    
241    #define SET_PCRE_CALLOUT8(callout) \
242      pcre_callout = callout
243    
244    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245       pcre_assign_jit_stack(extra, callback, userdata)
246    
247    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248      re = pcre_compile((char *)pat, options, error, erroffset, tables)
249    
250    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251        namesptr, cbuffer, size) \
252      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253        (char *)namesptr, cbuffer, size)
254    
255    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257    
258    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259        offsets, size_offsets, workspace, size_workspace) \
260      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261        offsets, size_offsets, workspace, size_workspace)
262    
263    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264        offsets, size_offsets) \
265      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266        offsets, size_offsets)
267    
268    #define PCRE_FREE_STUDY8(extra) \
269      pcre_free_study(extra)
270    
271    #define PCRE_FREE_SUBSTRING8(substring) \
272      pcre_free_substring(substring)
273    
274    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275      pcre_free_substring_list(listptr)
276    
277    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278        getnamesptr, subsptr) \
279      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280        (char *)getnamesptr, subsptr)
281    
282    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
283      n = pcre_get_stringnumber(re, (char *)ptr)
284    
285    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287    
288    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290    
291    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293    
294    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295      pcre_printint(re, outfile, debug_lengths)
296    
297    #define PCRE_STUDY8(extra, re, options, error) \
298      extra = pcre_study(re, options, error)
299    
300    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301      pcre_jit_stack_alloc(startsize, maxsize)
302    
303    #define PCRE_JIT_STACK_FREE8(stack) \
304      pcre_jit_stack_free(stack)
305    
306    #endif /* SUPPORT_PCRE8 */
307    
308    /* -----------------------------------------------------------*/
309    
310    #ifdef SUPPORT_PCRE16
311    
312    #define PCHARS16(lv, p, offset, len, f) \
313      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
314    
315    #define PCHARSV16(p, offset, len, f) \
316      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
317    
318    #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319      p = read_capture_name16(p, cn16, re)
320    
321    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322    
323    #define SET_PCRE_CALLOUT16(callout) \
324      pcre16_callout = (int (*)(pcre16_callout_block *))callout
325    
326    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327      pcre16_assign_jit_stack((pcre16_extra *)extra, \
328        (pcre16_jit_callback)callback, userdata)
329    
330    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332        tables)
333    
334    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335        namesptr, cbuffer, size) \
336      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338    
339    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341        (PCRE_UCHAR16 *)cbuffer, size/2)
342    
343    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344        offsets, size_offsets, workspace, size_workspace) \
345      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347        workspace, size_workspace)
348    
349    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350        offsets, size_offsets) \
351      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352        len, start_offset, options, offsets, size_offsets)
353    
354    #define PCRE_FREE_STUDY16(extra) \
355      pcre16_free_study((pcre16_extra *)extra)
356    
357    #define PCRE_FREE_SUBSTRING16(substring) \
358      pcre16_free_substring((PCRE_SPTR16)substring)
359    
360    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362    
363    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364        getnamesptr, subsptr) \
365      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367    
368    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
369      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
370    
371    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373        (PCRE_SPTR16 *)(void*)subsptr)
374    
375    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377        (PCRE_SPTR16 **)(void*)listptr)
378    
379    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381        tables)
382    
383    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384      pcre16_printint(re, outfile, debug_lengths)
385    
386    #define PCRE_STUDY16(extra, re, options, error) \
387      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388    
389    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391    
392    #define PCRE_JIT_STACK_FREE16(stack) \
393      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394    
395    #endif /* SUPPORT_PCRE16 */
396    
397    /* -----------------------------------------------------------*/
398    
399    #ifdef SUPPORT_PCRE32
400    
401    #define PCHARS32(lv, p, offset, len, f) \
402      lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)
403    
404    #define PCHARSV32(p, offset, len, f) \
405      (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)
406    
407    #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408      p = read_capture_name32(p, cn32, re)
409    
410    #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411    
412    #define SET_PCRE_CALLOUT32(callout) \
413      pcre32_callout = (int (*)(pcre32_callout_block *))callout
414    
415    #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416      pcre32_assign_jit_stack((pcre32_extra *)extra, \
417        (pcre32_jit_callback)callback, userdata)
418    
419    #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420      re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421        tables)
422    
423    #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
424        namesptr, cbuffer, size) \
425      rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
426        count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/2)
427    
428    #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
429      rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
430        (PCRE_UCHAR32 *)cbuffer, size/2)
431    
432    #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433        offsets, size_offsets, workspace, size_workspace) \
434      count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435        (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436        workspace, size_workspace)
437    
438    #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439        offsets, size_offsets) \
440      count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441        len, start_offset, options, offsets, size_offsets)
442    
443    #define PCRE_FREE_STUDY32(extra) \
444      pcre32_free_study((pcre32_extra *)extra)
445    
446    #define PCRE_FREE_SUBSTRING32(substring) \
447      pcre32_free_substring((PCRE_SPTR32)substring)
448    
449    #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450      pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451    
452    #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453        getnamesptr, subsptr) \
454      rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455        count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456    
457    #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
458      n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
459    
460    #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461      rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462        (PCRE_SPTR32 *)(void*)subsptr)
463    
464    #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465      rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466        (PCRE_SPTR32 **)(void*)listptr)
467    
468    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469      rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470        tables)
471    
472    #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473      pcre32_printint(re, outfile, debug_lengths)
474    
475    #define PCRE_STUDY32(extra, re, options, error) \
476      extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477    
478    #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479      (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480    
481    #define PCRE_JIT_STACK_FREE32(stack) \
482      pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483    
484    #endif /* SUPPORT_PCRE32 */
485    
486    
487    /* ----- Two or more modes are supported; a runtime test is needed, except
488    for pcre_config(), and the JIT stack functions, when it doesn't matter which
489    version is called. ----- */
490    
491    enum {
492      PCRE8_MODE,
493      PCRE16_MODE,
494      PCRE32_MODE
495    };
496    
497    #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498    
499    #define CHAR_SIZE (1 << pcre_mode)
500    
501    #define PCHARS(lv, p, offset, len, f) \
502      if (pcre_mode == PCRE32_MODE) \
503        PCHARS32(lv, p, offset, len, f); \
504      else if (pcre_mode == PCRE16_MODE) \
505        PCHARS16(lv, p, offset, len, f); \
506      else \
507        PCHARS8(lv, p, offset, len, f)
508    
509    #define PCHARSV(p, offset, len, f) \
510      if (pcre_mode == PCRE32_MODE) \
511        PCHARSV32(p, offset, len, f); \
512      else if (pcre_mode == PCRE16_MODE) \
513        PCHARSV16(p, offset, len, f); \
514      else \
515        PCHARSV8(p, offset, len, f)
516    
517    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518      if (pcre_mode == PCRE32_MODE) \
519        READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520      else if (pcre_mode == PCRE16_MODE) \
521        READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522      else \
523        READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524    
525    #define SET_PCRE_CALLOUT(callout) \
526      if (pcre_mode == PCRE32_MODE) \
527        SET_PCRE_CALLOUT32(callout); \
528      else if (pcre_mode == PCRE16_MODE) \
529        SET_PCRE_CALLOUT16(callout); \
530      else \
531        SET_PCRE_CALLOUT8(callout)
532    
533    #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534    
535    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536      if (pcre_mode == PCRE32_MODE) \
537        PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538      else if (pcre_mode == PCRE16_MODE) \
539        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540      else \
541        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542    
543    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544      if (pcre_mode == PCRE32_MODE) \
545        PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546      else if (pcre_mode == PCRE16_MODE) \
547        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548      else \
549        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
550    
551    #define PCRE_CONFIG pcre_config
552    
553    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554        namesptr, cbuffer, size) \
555      if (pcre_mode == PCRE32_MODE) \
556        PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557          namesptr, cbuffer, size); \
558      else if (pcre_mode == PCRE16_MODE) \
559        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560          namesptr, cbuffer, size); \
561      else \
562        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
563          namesptr, cbuffer, size)
564    
565    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566      if (pcre_mode == PCRE32_MODE) \
567        PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568      else if (pcre_mode == PCRE16_MODE) \
569        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570      else \
571        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572    
573    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574        offsets, size_offsets, workspace, size_workspace) \
575      if (pcre_mode == PCRE32_MODE) \
576        PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577          offsets, size_offsets, workspace, size_workspace); \
578      else if (pcre_mode == PCRE16_MODE) \
579        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580          offsets, size_offsets, workspace, size_workspace); \
581      else \
582        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
583          offsets, size_offsets, workspace, size_workspace)
584    
585    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586        offsets, size_offsets) \
587      if (pcre_mode == PCRE32_MODE) \
588        PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589          offsets, size_offsets); \
590      else if (pcre_mode == PCRE16_MODE) \
591        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592          offsets, size_offsets); \
593      else \
594        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
595          offsets, size_offsets)
596    
597    #define PCRE_FREE_STUDY(extra) \
598      if (pcre_mode == PCRE32_MODE) \
599        PCRE_FREE_STUDY32(extra); \
600      else if (pcre_mode == PCRE16_MODE) \
601        PCRE_FREE_STUDY16(extra); \
602      else \
603        PCRE_FREE_STUDY8(extra)
604    
605    #define PCRE_FREE_SUBSTRING(substring) \
606      if (pcre_mode == PCRE32_MODE) \
607        PCRE_FREE_SUBSTRING32(substring); \
608      else if (pcre_mode == PCRE16_MODE) \
609        PCRE_FREE_SUBSTRING16(substring); \
610      else \
611        PCRE_FREE_SUBSTRING8(substring)
612    
613    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614      if (pcre_mode == PCRE32_MODE) \
615        PCRE_FREE_SUBSTRING_LIST32(listptr); \
616      else if (pcre_mode == PCRE16_MODE) \
617        PCRE_FREE_SUBSTRING_LIST16(listptr); \
618      else \
619        PCRE_FREE_SUBSTRING_LIST8(listptr)
620    
621    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622        getnamesptr, subsptr) \
623      if (pcre_mode == PCRE32_MODE) \
624        PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625          getnamesptr, subsptr); \
626      else if (pcre_mode == PCRE16_MODE) \
627        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628          getnamesptr, subsptr); \
629      else \
630        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
631          getnamesptr, subsptr)
632    
633    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634      if (pcre_mode == PCRE32_MODE) \
635        PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636      else if (pcre_mode == PCRE16_MODE) \
637        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638      else \
639        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640    
641    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642      if (pcre_mode == PCRE32_MODE) \
643        PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644      else if (pcre_mode == PCRE16_MODE) \
645        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646      else \
647        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648    
649    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650      if (pcre_mode == PCRE32_MODE) \
651        PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652      else if (pcre_mode == PCRE16_MODE) \
653        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654      else \
655        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656    
657    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658      (pcre_mode == PCRE32_MODE ? \
659         PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660        : pcre_mode == PCRE16_MODE ? \
661          PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662          : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663    
664    #define PCRE_JIT_STACK_FREE(stack) \
665      if (pcre_mode == PCRE32_MODE) \
666        PCRE_JIT_STACK_FREE32(stack); \
667      else if (pcre_mode == PCRE16_MODE) \
668        PCRE_JIT_STACK_FREE16(stack); \
669      else \
670        PCRE_JIT_STACK_FREE8(stack)
671    
672    #define PCRE_MAKETABLES \
673      (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674    
675    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676      if (pcre_mode == PCRE32_MODE) \
677        PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678      else if (pcre_mode == PCRE16_MODE) \
679        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680      else \
681        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682    
683    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684      if (pcre_mode == PCRE32_MODE) \
685        PCRE_PRINTINT32(re, outfile, debug_lengths); \
686      else if (pcre_mode == PCRE16_MODE) \
687        PCRE_PRINTINT16(re, outfile, debug_lengths); \
688      else \
689        PCRE_PRINTINT8(re, outfile, debug_lengths)
690    
691    #define PCRE_STUDY(extra, re, options, error) \
692      if (pcre_mode == PCRE32_MODE) \
693        PCRE_STUDY32(extra, re, options, error); \
694      else if (pcre_mode == PCRE16_MODE) \
695        PCRE_STUDY16(extra, re, options, error); \
696      else \
697        PCRE_STUDY8(extra, re, options, error)
698    
699    /* ----- Only 8-bit mode is supported ----- */
700    
701    #elif defined SUPPORT_PCRE8
702    #define CHAR_SIZE                 1
703    #define PCHARS                    PCHARS8
704    #define PCHARSV                   PCHARSV8
705    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
706    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
707    #define STRLEN                    STRLEN8
708    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
709    #define PCRE_COMPILE              PCRE_COMPILE8
710    #define PCRE_CONFIG               pcre_config
711    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
712    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
713    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
714    #define PCRE_EXEC                 PCRE_EXEC8
715    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
716    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
717    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
718    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
719    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
720    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
721    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
722    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
723    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
724    #define PCRE_MAKETABLES           pcre_maketables()
725    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
726    #define PCRE_PRINTINT             PCRE_PRINTINT8
727    #define PCRE_STUDY                PCRE_STUDY8
728    
729    /* ----- Only 16-bit mode is supported ----- */
730    
731    #elif defined SUPPORT_PCRE16
732    #define CHAR_SIZE                 2
733    #define PCHARS                    PCHARS16
734    #define PCHARSV                   PCHARSV16
735    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
736    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
737    #define STRLEN                    STRLEN16
738    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
739    #define PCRE_COMPILE              PCRE_COMPILE16
740    #define PCRE_CONFIG               pcre16_config
741    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
742    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
743    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
744    #define PCRE_EXEC                 PCRE_EXEC16
745    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
746    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
747    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
748    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
749    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
750    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
751    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
752    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
753    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
754    #define PCRE_MAKETABLES           pcre16_maketables()
755    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756    #define PCRE_PRINTINT             PCRE_PRINTINT16
757    #define PCRE_STUDY                PCRE_STUDY16
758    
759    /* ----- Only 32-bit mode is supported ----- */
760    
761    #elif defined SUPPORT_PCRE32
762    #define CHAR_SIZE                 4
763    #define PCHARS                    PCHARS32
764    #define PCHARSV                   PCHARSV32
765    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME32
766    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT32
767    #define STRLEN                    STRLEN32
768    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK32
769    #define PCRE_COMPILE              PCRE_COMPILE32
770    #define PCRE_CONFIG               pcre32_config
771    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING32
773    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC32
774    #define PCRE_EXEC                 PCRE_EXEC32
775    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY32
776    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING32
777    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST32
778    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING32
779    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER32
780    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING32
781    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST32
782    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC32
783    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE32
784    #define PCRE_MAKETABLES           pcre32_maketables()
785    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786    #define PCRE_PRINTINT             PCRE_PRINTINT32
787    #define PCRE_STUDY                PCRE_STUDY32
788    
789    #endif
790    
791    /* ----- End of mode-specific function call macros ----- */
792    
793    
794  /* Other parameters */  /* Other parameters */
795    
# Line 163  UTF8 support if PCRE is built without it Line 801  UTF8 support if PCRE is built without it
801  #endif  #endif
802  #endif  #endif
803    
804    #if !defined NODFA
805    #define DFA_WS_DIMENSION 1000
806    #endif
807    
808  /* This is the default loop count for timing. */  /* This is the default loop count for timing. */
809    
810  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
# Line 177  static int callout_fail_count; Line 819  static int callout_fail_count;
819  static int callout_fail_id;  static int callout_fail_id;
820  static int debug_lengths;  static int debug_lengths;
821  static int first_callout;  static int first_callout;
822    static int jit_was_used;
823  static int locale_set = 0;  static int locale_set = 0;
824  static int show_malloc;  static int show_malloc;
825  static int use_utf8;  static int use_utf;
826  static size_t gotten_store;  static size_t gotten_store;
827    static size_t first_gotten_store = 0;
828    static const unsigned char *last_callout_mark = NULL;
829    
830  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
831    
832  static int buffer_size = 50000;  static int buffer_size = 50000;
833  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
834  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
835  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
836    
837    /* Another buffer is needed for translation to 16-bit (or 32-bit) character
838    strings. It will be obtained and extended as required. */
839    
840    #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
841    
842    /* We need the table of operator lengths that is used for 16/32-bit compiling, in
843    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
844    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
845    appropriately for the 16/32-bit world. Just as a safety check, make sure that
846    COMPILE_PCRE[16|32] is *not* set. */
847    
848    #ifdef COMPILE_PCRE16
849    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
850    #endif
851    
852    #ifdef COMPILE_PCRE32
853    #error COMPILE_PCRE32 must not be set when compiling pcretest.c
854    #endif
855    
856    #if LINK_SIZE == 2
857    #undef LINK_SIZE
858    #define LINK_SIZE 1
859    #elif LINK_SIZE == 3 || LINK_SIZE == 4
860    #undef LINK_SIZE
861    #define LINK_SIZE 2
862    #else
863    #error LINK_SIZE must be either 2, 3, or 4
864    #endif
865    
866    #undef IMM2_SIZE
867    #define IMM2_SIZE 1
868    
869    #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
870    
871    #ifdef SUPPORT_PCRE16
872    static int buffer16_size = 0;
873    static pcre_uint16 *buffer16 = NULL;
874    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
875    #endif  /* SUPPORT_PCRE16 */
876    
877    #ifdef SUPPORT_PCRE32
878    static int buffer32_size = 0;
879    static pcre_uint32 *buffer32 = NULL;
880    static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
881    #endif  /* SUPPORT_PCRE32 */
882    
883    /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
884    support, it can be changed by an option. If there is no 8-bit support, there
885    must be 16- or 32-bit support, so default to 16-bit mode if available, else 32-bit. */
886    
887    #if defined SUPPORT_PCRE8
888    static int pcre_mode = PCRE8_MODE;
889    #elif defined SUPPORT_PCRE16
890    static int pcre_mode = PCRE16_MODE;
891    #elif defined SUPPORT_PCRE32
892    static int pcre_mode = PCRE32_MODE;
893    #endif
894    
895    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
896    
897    static int jit_study_bits[] =  /* seven entries, one per n in 1..7; presumably indexed by n - 1 (the lookup is not in this part of the file) */
898      {
899      PCRE_STUDY_JIT_COMPILE,  /* n = 1 */
900      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,  /* n = 2 */
901      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,  /* n = 3 */
902      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,  /* n = 4 */
903      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,  /* n = 5 */
904      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,  /* n = 6 */
905      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
906        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE  /* n = 7: all three options */
907    };
908    
909    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
910      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
911    
912    /* Textual explanations for runtime error codes */
913    
914    static const char *errtexts[] = {
915      NULL,  /* 0 is no error */
916      NULL,  /* NOMATCH is handled specially */
917      "NULL argument passed",
918      "bad option value",
919      "magic number missing",
920      "unknown opcode - pattern overwritten?",
921      "no more memory",
922      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
923      "match limit exceeded",
924      "callout error code",
925      NULL,  /* BADUTF8/16 is handled specially */
926      NULL,  /* BADUTF8/16 offset is handled specially */
927      NULL,  /* PARTIAL is handled specially */
928      "not used - internal error",
929      "internal error - pattern overwritten?",
930      "bad count value",
931      "item unsupported for DFA matching",
932      "backreference condition or recursion test not supported for DFA matching",
933      "match limit not supported for DFA matching",
934      "workspace size exceeded in DFA matching",
935      "too much recursion for DFA matching",
936      "recursion limit exceeded",
937      "not used - internal error",
938      "invalid combination of newline options",
939      "bad offset value",
940      NULL,  /* SHORTUTF8/16 is handled specially */
941      "nested recursion at the same subject position",
942      "JIT stack limit reached",
943      "pattern compiled in wrong mode: 8-bit/16-bit error",
944      "pattern compiled with other endianness",
945      "invalid data in workspace for DFA restart"
946    };
947    
948    
949  /*************************************************  /*************************************************
950    *         Alternate character tables             *
951    *************************************************/
952    
953    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
954    using the default tables of the library. However, the T option can be used to
955    select alternate sets of tables, for different kinds of testing. Note also that
956    the L (locale) option also adjusts the tables. */
957    
958    /* This is the set of tables distributed as default with PCRE. It recognizes
959    only ASCII characters. */
960    
961    static const pcre_uint8 tables0[] = {
962    
963    /* This table is a lower casing table. */
964    
965        0,  1,  2,  3,  4,  5,  6,  7,
966        8,  9, 10, 11, 12, 13, 14, 15,
967       16, 17, 18, 19, 20, 21, 22, 23,
968       24, 25, 26, 27, 28, 29, 30, 31,
969       32, 33, 34, 35, 36, 37, 38, 39,
970       40, 41, 42, 43, 44, 45, 46, 47,
971       48, 49, 50, 51, 52, 53, 54, 55,
972       56, 57, 58, 59, 60, 61, 62, 63,
973       64, 97, 98, 99,100,101,102,103,
974      104,105,106,107,108,109,110,111,
975      112,113,114,115,116,117,118,119,
976      120,121,122, 91, 92, 93, 94, 95,
977       96, 97, 98, 99,100,101,102,103,
978      104,105,106,107,108,109,110,111,
979      112,113,114,115,116,117,118,119,
980      120,121,122,123,124,125,126,127,
981      128,129,130,131,132,133,134,135,
982      136,137,138,139,140,141,142,143,
983      144,145,146,147,148,149,150,151,
984      152,153,154,155,156,157,158,159,
985      160,161,162,163,164,165,166,167,
986      168,169,170,171,172,173,174,175,
987      176,177,178,179,180,181,182,183,
988      184,185,186,187,188,189,190,191,
989      192,193,194,195,196,197,198,199,
990      200,201,202,203,204,205,206,207,
991      208,209,210,211,212,213,214,215,
992      216,217,218,219,220,221,222,223,
993      224,225,226,227,228,229,230,231,
994      232,233,234,235,236,237,238,239,
995      240,241,242,243,244,245,246,247,
996      248,249,250,251,252,253,254,255,
997    
998    /* This table is a case flipping table. */
999    
1000        0,  1,  2,  3,  4,  5,  6,  7,
1001        8,  9, 10, 11, 12, 13, 14, 15,
1002       16, 17, 18, 19, 20, 21, 22, 23,
1003       24, 25, 26, 27, 28, 29, 30, 31,
1004       32, 33, 34, 35, 36, 37, 38, 39,
1005       40, 41, 42, 43, 44, 45, 46, 47,
1006       48, 49, 50, 51, 52, 53, 54, 55,
1007       56, 57, 58, 59, 60, 61, 62, 63,
1008       64, 97, 98, 99,100,101,102,103,
1009      104,105,106,107,108,109,110,111,
1010      112,113,114,115,116,117,118,119,
1011      120,121,122, 91, 92, 93, 94, 95,
1012       96, 65, 66, 67, 68, 69, 70, 71,
1013       72, 73, 74, 75, 76, 77, 78, 79,
1014       80, 81, 82, 83, 84, 85, 86, 87,
1015       88, 89, 90,123,124,125,126,127,
1016      128,129,130,131,132,133,134,135,
1017      136,137,138,139,140,141,142,143,
1018      144,145,146,147,148,149,150,151,
1019      152,153,154,155,156,157,158,159,
1020      160,161,162,163,164,165,166,167,
1021      168,169,170,171,172,173,174,175,
1022      176,177,178,179,180,181,182,183,
1023      184,185,186,187,188,189,190,191,
1024      192,193,194,195,196,197,198,199,
1025      200,201,202,203,204,205,206,207,
1026      208,209,210,211,212,213,214,215,
1027      216,217,218,219,220,221,222,223,
1028      224,225,226,227,228,229,230,231,
1029      232,233,234,235,236,237,238,239,
1030      240,241,242,243,244,245,246,247,
1031      248,249,250,251,252,253,254,255,
1032    
1033    /* This table contains bit maps for various character classes. Each map is 32
1034    bytes long and the bits run from the least significant end of each byte. The
1035    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1036    graph, print, punct, and cntrl. Other classes are built from combinations. */
1037    
1038      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1039      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1040      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1041      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1042    
1043      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1044      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1045      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1046      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1047    
1048      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1049      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1050      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1051      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1052    
1053      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1054      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1055      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1056      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1057    
1058      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1059      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1060      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1061      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1062    
1063      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1064      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1065      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1066      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1067    
1068      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1069      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1070      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1071      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1072    
1073      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1074      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1075      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1076      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1077    
1078      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1079      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1080      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1081      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1082    
1083      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1084      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1085      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1086      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1087    
1088    /* This table identifies various classes of character by individual bits:
1089      0x01   white space character
1090      0x02   letter
1091      0x04   decimal digit
1092      0x08   hexadecimal digit
1093      0x10   alphanumeric or '_'
1094      0x80   regular expression metacharacter or binary zero
1095    */
1096    
1097      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
1098      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
1099      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
1100      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
1101      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
1102      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
1103      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
1104      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
1105      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
1106      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
1107      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
1108      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
1109      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
1110      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
1111      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
1112      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
1113      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1114      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1115      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1116      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1117      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1118      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1119      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1120      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1121      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1122      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1123      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1124      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1125      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1126      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1127      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1128      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1129    
1130    /* This is a set of tables that came originally from a Windows user. It seems to
1131    be at least an approximation of ISO 8859. In particular, there are characters
1132    greater than 128 that are marked as spaces, letters, etc. */
1133    
1134    static const pcre_uint8 tables1[] = {
1135    0,1,2,3,4,5,6,7,
1136    8,9,10,11,12,13,14,15,
1137    16,17,18,19,20,21,22,23,
1138    24,25,26,27,28,29,30,31,
1139    32,33,34,35,36,37,38,39,
1140    40,41,42,43,44,45,46,47,
1141    48,49,50,51,52,53,54,55,
1142    56,57,58,59,60,61,62,63,
1143    64,97,98,99,100,101,102,103,
1144    104,105,106,107,108,109,110,111,
1145    112,113,114,115,116,117,118,119,
1146    120,121,122,91,92,93,94,95,
1147    96,97,98,99,100,101,102,103,
1148    104,105,106,107,108,109,110,111,
1149    112,113,114,115,116,117,118,119,
1150    120,121,122,123,124,125,126,127,
1151    128,129,130,131,132,133,134,135,
1152    136,137,138,139,140,141,142,143,
1153    144,145,146,147,148,149,150,151,
1154    152,153,154,155,156,157,158,159,
1155    160,161,162,163,164,165,166,167,
1156    168,169,170,171,172,173,174,175,
1157    176,177,178,179,180,181,182,183,
1158    184,185,186,187,188,189,190,191,
1159    224,225,226,227,228,229,230,231,
1160    232,233,234,235,236,237,238,239,
1161    240,241,242,243,244,245,246,215,
1162    248,249,250,251,252,253,254,223,
1163    224,225,226,227,228,229,230,231,
1164    232,233,234,235,236,237,238,239,
1165    240,241,242,243,244,245,246,247,
1166    248,249,250,251,252,253,254,255,
1167    0,1,2,3,4,5,6,7,
1168    8,9,10,11,12,13,14,15,
1169    16,17,18,19,20,21,22,23,
1170    24,25,26,27,28,29,30,31,
1171    32,33,34,35,36,37,38,39,
1172    40,41,42,43,44,45,46,47,
1173    48,49,50,51,52,53,54,55,
1174    56,57,58,59,60,61,62,63,
1175    64,97,98,99,100,101,102,103,
1176    104,105,106,107,108,109,110,111,
1177    112,113,114,115,116,117,118,119,
1178    120,121,122,91,92,93,94,95,
1179    96,65,66,67,68,69,70,71,
1180    72,73,74,75,76,77,78,79,
1181    80,81,82,83,84,85,86,87,
1182    88,89,90,123,124,125,126,127,
1183    128,129,130,131,132,133,134,135,
1184    136,137,138,139,140,141,142,143,
1185    144,145,146,147,148,149,150,151,
1186    152,153,154,155,156,157,158,159,
1187    160,161,162,163,164,165,166,167,
1188    168,169,170,171,172,173,174,175,
1189    176,177,178,179,180,181,182,183,
1190    184,185,186,187,188,189,190,191,
1191    224,225,226,227,228,229,230,231,
1192    232,233,234,235,236,237,238,239,
1193    240,241,242,243,244,245,246,215,
1194    248,249,250,251,252,253,254,223,
1195    192,193,194,195,196,197,198,199,
1196    200,201,202,203,204,205,206,207,
1197    208,209,210,211,212,213,214,247,
1198    216,217,218,219,220,221,222,255,
1199    0,62,0,0,1,0,0,0,
1200    0,0,0,0,0,0,0,0,
1201    32,0,0,0,1,0,0,0,
1202    0,0,0,0,0,0,0,0,
1203    0,0,0,0,0,0,255,3,
1204    126,0,0,0,126,0,0,0,
1205    0,0,0,0,0,0,0,0,
1206    0,0,0,0,0,0,0,0,
1207    0,0,0,0,0,0,255,3,
1208    0,0,0,0,0,0,0,0,
1209    0,0,0,0,0,0,12,2,
1210    0,0,0,0,0,0,0,0,
1211    0,0,0,0,0,0,0,0,
1212    254,255,255,7,0,0,0,0,
1213    0,0,0,0,0,0,0,0,
1214    255,255,127,127,0,0,0,0,
1215    0,0,0,0,0,0,0,0,
1216    0,0,0,0,254,255,255,7,
1217    0,0,0,0,0,4,32,4,
1218    0,0,0,128,255,255,127,255,
1219    0,0,0,0,0,0,255,3,
1220    254,255,255,135,254,255,255,7,
1221    0,0,0,0,0,4,44,6,
1222    255,255,127,255,255,255,127,255,
1223    0,0,0,0,254,255,255,255,
1224    255,255,255,255,255,255,255,127,
1225    0,0,0,0,254,255,255,255,
1226    255,255,255,255,255,255,255,255,
1227    0,2,0,0,255,255,255,255,
1228    255,255,255,255,255,255,255,127,
1229    0,0,0,0,255,255,255,255,
1230    255,255,255,255,255,255,255,255,
1231    0,0,0,0,254,255,0,252,
1232    1,0,0,248,1,0,0,120,
1233    0,0,0,0,254,255,255,255,
1234    0,0,128,0,0,0,128,0,
1235    255,255,255,255,0,0,0,0,
1236    0,0,0,0,0,0,0,128,
1237    255,255,255,255,0,0,0,0,
1238    0,0,0,0,0,0,0,0,
1239    128,0,0,0,0,0,0,0,
1240    0,1,1,0,1,1,0,0,
1241    0,0,0,0,0,0,0,0,
1242    0,0,0,0,0,0,0,0,
1243    1,0,0,0,128,0,0,0,
1244    128,128,128,128,0,0,128,0,
1245    28,28,28,28,28,28,28,28,
1246    28,28,0,0,0,0,0,128,
1247    0,26,26,26,26,26,26,18,
1248    18,18,18,18,18,18,18,18,
1249    18,18,18,18,18,18,18,18,
1250    18,18,18,128,128,0,128,16,
1251    0,26,26,26,26,26,26,18,
1252    18,18,18,18,18,18,18,18,
1253    18,18,18,18,18,18,18,18,
1254    18,18,18,128,128,0,0,0,
1255    0,0,0,0,0,1,0,0,
1256    0,0,0,0,0,0,0,0,
1257    0,0,0,0,0,0,0,0,
1258    0,0,0,0,0,0,0,0,
1259    1,0,0,0,0,0,0,0,
1260    0,0,18,0,0,0,0,0,
1261    0,0,20,20,0,18,0,0,
1262    0,20,18,0,0,0,0,0,
1263    18,18,18,18,18,18,18,18,
1264    18,18,18,18,18,18,18,18,
1265    18,18,18,18,18,18,18,0,
1266    18,18,18,18,18,18,18,18,
1267    18,18,18,18,18,18,18,18,
1268    18,18,18,18,18,18,18,18,
1269    18,18,18,18,18,18,18,0,
1270    18,18,18,18,18,18,18,18
1271    };
1272    
1273    
1274    
1275    
1276    #ifndef HAVE_STRERROR
1277    /*************************************************
1278    *     Provide strerror() for non-ANSI libraries  *
1279    *************************************************/
1280    
1281    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1282    in their libraries, but can provide the same facility by this simple
1283    alternative function. */
1284    
1285    extern int   sys_nerr;
1286    extern char *sys_errlist[];
1287    
1288    char *
1289    strerror(int n)
1290    {
1291    if (n < 0 || n >= sys_nerr) return "unknown error number";  /* n is outside 0..sys_nerr-1: fixed fallback text */
1292    return sys_errlist[n];  /* entry n of the externally supplied sys_errlist[] */
1293    }
1294    #endif /* HAVE_STRERROR */
1295    
1296    
1297    
1298    /*************************************************
1299    *       Print newline configuration              *
1300    *************************************************/
1301    
1302    /*
1303    Arguments:
1304      rc         the return code from PCRE_CONFIG_NEWLINE
1305      isc        TRUE if called from "-C newline"
1306    Returns:     nothing
1307    */
1308    
1309    static void
1310    print_newline_config(int rc, BOOL isc)
1311    {
1312    const char *s = NULL;  /* name of the newline convention, set by the switch below */
1313    if (!isc) printf("  Newline sequence is ");  /* the label is printed only when isc is false */
1314    switch(rc)
1315      {
1316      case CHAR_CR: s = "CR"; break;
1317      case CHAR_LF: s = "LF"; break;
1318      case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;  /* CR shifted into bits 8 and up, LF in the low byte */
1319      case -1: s = "ANY"; break;
1320      case -2: s = "ANYCRLF"; break;
1321    
1322      default:
1323      printf("a non-standard value: 0x%04x\n", rc);  /* unrecognised code: print it in hex with its own newline */
1324      return;  /* nothing more to print for this case */
1325      }
1326    
1327    printf("%s\n", s);  /* recognised code: print its name */
1328    }
1329    
1330    
1331    
1332    /*************************************************
1333    *         JIT memory callback                    *
1334    *************************************************/
1335    
1336    static pcre_jit_stack* jit_callback(void *arg)
1337    {
1338    jit_was_used = TRUE;  /* record that this callback has been invoked */
1339    return (pcre_jit_stack *)arg;  /* hand arg back unchanged, cast to a JIT stack pointer */
1340    }
1341    
1342    
1343    #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1344    /*************************************************
1345    *            Convert UTF-8 string to value       *
1346    *************************************************/
1347    
1348    /* This function takes one or more bytes that represent a UTF-8 character,
1349    and returns the value of the character.
1350    
1351    Argument:
1352      utf8bytes   a pointer to the byte vector
1353      vptr        a pointer to an int to receive the value
1354    
1355    Returns:      >  0 => the number of bytes consumed
1356                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1357    */
1358    
1359    static int
1360    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1361    {
1362    int c = *utf8bytes++;  /* first byte of the sequence */
1363    int d = c;  /* working copy, shifted left to count the leading 1 bits */
1364    int i, j, s;
1365    
1366    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1367      {
1368      if ((d & 0x80) == 0) break;
1369      d <<= 1;
1370      }
1371    
1372    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1373    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1374    
1375    /* i now has a value in the range 1-5 */
1376    
1377    s = 6*i;  /* bit position for the payload of the first byte */
1378    d = (c & utf8_table3[i]) << s;  /* utf8_table3 is defined elsewhere; presumably it masks off the length-prefix bits */
1379    
1380    for (j = 0; j < i; j++)
1381      {
1382      c = *utf8bytes++;
1383      if ((c & 0xc0) != 0x80) return -(j+1);  /* not of the form 10xxxxxx: return the negated offset of this byte */
1384      s -= 6;
1385      d |= (c & 0x3f) << s;  /* merge the 6 payload bits */
1386      }
1387    
1388    /* Check that encoding was the correct unique one */
1389    
1390    for (j = 0; j < utf8_table1_size; j++)
1391      if (d <= utf8_table1[j]) break;
1392    if (j != i) return -(i+1);  /* the first table limit that holds d is not at index i: reject */
1393    
1394    /* Valid value */
1395    
1396    *vptr = d;
1397    return i+1;  /* first byte plus i additional bytes */
1398    }
1399    #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1400    
1401    
1402    
1403    #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1404    /*************************************************
1405    *       Convert character value to UTF-8         *
1406    *************************************************/
1407    
1408    /* This function takes an integer value in the range 0 - 0x7fffffff
1409    and encodes it as a UTF-8 character in 1 to 6 bytes.
1410    
1411    Arguments:
1412      cvalue     the character value
1413      utf8bytes  pointer to buffer for result - at least 6 bytes long
1414    
1415    Returns:     number of bytes placed in the buffer
1416    */
1417    
1418    static int
1419    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1420    {
1421    register int i, j;
1422    for (i = 0; i < utf8_table1_size; i++)  /* find the first table limit that cvalue does not exceed */
1423      if (cvalue <= utf8_table1[i]) break;
1424    utf8bytes += i;  /* point at the last byte; the sequence is filled in backwards */
1425    for (j = i; j > 0; j--)
1426     {
1427     *utf8bytes-- = 0x80 | (cvalue & 0x3f);  /* trailing byte: 10xxxxxx carrying the low 6 bits */
1428     cvalue >>= 6;
1429     }
1430    *utf8bytes = utf8_table2[i] | cvalue;  /* first byte: utf8_table2[i] ORed with the remaining bits */
1431    return i + 1;  /* bytes written: the first byte plus i trailing bytes */
1432    }
1433    #endif
1434    
1435    
1436    #ifdef SUPPORT_PCRE16
1437    /*************************************************
1438    *         Convert a string to 16-bit             *
1439    *************************************************/
1440    
1441    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1442    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1443    double, because values up to 0xffff use 1 to 3 bytes in UTF-8 and exactly 2
1444    in UTF-16, while higher values use 4 bytes in UTF-8 and 4 bytes in UTF-16. The
1445    result is always left in buffer16.
1446    
1447    Note that this function does not object to surrogate values. This is
1448    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1449    for the purpose of testing that they are correctly faulted.
1450    
1451    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1452    in UTF-8 so that values greater than 255 can be handled.
1453    
1454    Arguments:
1455      data       TRUE if converting a data line; FALSE for a regex
1456      p          points to a byte string
1457      utf        true if UTF-8 (to be converted to UTF-16)
1458      len        number of bytes in the string (excluding trailing zero)
1459    
1460    Returns:     number of 16-bit data items used (excluding trailing zero)
1461                 OR -1 if a UTF-8 string is malformed
1462                 OR -2 if a value > 0x10ffff is encountered
1463                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1464    */
1465    
1466    static int
1467    to16(int data, pcre_uint8 *p, int utf, int len)
1468    {
1469    pcre_uint16 *pp;
1470    
1471    if (buffer16_size < 2*len + 2)  /* buffer16_size is a byte count: len 16-bit units plus the terminating zero */
1472      {
1473      if (buffer16 != NULL) free(buffer16);  /* the old contents are discarded, not copied */
1474      buffer16_size = 2*len + 2;
1475      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1476      if (buffer16 == NULL)
1477        {
1478        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1479        exit(1);
1480        }
1481      }
1482    
1483    pp = buffer16;
1484    
1485    if (!utf && !data)  /* non-UTF regex: each input byte becomes one 16-bit unit */
1486      {
1487      while (len-- > 0) *pp++ = *p++;
1488      }
1489    
1490    else  /* UTF regex, or any data line: the input is decoded as UTF-8 */
1491      {
1492      int c = 0;
1493      while (len > 0)
1494        {
1495        int chlen = utf82ord(p, &c);
1496        if (chlen <= 0) return -1;  /* malformed UTF-8 */
1497        if (c > 0x10ffff) return -2;
1498        p += chlen;
1499        len -= chlen;
1500        if (c < 0x10000) *pp++ = c; else  /* below 0x10000: stored as a single unit */
1501          {
1502          if (!utf) return -3;  /* a two-unit encoding is only produced in UTF mode */
1503          c -= 0x10000;
1504          *pp++ = 0xD800 | (c >> 10);  /* first unit: high 10 bits */
1505          *pp++ = 0xDC00 | (c & 0x3ff);  /* second unit: low 10 bits */
1506          }
1507        }
1508      }
1509    
1510    *pp = 0;  /* terminating zero, not counted in the return value */
1511    return pp - buffer16;
1512    }
1513    #endif
1514    
1515    #ifdef SUPPORT_PCRE32
1516    /*************************************************
1517    *         Convert a string to 32-bit             *
1518    *************************************************/
1519    
1520    /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1521    8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1522    times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1523    in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1524    result is always left in buffer32.
1525    
1526    Note that this function does not object to surrogate values. This is
1527    deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1528    for the purpose of testing that they are correctly faulted.
1529    
1530    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1531    in UTF-8 so that values greater than 255 can be handled.
1532    
1533    Arguments:
1534      data       TRUE if converting a data line; FALSE for a regex
1535      p          points to a byte string
1536      utf        true if UTF-8 (to be converted to UTF-32)
1537      len        number of bytes in the string (excluding trailing zero)
1538    
1539    Returns:     number of 32-bit data items used (excluding trailing zero)
1540                 OR -1 if a UTF-8 string is malformed
1541                 OR -2 if a value > 0x10ffff is encountered
1542                 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1543    */
1544    
1545    static int
1546    to32(int data, pcre_uint8 *p, int utf, int len)
1547    {
1548    pcre_uint32 *pp;
1549    
1550    if (buffer32_size < 4*len + 4)
1551      {
1552      if (buffer32 != NULL) free(buffer32);
1553      buffer32_size = 4*len + 4;
1554      buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1555      if (buffer32 == NULL)
1556        {
1557        fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1558        exit(1);
1559        }
1560      }
1561    
1562    pp = buffer32;
1563    
1564    if (!utf && !data)
1565      {
1566      while (len-- > 0) *pp++ = *p++;
1567      }
1568    
1569    else
1570      {
1571      int c = 0;
1572      while (len > 0)
1573        {
1574        int chlen = utf82ord(p, &c);
1575        if (chlen <= 0) return -1;
1576        if (utf)
1577          {
1578          if (c > 0x10ffff) return -2;
1579          if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1580          }
1581    
1582        p += chlen;
1583        len -= chlen;
1584        *pp++ = c;
1585        }
1586      }
1587    
1588    *pp = 0;
1589    return pp - buffer32;
1590    }
1591    #endif
1592    
1593    /*************************************************
1594  *        Read or extend an input line            *  *        Read or extend an input line            *
1595  *************************************************/  *************************************************/
1596    
# Line 214  Returns:       pointer to the start of n Line 1613  Returns:       pointer to the start of n
1613                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1614  */  */
1615    
1616  static uschar *  static pcre_uint8 *
1617  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1618  {  {
1619  uschar *here = start;  pcre_uint8 *here = start;
1620    
1621  for (;;)  for (;;)
1622    {    {
1623    int rlen = buffer_size - (here - buffer);    size_t rlen = (size_t)(buffer_size - (here - buffer));
1624    
1625    if (rlen > 1000)    if (rlen > 1000)
1626      {      {
1627      int dlen;      int dlen;
1628    
1629      /* If libreadline support is required, use readline() to read a line if the      /* If libreadline or libedit support is required, use readline() to read a
1630      input is a terminal. Note that readline() removes the trailing newline, so      line if the input is a terminal. Note that readline() removes the trailing
1631      we must put it back again, to be compatible with fgets(). */      newline, so we must put it back again, to be compatible with fgets(). */
1632    
1633  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1634      if (isatty(fileno(f)))      if (isatty(fileno(f)))
1635        {        {
1636        size_t len;        size_t len;
# Line 251  for (;;) Line 1650  for (;;)
1650      /* Read the next line by normal means, prompting if the file is stdin. */      /* Read the next line by normal means, prompting if the file is stdin. */
1651    
1652        {        {
1653        if (f == stdin) printf(prompt);        if (f == stdin) printf("%s", prompt);
1654        if (fgets((char *)here, rlen,  f) == NULL)        if (fgets((char *)here, rlen,  f) == NULL)
1655          return (here == start)? NULL : start;          return (here == start)? NULL : start;
1656        }        }
# Line 264  for (;;) Line 1663  for (;;)
1663    else    else
1664      {      {
1665      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1666      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1667      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1668      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1669    
1670      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1671        {        {
# Line 297  return NULL;  /* Control never gets here Line 1696  return NULL;  /* Control never gets here
1696    
1697    
1698    
   
   
   
   
1699  /*************************************************  /*************************************************
1700  *          Read number from string               *  *          Read number from string               *
1701  *************************************************/  *************************************************/
# Line 317  Returns:        the unsigned long Line 1712  Returns:        the unsigned long
1712  */  */
1713    
1714  static int  static int
1715  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1716  {  {
1717  int result = 0;  int result = 0;
1718  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 328  return(result); Line 1723  return(result);
1723    
1724    
1725    
   
1726  /*************************************************  /*************************************************
1727  *            Convert UTF-8 string to value       *  *             Print one character                *
1728  *************************************************/  *************************************************/
1729    
1730  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
1731    
1732  Returns:      >  0 => the number of bytes consumed  static int pchar(int c, FILE *f)
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1733  {  {
1734  int c = *utf8bytes++;  if (PRINTOK(c))
1735  int d = c;    {
1736  int i, j, s;    if (f != NULL) fprintf(f, "%c", c);
1737      return 1;
1738      }
1739    
1740  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1741    {    {
1742    if ((d & 0x80) == 0) break;    if (use_utf)
1743    d <<= 1;      {
1744        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1745        return 6;
1746        }
1747      else
1748        {
1749        if (f != NULL) fprintf(f, "\\x%02x", c);
1750        return 4;
1751        }
1752    }    }
1753    
1754  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1755  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return (c <= 0x000000ff)? 6 :
1756           (c <= 0x00000fff)? 7 :
1757           (c <= 0x0000ffff)? 8 :
1758           (c <= 0x000fffff)? 9 : 10;
1759    }
1760    
 /* i now has a value in the range 1-5 */  
1761    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1762    
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. A negative length
means that the string is zero-terminated. */

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int c = 0;
int yield = 0;

if (length < 0)
  length = strlen((char *)p);

while (length > 0)
  {
  int consumed = 0;

#if !defined NOUTF
  if (use_utf)
    {
    int rc = utf82ord(p, &c);
    if (rc > 0 && rc <= length)     /* Mustn't run over the end */
      consumed = rc;
    }
#endif

  /* Not UTF, or the bytes did not decode: treat as a single byte. */
  if (consumed == 0)
    {
    c = *p;
    consumed = 1;
    }

  p += consumed;
  length -= consumed;
  yield += pchar(c, f);
  }

return yield;
}
#endif
1801    
1802    
1803    
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Returns the number of 16-bit items that precede the terminating zero. */

static int strlen16(PCRE_SPTR16 p)
{
int count;
for (count = 0; p[count] != 0; count++) { }
return count;
}
#endif  /* SUPPORT_PCRE16 */
1816    
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
1817    
 Returns:     number of characters placed in the buffer  
 */  
1818    
#ifdef SUPPORT_PCRE32
/*************************************************
*    Find length of 0-terminated 32-bit string   *
*************************************************/

/* Returns the number of 32-bit items that precede the terminating zero. */

static int strlen32(PCRE_SPTR32 p)
{
int count;
for (count = 0; p[count] != 0; count++) { }
return count;
}
#endif  /* SUPPORT_PCRE32 */
 #endif  
1831    
1832    
1833    
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing.

Arguments:
  p          points to the 16-bit string
  length     number of 16-bit items, or negative if zero-terminated
  f          the output file, or NULL to count only

Returns:     the number of characters printed (or counted)
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  /* A high surrogate followed by a low surrogate forms a single character.
  The low surrogate range is 0xDC00 to 0xDFFF inclusive. */
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;
    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1870    
1871    c = *p++;  
1872    if (PRINTHEX(c))  
#ifdef SUPPORT_PCRE32
/*************************************************
*           Print 32-bit character string        *
*************************************************/

/* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. A negative length
means that the string is zero-terminated. Each 32-bit item is passed to pchar()
as one character. */

static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen32(p);

for (; length > 0; length--)
  {
  int c = *p++;
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE32 */
1897    
1898    
1899    
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copies the run of alphanumeric characters at p into the buffer at *pp,
followed by two zero items, and reports on outfile if the compiled pattern has
no group of that name.

Arguments:
  p          points to the start of the name in the input text
  pp         points to the current position in the name buffer; updated to
               point just past the first terminating zero
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *dest;

for (dest = *pp; isalnum(*p); p++) *dest++ = *p;
dest[0] = 0;
dest[1] = 0;
dest++;

if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(*pp, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = dest;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1923    
1924    
1925    
#ifdef SUPPORT_PCRE16
/*************************************************
*     Read a capture name (16-bit) and check it  *
*************************************************/

/* Note that the text being read is 8-bit. Each alphanumeric character at p is
widened into the 16-bit buffer at *pp, followed by two zero items, and a
message is written to outfile if the compiled pattern has no group of that
name.

Arguments:
  p          points to the start of the name in the (8-bit) input text
  pp         points to the current position in the 16-bit name buffer; updated
               to point just past the first terminating zero
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *dest;

for (dest = *pp; isalnum(*p); p++) *dest++ = *p;
dest[0] = 0;
dest[1] = 0;
dest++;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(*pp, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = dest;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1950    
1951    
1952    
#ifdef SUPPORT_PCRE32
/*************************************************
*     Read a capture name (32-bit) and check it  *
*************************************************/

/* Note that the text being read is 8-bit. Each alphanumeric character at p is
widened into the 32-bit buffer at *pp, followed by two zero items, and a
message is written to outfile if the compiled pattern has no group of that
name.

Arguments:
  p          points to the start of the name in the (8-bit) input text
  pp         points to the current position in the 32-bit name buffer; updated
               to point just past the first terminating zero
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
{
pcre_uint32 *dest;

for (dest = *pp; isalnum(*p); p++) *dest++ = *p;
dest[0] = 0;
dest[1] = 0;
dest++;

if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(*pp, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = dest;
return p;
}
#endif  /* SUPPORT_PCRE32 */
1977    
1978    
1979    
# Line 515  if (callout_extra) Line 2002  if (callout_extra)
2002      else      else
2003        {        {
2004        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
2005        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
2006          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
2007        fprintf(f, "\n");        fprintf(f, "\n");
2008        }        }
# Line 528  printed lengths of the substrings. */ Line 2015  printed lengths of the substrings. */
2015    
2016  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
2017    
2018  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2019  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
2020    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
2021    
2022  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2023    
2024  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
2025    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
2026    
2027  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 571  fprintf(outfile, "%.*s", (cb->next_item_ Line 2058  fprintf(outfile, "%.*s", (cb->next_item_
2058  fprintf(outfile, "\n");  fprintf(outfile, "\n");
2059  first_callout = 0;  first_callout = 0;
2060    
2061    if (cb->mark != last_callout_mark)
2062      {
2063      if (cb->mark == NULL)
2064        fprintf(outfile, "Latest Mark: <unset>\n");
2065      else
2066        {
2067        fprintf(outfile, "Latest Mark: ");
2068        PCHARSV(cb->mark, 0, -1, outfile);
2069        putc('\n', outfile);
2070        }
2071      last_callout_mark = cb->mark;
2072      }
2073    
2074  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
2075    {    {
2076    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 590  return (cb->callout_number != callout_fa Line 2090  return (cb->callout_number != callout_fa
2090  *            Local malloc functions              *  *            Local malloc functions              *
2091  *************************************************/  *************************************************/
2092    
2093  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
2094  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
2095    show_malloc variable is set only during matching. */
2096    
2097  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
2098  {  {
2099  void *block = malloc(size);  void *block = malloc(size);
2100  gotten_store = size;  gotten_store = size;
2101    if (first_gotten_store == 0) first_gotten_store = size;
2102  if (show_malloc)  if (show_malloc)
2103    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
2104  return block;  return block;
# Line 609  if (show_malloc) Line 2111  if (show_malloc)
2111  free(block);  free(block);
2112  }  }
2113    
   
2114  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
2115    
2116  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 632  free(block); Line 2133  free(block);
2133  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
2134  *************************************************/  *************************************************/
2135    
2136  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
2137    one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2138    value, but the code is defensive.
2139    
2140    Arguments:
2141      re        compiled regex
2142      study     study data
2143      option    PCRE_INFO_xxx option
2144      ptr       where to put the data
2145    
2146    Returns:    0 when OK, < 0 on error
2147    */
2148    
2149  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
2150    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2151  {  {
2152  int rc;  int rc;
2153  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
2154    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (pcre_mode == PCRE32_MODE)
2155    #ifdef SUPPORT_PCRE32
2156      rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2157    #else
2158      rc = PCRE_ERROR_BADMODE;
2159    #endif
2160    else if (pcre_mode == PCRE16_MODE)
2161    #ifdef SUPPORT_PCRE16
2162      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2163    #else
2164      rc = PCRE_ERROR_BADMODE;
2165    #endif
2166    else
2167    #ifdef SUPPORT_PCRE8
2168      rc = pcre_fullinfo(re, study, option, ptr);
2169    #else
2170      rc = PCRE_ERROR_BADMODE;
2171    #endif
2172    
2173    if (rc < 0)
2174      {
2175      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2176        pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2177      if (rc == PCRE_ERROR_BADMODE)
2178        fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2179          "%d-bit mode\n", 8 * CHAR_SIZE,
2180          8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2181      }
2182    
2183    return rc;
2184  }  }
2185    
2186    
2187    
2188  /*************************************************  /*************************************************
2189  *         Byte flipping function                 *  *             Swap byte functions                *
2190  *************************************************/  *************************************************/
2191    
2192  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2193  byteflip(unsigned long int value, int n)  value, respectively.
2194    
2195    Arguments:
2196      value        any number
2197    
2198    Returns:       the byte swapped value
2199    */
2200    
2201    static pcre_uint32
2202    swap_uint32(pcre_uint32 value)
2203  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
2204  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
2205         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
2206         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
2207         ((value & 0xff000000) >> 24);         (value >> 24);
2208    }
2209    
2210    static pcre_uint16
2211    swap_uint16(pcre_uint16 value)
2212    {
2213    return (value >> 8) | (value << 8);
2214    }
2215    
2216    
2217    
2218    /*************************************************
2219    *        Flip bytes in a compiled pattern        *
2220    *************************************************/
2221    
2222    /* This function is called if the 'F' option was present on a pattern that is
2223    to be written to a file. We flip the bytes of all the integer fields in the
2224    regex data block and the study block. In 16-bit mode this also flips relevant
2225    bytes in the pattern itself. This is to make it possible to test PCRE's
2226    ability to reload byte-flipped patterns, e.g. those compiled on a different
2227    architecture. */
2228    
2229    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
        /* Byte-flip a compiled 8-bit or 16-bit pattern in place.

        Arguments:
          ere        the compiled pattern
          extra      the study data, or NULL if there is none

        The header fields (and the study block, when present) are always
        flipped. When 16-bit support is compiled in and pcre_mode is
        PCRE16_MODE, the name table and every 16-bit unit of the compiled code
        are flipped as well, walking the code opcode by opcode until OP_END. */
2230    static void
2231    regexflip8_or_16(pcre *ere, pcre_extra *extra)
2232    {
2233    real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2234    #ifdef SUPPORT_PCRE16
        /* ptr and length describe the name table. They are computed here,
        before the header fields they are derived from get flipped below. */
2235    int op;
2236    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2237    int length = re->name_count * re->name_entry_size;
2238    #ifdef SUPPORT_UTF
2239    BOOL utf = (re->options & PCRE_UTF16) != 0;
2240    BOOL utf16_char = FALSE;
2241    #endif /* SUPPORT_UTF */
2242    #endif /* SUPPORT_PCRE16 */
2243    
2244    /* Always flip the bytes in the main data block and study blocks. */
2245    
2246    re->magic_number = REVERSED_MAGIC_NUMBER;
2247    re->size = swap_uint32(re->size);
2248    re->options = swap_uint32(re->options);
2249    re->flags = swap_uint16(re->flags);
2250    re->top_bracket = swap_uint16(re->top_bracket);
2251    re->top_backref = swap_uint16(re->top_backref);
2252    re->first_char = swap_uint16(re->first_char);
2253    re->req_char = swap_uint16(re->req_char);
2254    re->name_table_offset = swap_uint16(re->name_table_offset);
2255    re->name_entry_size = swap_uint16(re->name_entry_size);
2256    re->name_count = swap_uint16(re->name_count);
2257    
2258    if (extra != NULL)
2259      {
2260      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2261      rsd->size = swap_uint32(rsd->size);
2262      rsd->flags = swap_uint32(rsd->flags);
2263      rsd->minlength = swap_uint32(rsd->minlength);
2264      }
2265    
2266    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2267    in the name table, if present, and then in the pattern itself. */
2268    
2269    #ifdef SUPPORT_PCRE16
2270    if (pcre_mode != PCRE16_MODE) return;
2271    
        /* Each pass first flips the "length" units left pending by the
        previous pass (the name table on the first pass, then the operands of
        the previous opcode), and then reads and flips the next opcode. */
2272    while(TRUE)
2273      {
2274      /* Swap previous characters. */
2275      while (length-- > 0)
2276        {
2277        *ptr = swap_uint16(*ptr);
2278        ptr++;
2279        }
2280    #ifdef SUPPORT_UTF
          /* The previous opcode ended with a character. If the unit just
          flipped was a high surrogate (tested on its already-flipped value),
          one more unit belongs to that character. */
2281      if (utf16_char)
2282        {
2283        if ((ptr[-1] & 0xfc00) == 0xd800)
2284          {
2285          /* We know that there is only one extra character in UTF-16. */
2286          *ptr = swap_uint16(*ptr);
2287          ptr++;
2288          }
2289        }
2290      utf16_char = FALSE;
2291    #endif /* SUPPORT_UTF */
2292    
2293      /* Get next opcode. */
2294    
2295      length = 0;
2296      op = *ptr;
2297      *ptr++ = swap_uint16(op);
2298    
2299      switch (op)
2300        {
2301        case OP_END:
2302        return;
2303    
2304    #ifdef SUPPORT_UTF
            /* Opcodes that are followed by a character, which in UTF-16 mode
            may take an extra unit beyond the fixed opcode length. */
2305        case OP_CHAR:
2306        case OP_CHARI:
2307        case OP_NOT:
2308        case OP_NOTI:
2309        case OP_STAR:
2310        case OP_MINSTAR:
2311        case OP_PLUS:
2312        case OP_MINPLUS:
2313        case OP_QUERY:
2314        case OP_MINQUERY:
2315        case OP_UPTO:
2316        case OP_MINUPTO:
2317        case OP_EXACT:
2318        case OP_POSSTAR:
2319        case OP_POSPLUS:
2320        case OP_POSQUERY:
2321        case OP_POSUPTO:
2322        case OP_STARI:
2323        case OP_MINSTARI:
2324        case OP_PLUSI:
2325        case OP_MINPLUSI:
2326        case OP_QUERYI:
2327        case OP_MINQUERYI:
2328        case OP_UPTOI:
2329        case OP_MINUPTOI:
2330        case OP_EXACTI:
2331        case OP_POSSTARI:
2332        case OP_POSPLUSI:
2333        case OP_POSQUERYI:
2334        case OP_POSUPTOI:
2335        case OP_NOTSTAR:
2336        case OP_NOTMINSTAR:
2337        case OP_NOTPLUS:
2338        case OP_NOTMINPLUS:
2339        case OP_NOTQUERY:
2340        case OP_NOTMINQUERY:
2341        case OP_NOTUPTO:
2342        case OP_NOTMINUPTO:
2343        case OP_NOTEXACT:
2344        case OP_NOTPOSSTAR:
2345        case OP_NOTPOSPLUS:
2346        case OP_NOTPOSQUERY:
2347        case OP_NOTPOSUPTO:
2348        case OP_NOTSTARI:
2349        case OP_NOTMINSTARI:
2350        case OP_NOTPLUSI:
2351        case OP_NOTMINPLUSI:
2352        case OP_NOTQUERYI:
2353        case OP_NOTMINQUERYI:
2354        case OP_NOTUPTOI:
2355        case OP_NOTMINUPTOI:
2356        case OP_NOTEXACTI:
2357        case OP_NOTPOSSTARI:
2358        case OP_NOTPOSPLUSI:
2359        case OP_NOTPOSQUERYI:
2360        case OP_NOTPOSUPTOI:
2361        if (utf) utf16_char = TRUE;
2362    #endif
2363        /* Fall through. */
2364    
            /* The remaining units of the opcode (its table length minus the
            opcode unit itself) are flipped at the top of the next pass. */
2365        default:
2366        length = OP_lengths16[op] - 1;
2367        break;
2368    
2369        case OP_CLASS:
2370        case OP_NCLASS:
2371        /* Skip the character bit map. */
2372        ptr += 32/sizeof(pcre_uint16);
2373        length = 0;
2374        break;
2375    
2376        case OP_XCLASS:
2377        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
            /* The size is read from the not-yet-flipped link field; the
            opcode, link and flags units are subtracted because they are
            flipped individually below. */
2378        if (LINK_SIZE > 1)
2379          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2380            - (1 + LINK_SIZE + 1));
2381        else
2382          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2383    
2384        /* Reverse the size of the XCLASS instance. */
2385        *ptr = swap_uint16(*ptr);
2386        ptr++;
2387        if (LINK_SIZE > 1)
2388          {
2389          *ptr = swap_uint16(*ptr);
2390          ptr++;
2391          }
2392    
            /* Flip the unit that follows the link; op is reused to hold its
            value so that the XCL_MAP bit can be tested. */
2393        op = *ptr;
2394        *ptr = swap_uint16(op);
2395        ptr++;
2396        if ((op & XCL_MAP) != 0)
2397          {
2398          /* Skip the character bit map. */
2399          ptr += 32/sizeof(pcre_uint16);
2400          length -= 32/sizeof(pcre_uint16);
2401          }
2402        break;
2403        }
2404      }
2405    /* Control should never reach here in 16 bit mode. */
2406    #endif /* SUPPORT_PCRE16 */
2407    }
2408    #endif /* SUPPORT_PCRE[8|16] */
2409    
2410    
2411    
#if defined SUPPORT_PCRE32
/* Reverse the byte order of a compiled 32-bit pattern, in place. The header
fields are swapped first, then the study data (when an extra block is given),
then the name table, and finally the compiled code itself, one 32-bit unit at
a time. Class bit maps are stepped over without being swapped. The magic
number is not swapped but set directly to REVERSED_MAGIC_NUMBER.

Arguments:
  ere        the compiled pattern; treated as a real_pcre32 block
  extra      the extra data block whose study data is to be flipped, or NULL

Returns:     nothing; the function returns when it reaches OP_END
*/

static void
regexflip_32(pcre *ere, pcre_extra *extra)
{
real_pcre32 *re = (real_pcre32 *)ere;
int op;

/* These two values must be taken from the header while it is still in native
byte order, that is, before the fields they depend on are swapped below. */

pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);

/* NOTE(review): study_data is dereferenced whenever extra is non-NULL; this
presumably relies on callers passing only extra blocks that carry study data -
confirm against the callers. */

if (extra != NULL)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 32-bit mode we must swap bytes in the name table, if present, and then
in the pattern itself. On entry to the loop "length" is the size of the name
table, so the first pass swaps the table; after that it is the number of
units that follow the opcode just processed. */

while(TRUE)
  {
  /* Swap previous characters. */
  while (length-- > 0)
    {
    *ptr = swap_uint32(*ptr);
    ptr++;
    }

  /* Get next opcode. It is read in native order before being overwritten
  with its swapped form. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint32(op);

  switch (op)
    {
    case OP_END:
    return;

    default:
    length = OP_lengths32[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint32);
    length = 0;
    break;

    case OP_XCLASS:
    /* LINK_SIZE can only be 1 in 32-bit mode. The size of the instance is
    read before it is swapped; the opcode, the size and the flags unit are
    swapped explicitly, so they are deducted from what remains. */
    length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint32(*ptr);
    ptr++;

    /* Reverse the flags unit, keeping its native value for the test below. */
    op = *ptr;
    *ptr = swap_uint32(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint32);
      length -= 32/sizeof(pcre_uint32);
      }
    break;
    }
  }
/* Control should never reach here in 32 bit mode. */
}

#endif /* SUPPORT_PCRE32 */
2504    
2505    
2506    
2507    static void
2508    regexflip(pcre *ere, pcre_extra *extra)
2509    {
2510    #if defined SUPPORT_PCRE32
2511      if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2512        regexflip_32(ere, extra);
2513    #endif
2514    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2515      if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2516        regexflip8_or_16(ere, extra);
2517    #endif
2518    }
2519    
2520    
2521    
# Line 665  return ((value & 0x000000ff) << 24) | Line 2524  return ((value & 0x000000ff) << 24) |
2524  *************************************************/  *************************************************/
2525    
2526  static int  static int
2527  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2528    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2529    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2530  {  {
# Line 680  for (;;) Line 2539  for (;;)
2539    {    {
2540    *limit = mid;    *limit = mid;
2541    
2542    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2543      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2544    
2545    if (count == errnumber)    if (count == errnumber)
# Line 725  Returns:    < 0, = 0, or > 0, according Line 2584  Returns:    < 0, = 0, or > 0, according
2584  */  */
2585    
2586  static int  static int
2587  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2588  {  {
2589  while (n--)  while (n--)
2590    {    {
# Line 741  return 0; Line 2600  return 0;
2600  *         Check newline indicator                *  *         Check newline indicator                *
2601  *************************************************/  *************************************************/
2602    
2603  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2604  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
2605    
2606  Arguments:  Arguments:
2607    p           points after the leading '<'    p           points after the leading '<'
# Line 753  Returns:      appropriate PCRE_NEWLINE_x Line 2611  Returns:      appropriate PCRE_NEWLINE_x
2611  */  */
2612    
2613  static int  static int
2614  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2615  {  {
2616  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2617  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2618  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2619  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2620  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2621  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2622  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2623  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2624  return 0;  return 0;
2625  }  }
# Line 777  usage(void) Line 2635  usage(void)
2635  {  {
2636  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2637  printf("Input and output default to stdin and stdout.\n");  printf("Input and output default to stdin and stdout.\n");
2638  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2639  printf("If input is a terminal, readline() is used to read from it.\n");  printf("If input is a terminal, readline() is used to read from it.\n");
2640  #else  #else
2641  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2642  #endif  #endif
2643  printf("\nOptions:\n");  printf("\nOptions:\n");
2644  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2645    printf("  -16      use the 16-bit library\n");
2646    #endif
2647    #ifdef SUPPORT_PCRE32
2648    printf("  -32      use the 32-bit library\n");
2649    #endif
2650    printf("  -b       show compiled code\n");
2651  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2652    printf("  -C arg   show a specific compile-time option\n");
2653    printf("           and exit with its value. The arg can be:\n");
2654    printf("     linksize     internal link size [2, 3, 4]\n");
2655    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2656    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2657    printf("     pcre32       32 bit library support enabled [0, 1]\n");
2658    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2659    printf("     ucp          Unicode Properties supported [0, 1]\n");
2660    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2661    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2662  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2663  #if !defined NODFA  #if !defined NODFA
2664  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 799  printf("  -p       use POSIX interface\n Line 2673  printf("  -p       use POSIX interface\n
2673  #endif  #endif
2674  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2675  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2676  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
2677           "  -s+      force each pattern to be studied, using JIT if available\n"
2678           "  -s++     ditto, verifying when JIT was actually used\n"
2679           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2680           "             where 1 <= n <= 7 selects JIT options\n"
2681           "  -s++n    ditto, verifying when JIT was actually used\n"
2682         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2683  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2684  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 819  options, followed by a set of test data, Line 2698  options, followed by a set of test data,
2698  int main(int argc, char **argv)  int main(int argc, char **argv)
2699  {  {
2700  FILE *infile = stdin;  FILE *infile = stdin;
2701    const char *version;
2702  int options = 0;  int options = 0;
2703  int study_options = 0;  int study_options = 0;
2704  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 827  int timeit = 0; Line 2707  int timeit = 0;
2707  int timeitm = 0;  int timeitm = 0;
2708  int showinfo = 0;  int showinfo = 0;
2709  int showstore = 0;  int showstore = 0;
2710    int force_study = -1;
2711    int force_study_options = 0;
2712  int quiet = 0;  int quiet = 0;
2713  int size_offsets = 45;  int size_offsets = 45;
2714  int size_offsets_max;  int size_offsets_max;
2715  int *offsets = NULL;  int *offsets = NULL;
 #if !defined NOPOSIX  
 int posix = 0;  
 #endif  
2716  int debug = 0;  int debug = 0;
2717  int done = 0;  int done = 0;
2718  int all_use_dfa = 0;  int all_use_dfa = 0;
2719    int verify_jit = 0;
2720  int yield = 0;  int yield = 0;
2721  int stack_size;  int stack_size;
2722    
2723  /* These vectors store, end-to-end, a list of captured substring names. Assume  #if !defined NOPOSIX
2724  that 1024 is plenty long enough for the few names we'll be testing. */  int posix = 0;
2725    #endif
2726    #if !defined NODFA
2727    int *dfa_workspace = NULL;
2728    #endif
2729    
2730    pcre_jit_stack *jit_stack = NULL;
2731    
2732  uschar copynames[1024];  /* These vectors store, end-to-end, a list of zero-terminated captured
2733  uschar getnames[1024];  substring names, each list itself being terminated by an empty name. Assume
2734    that 1024 is plenty long enough for the few names we'll be testing. It is
2735    easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2736    for the actual memory, to ensure alignment. */
2737    
2738    pcre_uint32 copynames[1024];
2739    pcre_uint32 getnames[1024];
2740    
2741    #ifdef SUPPORT_PCRE32
2742    pcre_uint32 *cn32ptr;
2743    pcre_uint32 *gn32ptr;
2744    #endif
2745    
2746  uschar *copynamesptr;  #ifdef SUPPORT_PCRE16
2747  uschar *getnamesptr;  pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2748    pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2749    pcre_uint16 *cn16ptr;
2750    pcre_uint16 *gn16ptr;
2751    #endif
2752    
2753  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2754  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2755    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2756    pcre_uint8 *cn8ptr;
2757    pcre_uint8 *gn8ptr;
2758    #endif
2759    
2760  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2761  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-
2762  pbuffer = (unsigned char *)malloc(buffer_size);  and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2763    
2764    buffer = (pcre_uint8 *)malloc(buffer_size);
2765    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2766    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2767    
2768  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2769    
# Line 869  it set 0x8000, but then I was advised th Line 2778  it set 0x8000, but then I was advised th
2778  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2779  #endif  #endif
2780    
2781    /* Get the version number: both pcre_version() and pcre16_version() give the
2782    same answer. We just need to ensure that we call one that is available. */
2783    
2784    #if defined SUPPORT_PCRE8
2785    version = pcre_version();
2786    #elif defined SUPPORT_PCRE16
2787    version = pcre16_version();
2788    #elif defined SUPPORT_PCRE32
2789    version = pcre32_version();
2790    #endif
2791    
2792  /* Scan options */  /* Scan options */
2793    
2794  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2795    {    {
2796    unsigned char *endptr;    pcre_uint8 *endptr;
2797      char *arg = argv[op];
2798    
2799      if (strcmp(arg, "-m") == 0) showstore = 1;
2800      else if (strcmp(arg, "-s") == 0) force_study = 0;
2801    
2802    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    else if (strncmp(arg, "-s+", 3) == 0)
2803      showstore = 1;      {
2804    else if (strcmp(argv[op], "-q") == 0) quiet = 1;      arg += 3;
2805    else if (strcmp(argv[op], "-b") == 0) debug = 1;      if (*arg == '+') { arg++; verify_jit = TRUE; }
2806    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      force_study = 1;
2807    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      if (*arg == 0)
2808    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;        force_study_options = jit_study_bits[6];
2809        else if (*arg >= '1' && *arg <= '7')
2810          force_study_options = jit_study_bits[*arg - '1'];
2811        else goto BAD_ARG;
2812        }
2813      else if (strcmp(arg, "-16") == 0)
2814        {
2815    #ifdef SUPPORT_PCRE16
2816        pcre_mode = PCRE16_MODE;
2817    #else
2818        printf("** This version of PCRE was built without 16-bit support\n");
2819        exit(1);
2820    #endif
2821        }
2822      else if (strcmp(arg, "-32") == 0)
2823        {
2824    #ifdef SUPPORT_PCRE32
2825        pcre_mode = PCRE32_MODE;
2826    #else
2827        printf("** This version of PCRE was built without 32-bit support\n");
2828        exit(1);
2829    #endif
2830        }
2831      else if (strcmp(arg, "-q") == 0) quiet = 1;
2832      else if (strcmp(arg, "-b") == 0) debug = 1;
2833      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2834      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2835      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2836  #if !defined NODFA  #if !defined NODFA
2837    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2838  #endif  #endif
2839    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2840        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2841          *endptr == 0))          *endptr == 0))
2842      {      {
2843      op++;      op++;
2844      argc--;      argc--;
2845      }      }
2846    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2847      {      {
2848      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2849      int temp;      int temp;
2850      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2851                       *endptr == 0))                       *endptr == 0))
2852        {        {
2853        timeitm = temp;        timeitm = temp;
# Line 906  while (argc > 1 && argv[op][0] == '-') Line 2857  while (argc > 1 && argv[op][0] == '-')
2857      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2858      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2859      }      }
2860    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2861        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2862          *endptr == 0))          *endptr == 0))
2863      {      {
2864  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2865      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2866      exit(1);      exit(1);
2867  #else  #else
# Line 929  while (argc > 1 && argv[op][0] == '-') Line 2880  while (argc > 1 && argv[op][0] == '-')
2880  #endif  #endif
2881      }      }
2882  #if !defined NOPOSIX  #if !defined NOPOSIX
2883    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2884  #endif  #endif
2885    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2886      {      {
2887      int rc;      int rc;
2888      unsigned long int lrc;      unsigned long int lrc;
2889      printf("PCRE version %s\n", pcre_version());  
2890        if (argc > 2)
2891          {
2892          if (strcmp(argv[op + 1], "linksize") == 0)
2893            {
2894            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2895            printf("%d\n", rc);
2896            yield = rc;
2897            }
2898          else if (strcmp(argv[op + 1], "pcre8") == 0)
2899            {
2900    #ifdef SUPPORT_PCRE8
2901            printf("1\n");
2902            yield = 1;
2903    #else
2904            printf("0\n");
2905            yield = 0;
2906    #endif
2907            }
2908          else if (strcmp(argv[op + 1], "pcre16") == 0)
2909            {
2910    #ifdef SUPPORT_PCRE16
2911            printf("1\n");
2912            yield = 1;
2913    #else
2914            printf("0\n");
2915            yield = 0;
2916    #endif
2917            }
2918          else if (strcmp(argv[op + 1], "pcre32") == 0)
2919            {
2920    #ifdef SUPPORT_PCRE32
2921            printf("1\n");
2922            yield = 1;
2923    #else
2924            printf("0\n");
2925            yield = 0;
2926    #endif
2927            goto EXIT;
2928            }
2929          if (strcmp(argv[op + 1], "utf") == 0)
2930            {
2931    #ifdef SUPPORT_PCRE8
2932            if (pcre_mode == PCRE8_MODE)
2933              (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2934    #endif
2935    #ifdef SUPPORT_PCRE16
2936            if (pcre_mode == PCRE16_MODE)
2937              (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2938    #endif
2939    #ifdef SUPPORT_PCRE32
2940            if (pcre_mode == PCRE32_MODE)
2941              (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2942    #endif
2943            printf("%d\n", rc);
2944            yield = rc;
2945            goto EXIT;
2946            }
2947          else if (strcmp(argv[op + 1], "ucp") == 0)
2948            {
2949            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2950            printf("%d\n", rc);
2951            yield = rc;
2952            }
2953          else if (strcmp(argv[op + 1], "jit") == 0)
2954            {
2955            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2956            printf("%d\n", rc);
2957            yield = rc;
2958            }
2959          else if (strcmp(argv[op + 1], "newline") == 0)
2960            {
2961            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2962            print_newline_config(rc, TRUE);
2963            }
2964          else if (strcmp(argv[op + 1], "ebcdic") == 0)
2965            {
2966    #ifdef EBCDIC
2967            printf("1\n");
2968            yield = 1;
2969    #else
2970            printf("0\n");
2971    #endif
2972            }
2973          else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2974            {
2975    #ifdef EBCDIC
2976            printf("0x%02x\n", CHAR_LF);
2977    #else
2978            printf("0\n");
2979    #endif
2980            }
2981          else
2982            {
2983            printf("Unknown -C option: %s\n", argv[op + 1]);
2984            }
2985          goto EXIT;
2986          }
2987    
2988        /* No argument for -C: output all configuration information. */
2989    
2990        printf("PCRE version %s\n", version);
2991      printf("Compiled with\n");      printf("Compiled with\n");
2992    
2993    #ifdef EBCDIC
2994        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2995    #endif
2996    
2997    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2998    are set, either both UTFs are supported or both are not supported. */
2999    
3000    #ifdef SUPPORT_PCRE8
3001        printf("  8-bit support\n");
3002      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3003      printf("  %sUTF-8 support\n", rc? "" : "No ");        printf ("  %sUTF-8 support\n", rc ? "" : "No ");
3004      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #endif
3005    #ifdef SUPPORT_PCRE16
3006        printf("  16-bit support\n");
3007        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3008        printf ("  %sUTF-16 support\n", rc ? "" : "No ");
3009    #endif
3010    #ifdef SUPPORT_PCRE32
3011        printf("  32-bit support\n");
3012        (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3013        printf ("  %sUTF-32 support\n", rc ? "" : "No ");
3014    #endif
3015    
3016        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3017      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
3018      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3019      /* Note that these values are always the ASCII values, even      if (rc)
3020      in EBCDIC environments. CR is 13 and NL is 10. */        {
3021      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :        const char *arch;
3022        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :        (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3023        (rc == -2)? "ANYCRLF" :        printf("  Just-in-time compiler support: %s\n", arch);
3024        (rc == -1)? "ANY" : "???");        }
3025      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      else
3026          printf("  No just-in-time compiler support\n");
3027        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3028        print_newline_config(rc, FALSE);
3029        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3030      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3031                                       "all Unicode newlines");                                       "all Unicode newlines");
3032      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3033      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
3034      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3035      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
3036      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3037      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
3038      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3039      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
3040      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3041      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
3042        if (showstore)
3043          {
3044          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3045          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3046          }
3047        printf("\n");
3048      goto EXIT;      goto EXIT;
3049      }      }
3050    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
3051             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
3052      {      {
3053      usage();      usage();
3054      goto EXIT;      goto EXIT;
3055      }      }
3056    else    else
3057      {      {
3058      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
3059        printf("** Unknown or malformed option %s\n", arg);
3060      usage();      usage();
3061      yield = 1;      yield = 1;
3062      goto EXIT;      goto EXIT;
# Line 1018  if (argc > 2) Line 3103  if (argc > 2)
3103    
3104  /* Set alternative malloc function */  /* Set alternative malloc function */
3105    
3106    #ifdef SUPPORT_PCRE8
3107  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
3108  pcre_free = new_free;  pcre_free = new_free;
3109  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
3110  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
3111    #endif
3112    
3113    #ifdef SUPPORT_PCRE16
3114    pcre16_malloc = new_malloc;
3115    pcre16_free = new_free;
3116    pcre16_stack_malloc = stack_malloc;
3117    pcre16_stack_free = stack_free;
3118    #endif
3119    
3120    #ifdef SUPPORT_PCRE32
3121    pcre32_malloc = new_malloc;
3122    pcre32_free = new_free;
3123    pcre32_stack_malloc = stack_malloc;
3124    pcre32_stack_free = stack_free;
3125    #endif
3126    
3127  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
3128    
3129  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3130    
3131  /* Main loop */  /* Main loop */
3132    
# Line 1040  while (!done) Line 3141  while (!done)
3141  #endif  #endif
3142    
3143    const char *error;    const char *error;
3144    unsigned char *markptr;    pcre_uint8 *markptr;
3145    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
3146    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
3147    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
3148      unsigned long int get_options;
3149    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
3150    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
3151    int do_mark = 0;    int do_allcaps = 0;
3152      int do_mark = 0;
3153    int do_study = 0;    int do_study = 0;
3154      int no_force_study = 0;
3155    int do_debug = debug;    int do_debug = debug;
3156    int do_G = 0;    int do_G = 0;
3157    int do_g = 0;    int do_g = 0;
3158    int do_showinfo = showinfo;    int do_showinfo = showinfo;
3159    int do_showrest = 0;    int do_showrest = 0;
3160      int do_showcaprest = 0;
3161    int do_flip = 0;    int do_flip = 0;
3162    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
3163    
3164    use_utf8 = 0;  #if !defined NODFA
3165      int dfa_matched = 0;
3166    #endif
3167    
3168      use_utf = 0;
3169    debug_lengths = 1;    debug_lengths = 1;
3170    
3171    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1071  while (!done) Line 3180  while (!done)
3180    
3181    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3182      {      {
3183      unsigned long int magic, get_options;      pcre_uint32 magic;
3184      uschar sbuf[8];      pcre_uint8 sbuf[8];
3185      FILE *f;      FILE *f;
3186    
3187      p++;      p++;
3188        if (*p == '!')
3189          {
3190          do_debug = TRUE;
3191          do_showinfo = TRUE;
3192          p++;
3193          }
3194    
3195      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
3196      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
3197      *pp = 0;      *pp = 0;
# Line 1087  while (!done) Line 3203  while (!done)
3203        continue;        continue;
3204        }        }
3205    
3206        first_gotten_store = 0;
3207      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3208    
3209      true_size =      true_size =
# Line 1094  while (!done) Line 3211  while (!done)
3211      true_study_size =      true_study_size =
3212        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3213    
3214      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
3215      regex_gotten_store = gotten_store;      if (re == NULL)
3216          {
3217          printf("** Failed to get %d bytes of memory for pcre object\n",
3218            (int)true_size);
3219          yield = 1;
3220          goto EXIT;
3221          }
3222        regex_gotten_store = first_gotten_store;
3223    
3224      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3225    
3226      magic = ((real_pcre *)re)->magic_number;      magic = REAL_PCRE_MAGIC(re);
3227      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
3228        {        {
3229        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
3230          {          {
3231          do_flip = 1;          do_flip = 1;
3232          }          }
3233        else        else
3234          {          {
3235          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3236            new_free(re);
3237          fclose(f);          fclose(f);
3238          continue;          continue;
3239          }          }
3240        }        }
3241    
3242      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
3243        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3244          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
3245    
3246      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
3247    
3248      if (true_study_size != 0)      if (true_study_size != 0)
3249        {        {
# Line 1138  while (!done) Line 3259  while (!done)
3259          {          {
3260          FAIL_READ:          FAIL_READ:
3261          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
3262          if (extra != NULL) new_free(extra);          if (extra != NULL)
3263          if (re != NULL) new_free(re);            {
3264              PCRE_FREE_STUDY(extra);
3265              }
3266            new_free(re);
3267          fclose(f);          fclose(f);
3268          continue;          continue;
3269          }          }
# Line 1148  while (!done) Line 3272  while (!done)
3272        }        }
3273      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
3274    
3275        /* Flip the necessary bytes. */
3276        if (do_flip)
3277          {
3278          int rc;
3279          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3280          if (rc == PCRE_ERROR_BADMODE)
3281            {
3282            /* Simulate the result of the function call below. */
3283            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3284              pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3285              PCRE_INFO_OPTIONS);
3286            fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3287              "%d-bit mode\n", 8 * CHAR_SIZE,
3288              8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3289            new_free(re);
3290            fclose(f);
3291            continue;
3292            }
3293          }
3294    
3295        /* Need to know if UTF-8 for printing data strings. */
3296    
3297        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3298          {
3299          new_free(re);
3300          fclose(f);
3301          continue;
3302          }
3303        use_utf = (get_options & PCRE_UTF8) != 0;
3304    
3305      fclose(f);      fclose(f);
3306      goto SHOW_INFO;      goto SHOW_INFO;
3307      }      }
3308    
3309    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
3310    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
3311    
3312    delimiter = *p++;    delimiter = *p++;
3313    
# Line 1164  while (!done) Line 3318  while (!done)
3318      }      }
3319    
3320    pp = p;    pp = p;
3321    poffset = p - buffer;    poffset = (int)(p - buffer);
3322    
3323    for(;;)    for(;;)
3324      {      {
# Line 1204  while (!done) Line 3358  while (!done)
3358    /* Look for options after final delimiter */    /* Look for options after final delimiter */
3359    
3360    options = 0;    options = 0;
3361    study_options = 0;    study_options = force_study_options;
3362    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
3363    
3364    while (*pp != 0)    while (*pp != 0)
# Line 1218  while (!done) Line 3372  while (!done)
3372        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
3373        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
3374    
3375        case '+': do_showrest = 1; break;        case '+':
3376          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3377          break;
3378    
3379          case '=': do_allcaps = 1; break;
3380        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
3381        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
3382        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1228  while (!done) Line 3386  while (!done)
3386        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
3387        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
3388        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
3389        case 'K': do_mark = 1; break;        case 'K': do_mark = 1; break;
3390        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
3391        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3392    
# Line 1236  while (!done) Line 3394  while (!done)
3394        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
3395  #endif  #endif
3396    
3397        case 'S': do_study = 1; break;        case 'S':
3398          do_study = 1;
3399          for (;;)
3400            {
3401            switch (*pp++)
3402              {
3403              case 'S':
3404              do_study = 0;
3405              no_force_study = 1;
3406              break;
3407    
3408              case '!':
3409              study_options |= PCRE_STUDY_EXTRA_NEEDED;
3410              break;
3411    
3412              case '+':
3413              if (*pp == '+')
3414                {
3415                verify_jit = TRUE;
3416                pp++;
3417                }
3418              if (*pp >= '1' && *pp <= '7')
3419                study_options |= jit_study_bits[*pp++ - '1'];
3420              else
3421                study_options |= jit_study_bits[6];
3422              break;
3423    
3424              case '-':
3425              study_options &= ~PCRE_STUDY_ALLJIT;
3426              break;
3427    
3428              default:
3429              pp--;
3430              goto ENDLOOP;
3431              }
3432            }
3433          ENDLOOP:
3434          break;
3435    
3436        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
3437          case 'W': options |= PCRE_UCP; break;
3438        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
3439          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3440        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
3441        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
3442        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
3443    
3444          case 'T':
3445          switch (*pp++)
3446            {
3447            case '0': tables = tables0; break;
3448            case '1': tables = tables1; break;
3449    
3450            case '\r':
3451            case '\n':
3452            case ' ':
3453            case 0:
3454            fprintf(outfile, "** Missing table number after /T\n");
3455            goto SKIP_DATA;
3456    
3457            default:
3458            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3459            goto SKIP_DATA;
3460            }
3461          break;
3462    
3463        case 'L':        case 'L':
3464        ppp = pp;        ppp = pp;
3465        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1255  while (!done) Line 3472  while (!done)
3472          goto SKIP_DATA;          goto SKIP_DATA;
3473          }          }
3474        locale_set = 1;        locale_set = 1;
3475        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
3476        pp = ppp;        pp = ppp;
3477        break;        break;
3478    
# Line 1268  while (!done) Line 3485  while (!done)
3485    
3486        case '<':        case '<':
3487          {          {
3488          if (strncmp((char *)pp, "JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3489            {            {
3490            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
3491            pp += 3;            pp += 3;
# Line 1296  while (!done) Line 3513  while (!done)
3513    
3514    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
3515    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
3516    local character tables. */    local character tables. Neither does it have 16-bit support. */
3517    
3518  #if !defined NOPOSIX  #if !defined NOPOSIX
3519    if (posix || do_posix)    if (posix || do_posix)
# Line 1309  while (!done) Line 3526  while (!done)
3526      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3527      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3528      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3529        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3530      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3531    
3532        first_gotten_store = 0;
3533      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
3534    
3535      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1324  while (!done) Line 3543  while (!done)
3543        }        }
3544      }      }
3545    
3546    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
3547    
3548      else
3549    #endif  /* !defined NOPOSIX */
3550    
3551        {
3552        /* In 16- or 32-bit mode, convert the input. */
3553    
3554    #ifdef SUPPORT_PCRE16
3555        if (pcre_mode == PCRE16_MODE)
3556          {
3557          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3558            {
3559            case -1:
3560            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3561              "converted to UTF-16\n");
3562            goto SKIP_DATA;
3563    
3564            case -2:
3565            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3566              "cannot be converted to UTF-16\n");
3567            goto SKIP_DATA;
3568    
3569            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3570            fprintf(outfile, "**Failed: character value greater than 0xffff "
3571              "cannot be converted to 16-bit in non-UTF mode\n");
3572            goto SKIP_DATA;
3573    
3574            default:
3575            break;
3576            }
3577          p = (pcre_uint8 *)buffer16;
3578          }
3579    #endif
3580    
3581    #ifdef SUPPORT_PCRE32
3582        if (pcre_mode == PCRE32_MODE)
3583          {
3584          switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3585            {
3586            case -1:
3587            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3588              "converted to UTF-32\n");
3589            goto SKIP_DATA;
3590    
3591            case -2:
3592            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3593              "cannot be converted to UTF-32\n");
3594            goto SKIP_DATA;
3595    
3596    else          case -3:
3597  #endif  /* !defined NOPOSIX */          fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3598            goto SKIP_DATA;
3599    
3600      {          default:
3601      unsigned long int get_options;          break;
3602            }
3603          p = (pcre_uint8 *)buffer32;
3604          }
3605    #endif
3606    
3607        /* Compile many times when timing */
3608    
3609      if (timeit > 0)      if (timeit > 0)
3610        {        {
# Line 1339  while (!done) Line 3613  while (!done)
3613        clock_t start_time = clock();        clock_t start_time = clock();
3614        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
3615          {          {
3616          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3617          if (re != NULL) free(re);          if (re != NULL) free(re);
3618          }          }
3619        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1348  while (!done) Line 3622  while (!done)
3622            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
3623        }        }
3624    
3625      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
3626        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3627    
3628      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3629      if non-interactive. */      if non-interactive. */
# Line 1379  while (!done) Line 3654  while (!done)
3654      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
3655      lines. */      lines. */
3656    
3657      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3658      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;        goto SKIP_DATA;
3659        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
     /* Print information if required. There are now two info-returning  
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
   
     if (log_store)  
       fprintf(outfile, "Memory allocation (code space): %d\n",  
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3660    
3661      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3662      and remember the store that was got. */      and remember the store that was got. */
3663    
3664      true_size = ((real_pcre *)re)->size;      true_size = REAL_PCRE_SIZE(re);
3665      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3666    
3667        /* Output code size information if requested */
3668    
3669        if (log_store)
3670          {
3671          int name_count, name_entry_size, real_pcre_size;
3672    
3673          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3674          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3675    #ifdef SUPPORT_PCRE8
3676          if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3677            real_pcre_size = sizeof(real_pcre);
3678    #endif
3679    #ifdef SUPPORT_PCRE16
3680          if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3681            real_pcre_size = sizeof(real_pcre16);
3682    #endif
3683    #ifdef SUPPORT_PCRE32
3684          if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3685            real_pcre_size = sizeof(real_pcre32);
3686    #endif
3687          fprintf(outfile, "Memory allocation (code space): %d\n",
3688            (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3689          }
3690    
3691      /* If /S was present, study the regexp to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
3692      help with the matching. */      help with the matching, unless the pattern has the SS option, which
3693        suppresses the effect of /S (used for a few test patterns where studying is
3694        never sensible). */
3695    
3696      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
3697        {        {
3698        if (timeit > 0)        if (timeit > 0)
3699          {          {
# Line 1409  while (!done) Line 3701  while (!done)
3701          clock_t time_taken;          clock_t time_taken;
3702          clock_t start_time = clock();          clock_t start_time = clock();
3703          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3704            extra = pcre_study(re, study_options, &error);            {
3705              PCRE_STUDY(extra, re, study_options, &error);
3706              }
3707          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3708          if (extra != NULL) free(extra);          if (extra != NULL)
3709              {
3710              PCRE_FREE_STUDY(extra);
3711              }
3712          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3713            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3714              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3715          }          }
3716        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options, &error);
3717        if (error != NULL)        if (error != NULL)
3718          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3719        else if (extra != NULL)        else if (extra != NULL)
3720            {
3721          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3722            if (log_store)
3723              {
3724              size_t jitsize;
3725              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3726                  jitsize != 0)
3727                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3728              }
3729            }
3730        }        }
3731    
3732      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
3733    
3734      if (do_mark)      if (do_mark)
3735        {        {
3736        if (extra == NULL)        if (extra == NULL)
# Line 1432  while (!done) Line 3738  while (!done)
3738          extra = (pcre_extra *)malloc(sizeof(pcre_extra));          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3739          extra->flags = 0;          extra->flags = 0;
3740          }          }
3741        extra->mark = &markptr;        extra->mark = &markptr;
3742        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
       }  
   
     /* If the 'F' option was present, we flip the bytes of all the integer  
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
3743        }        }
3744    
3745      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
3746    
3747      SHOW_INFO:      SHOW_INFO:
3748    
3749      if (do_debug)      if (do_debug)
3750        {        {
3751        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3752        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3753        }        }
3754    
3755      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1488  while (!done) Line 3757  while (!done)
3757      if (do_showinfo)      if (do_showinfo)
3758        {        {
3759        unsigned long int all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3760        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3761          hascrorlf;          hascrorlf, maxlookbehind;
3762        int nameentrysize, namecount;        int nameentrysize, namecount;
3763        const uschar *nametable;        const pcre_uint8 *nametable;
3764    
3765        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3766        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3767        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3768        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3769        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3770        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3771        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3772        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3773        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3774        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3775        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3776              new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3777  #if !defined NOINFOCHECK            != 0)
3778        old_count = pcre_info(re, &old_options, &old_first_char);          goto SKIP_DATA;
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3779    
3780        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3781          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1541  while (!done) Line 3790  while (!done)
3790          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3791          while (namecount-- > 0)          while (namecount-- > 0)
3792            {            {
3793            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,            int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
3794              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int length = (int)STRLEN(nametable + imm2_size);
3795              GET2(nametable, 0));            fprintf(outfile, "  ");
3796            nametable += nameentrysize;            PCHARSV(nametable, imm2_size, length, outfile);
3797              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3798    #ifdef SUPPORT_PCRE32
3799              if (pcre_mode == PCRE32_MODE)
3800                fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
3801    #endif
3802    #ifdef SUPPORT_PCRE16
3803              if (pcre_mode == PCRE16_MODE)
3804                fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
3805    #endif
3806    #ifdef SUPPORT_PCRE8
3807              if (pcre_mode == PCRE8_MODE)
3808                fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
3809    #endif
3810              nametable += nameentrysize * CHAR_SIZE;
3811            }            }
3812          }          }
3813    
3814        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3815        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3816    
3817        all_options = ((real_pcre *)re)->options;        all_options = REAL_PCRE_OPTIONS(re);
3818        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3819    
3820        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3821          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3822            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3823            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3824            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1568  while (!done) Line 3831  while (!done)
3831            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3832            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3833            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3834            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3835            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3836              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3837              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3838            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3839    
3840        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1610  while (!done) Line 3875  while (!done)
3875          }          }
3876        else        else
3877          {          {
3878          int ch = first_char & 255;          const char *caseless =
3879          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
3880            "" : " (caseless)";            "" : " (caseless)";
3881          if (PRINTHEX(ch))  
3882            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3883              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3884          else          else
3885            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3886              fprintf(outfile, "First char = ");
3887              pchar(first_char, outfile);
3888              fprintf(outfile, "%s\n", caseless);
3889              }
3890          }          }
3891    
3892        if (need_char < 0)        if (need_char < 0)
# Line 1625  while (!done) Line 3895  while (!done)
3895          }          }
3896        else        else
3897          {          {
3898          int ch = need_char & 255;          const char *caseless =
3899          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
3900            "" : " (caseless)";            "" : " (caseless)";
3901          if (PRINTHEX(ch))  
3902            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3903              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3904          else          else
3905            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3906              fprintf(outfile, "Need char = ");
3907              pchar(need_char, outfile);
3908              fprintf(outfile, "%s\n", caseless);
3909              }
3910          }          }
3911    
3912          if (maxlookbehind > 0)
3913            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3914    
3915        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3916        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3917        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3918        flipped.) */        flipped.) If study was forced by an external -s, don't show this
3919          information unless -i or -d was also present. This means that, except
3920          when auto-callouts are involved, the output from runs with and without
3921          -s should be identical. */
3922    
3923        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3924          {          {
3925          if (extra == NULL)          if (extra == NULL)
3926            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3927          else          else
3928            {            {
3929            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3930            int minlength;            int minlength;
3931    
3932            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3933            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3934    
3935            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
3936              {              {
3937              int i;              if (start_bits == NULL)
3938              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3939              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3940                {                {
3941                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3942                  int c = 24;
3943                  fprintf(outfile, "Starting byte set: ");
3944                  for (i = 0; i < 256; i++)
3945                  {                  {
3946                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
                   {  
                   fprintf(outfile, "%c ", i);  
                   c += 2;  
                   }  
                 else  
3947                    {                    {
3948                    fprintf(outfile, "\\x%02x ", i);                    if (c > 75)
3949                    c += 5;                      {
3950                        fprintf(outfile, "\n  ");
3951                        c = 2;
3952                        }
3953                      if (PRINTOK(i) && i != ' ')
3954                        {
3955                        fprintf(outfile, "%c ", i);
3956                        c += 2;
3957                        }
3958                      else
3959                        {
3960                        fprintf(outfile, "\\x%02x ", i);
3961                        c += 5;
3962                        }
3963                    }                    }
3964                  }                  }
3965                  fprintf(outfile, "\n");
3966                }                }
3967              fprintf(outfile, "\n");              }
3968              }
3969    
3970            /* Show this only if the JIT was set by /S, not by -s. */
3971    
3972            if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3973                (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3974              {
3975              int jit;
3976              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3977                {
3978                if (jit)
3979                  fprintf(outfile, "JIT study was successful\n");
3980                else
3981    #ifdef SUPPORT_JIT
3982                  fprintf(outfile, "JIT study was not successful\n");
3983    #else
3984                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3985    #endif
3986              }              }
3987            }            }
3988          }          }
# Line 1699  while (!done) Line 4001  while (!done)
4001          }          }
4002        else        else
4003          {          {
4004          uschar sbuf[8];          pcre_uint8 sbuf[8];
4005          sbuf[0] = (uschar)((true_size >> 24) & 255);  
4006          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
4007          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4008          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4009            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
4010          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
4011          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4012          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4013          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
4014            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4015    
4016          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
4017              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1717  while (!done) Line 4020  while (!done)
4020            }            }
4021          else          else
4022            {            {
4023            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4024    
4025              /* If there is study data, write it. */
4026    
4027            if (extra != NULL)            if (extra != NULL)
4028              {              {
4029              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1727  while (!done) Line 4033  while (!done)
4033                  strerror(errno));                  strerror(errno));
4034                }                }
4035              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
4036              }              }
4037            }            }
4038          fclose(f);          fclose(f);
4039          }          }
4040    
4041        new_free(re);        new_free(re);
4042        if (extra != NULL) new_free(extra);        if (extra != NULL)
4043        if (tables != NULL) new_free((void *)tables);          {
4044            PCRE_FREE_STUDY(extra);
4045            }
4046          if (locale_set)
4047            {
4048            new_free((void *)tables);
4049            setlocale(LC_CTYPE, "C");
4050            locale_set = 0;
4051            }
4052        continue;  /* With next regex */        continue;  /* With next regex */
4053        }        }
4054      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1744  while (!done) Line 4057  while (!done)
4057    
4058    for (;;)    for (;;)
4059      {      {
4060      uschar *q;      pcre_uint8 *q;
4061      uschar *bptr;      pcre_uint8 *bptr;
4062      int *use_offsets = offsets;      int *use_offsets = offsets;
4063      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
4064      int callout_data = 0;      int callout_data = 0;
# Line 1757  while (!done) Line 4070  while (!done)
4070      int getlist = 0;      int getlist = 0;
4071      int gmatched = 0;      int gmatched = 0;
4072      int start_offset = 0;      int start_offset = 0;
4073        int start_offset_sign = 1;
4074      int g_notempty = 0;      int g_notempty = 0;
4075      int use_dfa = 0;      int use_dfa = 0;
4076    
     options = 0;  
   
4077      *copynames = 0;      *copynames = 0;
4078      *getnames = 0;      *getnames = 0;
4079    
4080      copynamesptr = copynames;  #ifdef SUPPORT_PCRE32
4081      getnamesptr = getnames;      cn32ptr = copynames;
4082        gn32ptr = getnames;
4083    #endif
4084    #ifdef SUPPORT_PCRE16
4085        cn16ptr = copynames16;
4086        gn16ptr = getnames16;
4087    #endif
4088    #ifdef SUPPORT_PCRE8
4089        cn8ptr = copynames8;
4090        gn8ptr = getnames8;
4091    #endif
4092    
4093      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
4094      first_callout = 1;      first_callout = 1;
4095        last_callout_mark = NULL;
4096      callout_extra = 0;      callout_extra = 0;
4097      callout_count = 0;      callout_count = 0;
4098      callout_fail_count = 999999;      callout_fail_count = 999999;
4099      callout_fail_id = -1;      callout_fail_id = -1;
4100      show_malloc = 0;      show_malloc = 0;
4101        options = 0;
4102    
4103      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
4104        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 1784  while (!done) Line 4108  while (!done)
4108        {        {
4109        if (extend_inputline(infile, buffer + len, "data> ") == NULL)        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4110          {          {
4111          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
4112              {
4113              fprintf(outfile, "\n");
4114              break;
4115              }
4116          done = 1;          done = 1;
4117          goto CONTINUE;          goto CONTINUE;
4118          }          }
# Line 1806  while (!done) Line 4134  while (!done)
4134        int i = 0;        int i = 0;
4135        int n = 0;        int n = 0;
4136    
4137        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4138          In non-UTF mode, allow the value of the byte to fall through to later,
4139          where values greater than 127 are turned into UTF-8 when running in
4140          16-bit mode. */
4141    
4142          if (c != '\\')
4143            {
4144            if (use_utf)
4145              {
4146              *q++ = c;
4147              continue;
4148              }
4149            }
4150    
4151          /* Handle backslash escapes */
4152    
4153          else switch ((c = *p++))
4154          {          {
4155          case 'a': c =    7; break;          case 'a': c =    7; break;
4156          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 1822  while (!done) Line 4166  while (!done)
4166          c -= '0';          c -= '0';
4167          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4168            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
4169          break;          break;
4170    
4171          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
4172          if (*p == '{')          if (*p == '{')
4173            {            {
4174            unsigned char *pt = p;            pcre_uint8 *pt = p;
4175            c = 0;            c = 0;
4176            while (isxdigit(*(++pt)))  
4177              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4178              when isxdigit() is a macro that refers to its argument more than
4179              once. This is banned by the C Standard, but apparently happens in at
4180              least one MacOS environment. */
4181    
4182              for (pt++; isxdigit(*pt); pt++)
4183                {
4184                if (++i == 9)
4185                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4186                                   "using only the first eight.\n");
4187                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4188                }
4189            if (*pt == '}')            if (*pt == '}')
4190              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             if (use_utf8)  
               {  
               utn = ord2utf8(c, buff8);  
               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
               c = buff8[ii];   /* Last byte */  
               }  
             else  
              {  
              if (c > 255)  
                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "  
                  "UTF-8 mode is not enabled.\n"  
                  "** Truncation will probably give the wrong result.\n", c);