/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 835 by ph10, Wed Dec 28 16:10:09 2011 UTC revision 1087 by chpe, Tue Oct 16 15:55:38 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 58  POSSIBILITY OF SUCH DAMAGE.
58  #include <locale.h>  #include <locale.h>
59  #include <errno.h>  #include <errno.h>
60    
61  #ifdef SUPPORT_LIBREADLINE  /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
68  #include <unistd.h>  #include <unistd.h>
69  #endif  #endif
70    #if defined(SUPPORT_LIBREADLINE)
71  #include <readline/readline.h>  #include <readline/readline.h>
72  #include <readline/history.h>  #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80  #endif  #endif
   
81    
82  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
83  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 90  input mode under Windows. */ Line 112  input mode under Windows. */
112  #else  #else
113  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
114  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119  #define INPUT_MODE   "rb"  #define INPUT_MODE   "rb"
120  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
121  #endif  #endif
122    #endif
123    
124    #define PRIV(name) name
125    
126  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
127  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 105  here before pcre_internal.h so that the Line 133  here before pcre_internal.h so that the
133  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
134    
135  #include "pcre.h"  #include "pcre.h"
136    
137    #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138    /* Configure internal macros to 32 bit mode. */
139    #define COMPILE_PCRE32
140    #endif
141    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142    /* Configure internal macros to 16 bit mode. */
143    #define COMPILE_PCRE16
144    #endif
145    #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146    /* Configure internal macros to 8 bit mode. */
147    #define COMPILE_PCRE8
148    #endif
149    
150  #include "pcre_internal.h"  #include "pcre_internal.h"
151    
152    /* The pcre_printint() function, which prints the internal form of a compiled
153    regex, is held in a separate file so that (a) it can be compiled in either
154    8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155    when that is compiled in debug mode. */
156    
157    #ifdef SUPPORT_PCRE8
158    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159    #endif
160    #ifdef SUPPORT_PCRE16
161    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162    #endif
163    #ifdef SUPPORT_PCRE32
164    void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165    #endif
166    
167  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
168  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source files here, changing the names of the
169  external symbols to prevent clashes. */  external symbols to prevent clashes. */
170    
171  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
 #define _pcre_ucp_typerange    ucp_typerange  
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utf8_char_sizes  utf8_char_sizes  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
172    
173  #include "pcre_tables.c"  #include "pcre_tables.c"
174    #include "pcre_ucd.c"
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
175    
176  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
177  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
178  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
179  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
180  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
181    
182    #ifdef EBCDIC
183    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
184    #else
185    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
186    #endif
187    
188    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189    
190  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* Posix support is disabled in 16 or 32 bit only mode. */
191    #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192    #define NOPOSIX
193    #endif
194    
195  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
196  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 150  Makefile. */ Line 200  Makefile. */
200  #include "pcreposix.h"  #include "pcreposix.h"
201  #endif  #endif
202    
203  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
204  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
207  UTF8 support if PCRE is built without it. */  
208    #ifndef SUPPORT_UTF
209  #ifndef SUPPORT_UTF8  #ifndef NOUTF
210  #ifndef NOUTF8  #define NOUTF
211  #define NOUTF8  #endif
212  #endif  #endif
213    
214    /* To make the code a bit tidier for 8/16/32-bit support, we define macros
215    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
216    only from one place and is handled differently). I couldn't dream up any way of
217    using a single macro to do this in a generic way, because of the many different
218    argument requirements. We know that at least one of SUPPORT_PCRE8,
219    SUPPORT_PCRE16 and SUPPORT_PCRE32 must be set. First define macros for each
220    individual mode; then use these in the definitions of generic macros.
221    
222    **** Special note about the PCHARSxxx macros: the address of the string to be
223    printed is always given as two arguments: a base address followed by an offset.
224    The base address is cast to the correct data size for 8, 16 or 32 bit data; the
225    offset is in units of this size. If the string were given as base+offset in one
226    argument, the casting might be incorrectly applied. */
227    
228    #ifdef SUPPORT_PCRE8
229    
230    #define PCHARS8(lv, p, offset, len, f) \
231      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
232    
233    #define PCHARSV8(p, offset, len, f) \
234      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235    
236    #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237      p = read_capture_name8(p, cn8, re)
238    
239    #define STRLEN8(p) ((int)strlen((char *)p))
240    
241    #define SET_PCRE_CALLOUT8(callout) \
242      pcre_callout = callout
243    
244    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245       pcre_assign_jit_stack(extra, callback, userdata)
246    
247    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248      re = pcre_compile((char *)pat, options, error, erroffset, tables)
249    
250    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251        namesptr, cbuffer, size) \
252      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253        (char *)namesptr, cbuffer, size)
254    
255    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257    
258    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259        offsets, size_offsets, workspace, size_workspace) \
260      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261        offsets, size_offsets, workspace, size_workspace)
262    
263    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264        offsets, size_offsets) \
265      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266        offsets, size_offsets)
267    
268    #define PCRE_FREE_STUDY8(extra) \
269      pcre_free_study(extra)
270    
271    #define PCRE_FREE_SUBSTRING8(substring) \
272      pcre_free_substring(substring)
273    
274    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275      pcre_free_substring_list(listptr)
276    
277    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278        getnamesptr, subsptr) \
279      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280        (char *)getnamesptr, subsptr)
281    
282    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
283      n = pcre_get_stringnumber(re, (char *)ptr)
284    
285    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287    
288    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290    
291    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293    
294    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295      pcre_printint(re, outfile, debug_lengths)
296    
297    #define PCRE_STUDY8(extra, re, options, error) \
298      extra = pcre_study(re, options, error)
299    
300    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301      pcre_jit_stack_alloc(startsize, maxsize)
302    
303    #define PCRE_JIT_STACK_FREE8(stack) \
304      pcre_jit_stack_free(stack)
305    
306    #endif /* SUPPORT_PCRE8 */
307    
308    /* -----------------------------------------------------------*/
309    
310    #ifdef SUPPORT_PCRE16
311    
312    #define PCHARS16(lv, p, offset, len, f) \
313      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
314    
315    #define PCHARSV16(p, offset, len, f) \
316      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
317    
318    #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319      p = read_capture_name16(p, cn16, re)
320    
321    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322    
323    #define SET_PCRE_CALLOUT16(callout) \
324      pcre16_callout = (int (*)(pcre16_callout_block *))callout
325    
326    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327      pcre16_assign_jit_stack((pcre16_extra *)extra, \
328        (pcre16_jit_callback)callback, userdata)
329    
330    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332        tables)
333    
334    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335        namesptr, cbuffer, size) \
336      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338    
339    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341        (PCRE_UCHAR16 *)cbuffer, size/2)
342    
343    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344        offsets, size_offsets, workspace, size_workspace) \
345      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347        workspace, size_workspace)
348    
349    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350        offsets, size_offsets) \
351      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352        len, start_offset, options, offsets, size_offsets)
353    
354    #define PCRE_FREE_STUDY16(extra) \
355      pcre16_free_study((pcre16_extra *)extra)
356    
357    #define PCRE_FREE_SUBSTRING16(substring) \
358      pcre16_free_substring((PCRE_SPTR16)substring)
359    
360    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362    
363    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364        getnamesptr, subsptr) \
365      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367    
368    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
369      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
370    
371    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373        (PCRE_SPTR16 *)(void*)subsptr)
374    
375    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377        (PCRE_SPTR16 **)(void*)listptr)
378    
379    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381        tables)
382    
383    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384      pcre16_printint(re, outfile, debug_lengths)
385    
386    #define PCRE_STUDY16(extra, re, options, error) \
387      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388    
389    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391    
392    #define PCRE_JIT_STACK_FREE16(stack) \
393      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394    
395    #endif /* SUPPORT_PCRE16 */
396    
397    /* -----------------------------------------------------------*/
398    
399    #ifdef SUPPORT_PCRE32
400    
401    #define PCHARS32(lv, p, offset, len, f) \
402      lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)
403    
404    #define PCHARSV32(p, offset, len, f) \
405      (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)
406    
407    #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408      p = read_capture_name32(p, cn32, re)
409    
410    #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411    
412    #define SET_PCRE_CALLOUT32(callout) \
413      pcre32_callout = (int (*)(pcre32_callout_block *))callout
414    
415    #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416      pcre32_assign_jit_stack((pcre32_extra *)extra, \
417        (pcre32_jit_callback)callback, userdata)
418    
419    #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420      re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421        tables)
422    
423    #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
424        namesptr, cbuffer, size) \
425      rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
426        count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/2)
427    
428    #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
429      rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
430        (PCRE_UCHAR32 *)cbuffer, size/2)
431    
432    #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433        offsets, size_offsets, workspace, size_workspace) \
434      count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435        (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436        workspace, size_workspace)
437    
438    #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439        offsets, size_offsets) \
440      count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441        len, start_offset, options, offsets, size_offsets)
442    
443    #define PCRE_FREE_STUDY32(extra) \
444      pcre32_free_study((pcre32_extra *)extra)
445    
446    #define PCRE_FREE_SUBSTRING32(substring) \
447      pcre32_free_substring((PCRE_SPTR32)substring)
448    
449    #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450      pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451    
452    #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453        getnamesptr, subsptr) \
454      rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455        count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456    
457    #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
458      n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
459    
460    #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461      rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462        (PCRE_SPTR32 *)(void*)subsptr)
463    
464    #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465      rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466        (PCRE_SPTR32 **)(void*)listptr)
467    
468    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469      rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470        tables)
471    
472    #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473      pcre32_printint(re, outfile, debug_lengths)
474    
475    #define PCRE_STUDY32(extra, re, options, error) \
476      extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477    
478    #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479      (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480    
481    #define PCRE_JIT_STACK_FREE32(stack) \
482      pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483    
484    #endif /* SUPPORT_PCRE32 */
485    
486    
487    /* ----- More than one mode is supported; a runtime test is needed, except for
488    pcre_config(), and the JIT stack functions, when it doesn't matter which
489    version is called. ----- */
490    
491    enum {
492      PCRE8_MODE,
493      PCRE16_MODE,
494      PCRE32_MODE
495    };
496    
497    #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498    
499    #define CHAR_SIZE (1 << pcre_mode)
500    
501    #define PCHARS(lv, p, offset, len, f) \
502      if (pcre_mode == PCRE32_MODE) \
503        PCHARS32(lv, p, offset, len, f); \
504      else if (pcre_mode == PCRE16_MODE) \
505        PCHARS16(lv, p, offset, len, f); \
506      else \
507        PCHARS8(lv, p, offset, len, f)
508    
509    #define PCHARSV(p, offset, len, f) \
510      if (pcre_mode == PCRE32_MODE) \
511        PCHARSV32(p, offset, len, f); \
512      else if (pcre_mode == PCRE16_MODE) \
513        PCHARSV16(p, offset, len, f); \
514      else \
515        PCHARSV8(p, offset, len, f)
516    
517    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518      if (pcre_mode == PCRE32_MODE) \
519        READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520      else if (pcre_mode == PCRE16_MODE) \
521        READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522      else \
523        READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524    
525    #define SET_PCRE_CALLOUT(callout) \
526      if (pcre_mode == PCRE32_MODE) \
527        SET_PCRE_CALLOUT32(callout); \
528      else if (pcre_mode == PCRE16_MODE) \
529        SET_PCRE_CALLOUT16(callout); \
530      else \
531        SET_PCRE_CALLOUT8(callout)
532    
533    #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534    
535    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536      if (pcre_mode == PCRE32_MODE) \
537        PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538      else if (pcre_mode == PCRE16_MODE) \
539        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540      else \
541        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542    
543    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544      if (pcre_mode == PCRE32_MODE) \
545        PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546      else if (pcre_mode == PCRE16_MODE) \
547        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548      else \
549        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
550    
551    #define PCRE_CONFIG pcre_config
552    
553    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554        namesptr, cbuffer, size) \
555      if (pcre_mode == PCRE32_MODE) \
556        PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557          namesptr, cbuffer, size); \
558      else if (pcre_mode == PCRE16_MODE) \
559        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560          namesptr, cbuffer, size); \
561      else \
562        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
563          namesptr, cbuffer, size)
564    
565    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566      if (pcre_mode == PCRE32_MODE) \
567        PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568      else if (pcre_mode == PCRE16_MODE) \
569        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570      else \
571        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572    
573    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574        offsets, size_offsets, workspace, size_workspace) \
575      if (pcre_mode == PCRE32_MODE) \
576        PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577          offsets, size_offsets, workspace, size_workspace); \
578      else if (pcre_mode == PCRE16_MODE) \
579        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580          offsets, size_offsets, workspace, size_workspace); \
581      else \
582        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
583          offsets, size_offsets, workspace, size_workspace)
584    
585    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586        offsets, size_offsets) \
587      if (pcre_mode == PCRE32_MODE) \
588        PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589          offsets, size_offsets); \
590      else if (pcre_mode == PCRE16_MODE) \
591        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592          offsets, size_offsets); \
593      else \
594        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
595          offsets, size_offsets)
596    
597    #define PCRE_FREE_STUDY(extra) \
598      if (pcre_mode == PCRE32_MODE) \
599        PCRE_FREE_STUDY32(extra); \
600      else if (pcre_mode == PCRE16_MODE) \
601        PCRE_FREE_STUDY16(extra); \
602      else \
603        PCRE_FREE_STUDY8(extra)
604    
605    #define PCRE_FREE_SUBSTRING(substring) \
606      if (pcre_mode == PCRE32_MODE) \
607        PCRE_FREE_SUBSTRING32(substring); \
608      else if (pcre_mode == PCRE16_MODE) \
609        PCRE_FREE_SUBSTRING16(substring); \
610      else \
611        PCRE_FREE_SUBSTRING8(substring)
612    
613    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614      if (pcre_mode == PCRE32_MODE) \
615        PCRE_FREE_SUBSTRING_LIST32(listptr); \
616      else if (pcre_mode == PCRE16_MODE) \
617        PCRE_FREE_SUBSTRING_LIST16(listptr); \
618      else \
619        PCRE_FREE_SUBSTRING_LIST8(listptr)
620    
621    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622        getnamesptr, subsptr) \
623      if (pcre_mode == PCRE32_MODE) \
624        PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625          getnamesptr, subsptr); \
626      else if (pcre_mode == PCRE16_MODE) \
627        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628          getnamesptr, subsptr); \
629      else \
630        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
631          getnamesptr, subsptr)
632    
633    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634      if (pcre_mode == PCRE32_MODE) \
635        PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636      else if (pcre_mode == PCRE16_MODE) \
637        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638      else \
639        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640    
641    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642      if (pcre_mode == PCRE32_MODE) \
643        PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644      else if (pcre_mode == PCRE16_MODE) \
645        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646      else \
647        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648    
649    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650      if (pcre_mode == PCRE32_MODE) \
651        PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652      else if (pcre_mode == PCRE16_MODE) \
653        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654      else \
655        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656    
657    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658      (pcre_mode == PCRE32_MODE ? \
659         PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660        : pcre_mode == PCRE16_MODE ? \
661          PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662          : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663    
664    #define PCRE_JIT_STACK_FREE(stack) \
665      if (pcre_mode == PCRE32_MODE) \
666        PCRE_JIT_STACK_FREE32(stack); \
667      else if (pcre_mode == PCRE16_MODE) \
668        PCRE_JIT_STACK_FREE16(stack); \
669      else \
670        PCRE_JIT_STACK_FREE8(stack)
671    
672    #define PCRE_MAKETABLES \
673      (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674    
675    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676      if (pcre_mode == PCRE32_MODE) \
677        PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678      else if (pcre_mode == PCRE16_MODE) \
679        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680      else \
681        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682    
683    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684      if (pcre_mode == PCRE32_MODE) \
685        PCRE_PRINTINT32(re, outfile, debug_lengths); \
686      else if (pcre_mode == PCRE16_MODE) \
687        PCRE_PRINTINT16(re, outfile, debug_lengths); \
688      else \
689        PCRE_PRINTINT8(re, outfile, debug_lengths)
690    
691    #define PCRE_STUDY(extra, re, options, error) \
692      if (pcre_mode == PCRE32_MODE) \
693        PCRE_STUDY32(extra, re, options, error); \
694      else if (pcre_mode == PCRE16_MODE) \
695        PCRE_STUDY16(extra, re, options, error); \
696      else \
697        PCRE_STUDY8(extra, re, options, error)
698    
699    /* ----- Only 8-bit mode is supported ----- */
700    
701    #elif defined SUPPORT_PCRE8
702    #define CHAR_SIZE                 1
703    #define PCHARS                    PCHARS8
704    #define PCHARSV                   PCHARSV8
705    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
706    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
707    #define STRLEN                    STRLEN8
708    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
709    #define PCRE_COMPILE              PCRE_COMPILE8
710    #define PCRE_CONFIG               pcre_config
711    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
712    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
713    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
714    #define PCRE_EXEC                 PCRE_EXEC8
715    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
716    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
717    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
718    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
719    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
720    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
721    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
722    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
723    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
724    #define PCRE_MAKETABLES           pcre_maketables()
725    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
726    #define PCRE_PRINTINT             PCRE_PRINTINT8
727    #define PCRE_STUDY                PCRE_STUDY8
728    
729    /* ----- Only 16-bit mode is supported ----- */
730    
731    #elif defined SUPPORT_PCRE16
732    #define CHAR_SIZE                 2
733    #define PCHARS                    PCHARS16
734    #define PCHARSV                   PCHARSV16
735    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
736    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
737    #define STRLEN                    STRLEN16
738    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
739    #define PCRE_COMPILE              PCRE_COMPILE16
740    #define PCRE_CONFIG               pcre16_config
741    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
742    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
743    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
744    #define PCRE_EXEC                 PCRE_EXEC16
745    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
746    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
747    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
748    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
749    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
750    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
751    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
752    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
753    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
754    #define PCRE_MAKETABLES           pcre16_maketables()
755    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756    #define PCRE_PRINTINT             PCRE_PRINTINT16
757    #define PCRE_STUDY                PCRE_STUDY16
758    
759    /* ----- Only 32-bit mode is supported ----- */
760    
761    #elif defined SUPPORT_PCRE32
762    #define CHAR_SIZE                 4
763    #define PCHARS                    PCHARS32
764    #define PCHARSV                   PCHARSV32
765    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME32
766    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT32
767    #define STRLEN                    STRLEN32
768    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK32
769    #define PCRE_COMPILE              PCRE_COMPILE32
770    #define PCRE_CONFIG               pcre32_config
771    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING32
773    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC32
774    #define PCRE_EXEC                 PCRE_EXEC32
775    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY32
776    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING32
777    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST32
778    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING32
779    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER32
780    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING32
781    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST32
782    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC32
783    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE32
784    #define PCRE_MAKETABLES           pcre32_maketables()
785    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786    #define PCRE_PRINTINT             PCRE_PRINTINT32
787    #define PCRE_STUDY                PCRE_STUDY32
788    
789  #endif  #endif
790    
791    /* ----- End of mode-specific function call macros ----- */
792    
793    
794  /* Other parameters */  /* Other parameters */
795    
# Line 173  UTF8 support if PCRE is built without it Line 801  UTF8 support if PCRE is built without it
801  #endif  #endif
802  #endif  #endif
803    
804    #if !defined NODFA
805    #define DFA_WS_DIMENSION 1000
806    #endif
807    
808  /* This is the default loop count for timing. */  /* This is the default loop count for timing. */
809    
810  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
# Line 187  static int callout_fail_count; Line 819  static int callout_fail_count;
819  static int callout_fail_id;  static int callout_fail_id;
820  static int debug_lengths;  static int debug_lengths;
821  static int first_callout;  static int first_callout;
822    static int jit_was_used;
823  static int locale_set = 0;  static int locale_set = 0;
824  static int show_malloc;  static int show_malloc;
825  static int use_utf8;  static int use_utf;
826  static size_t gotten_store;  static size_t gotten_store;
827    static size_t first_gotten_store = 0;
828  static const unsigned char *last_callout_mark = NULL;  static const unsigned char *last_callout_mark = NULL;
829    
830  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
831    
832  static int buffer_size = 50000;  static int buffer_size = 50000;
833  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
834  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
835  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
836    
837    /* Another buffer is needed for translation to 16/32-bit character strings. It
838    will be obtained and extended as required. */
839    
840    #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
841    
842    /* We need the table of operator lengths that is used for 16/32-bit compiling, in
843    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
844    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
845    appropriately for the 16/32-bit world. Just as a safety check, make sure that
846    COMPILE_PCRE[16|32] is *not* set. */
847    
848    #ifdef COMPILE_PCRE16
849    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
850    #endif
851    
852    #ifdef COMPILE_PCRE32
853    #error COMPILE_PCRE32 must not be set when compiling pcretest.c
854    #endif
855    
856    #if LINK_SIZE == 2
857    #undef LINK_SIZE
858    #define LINK_SIZE 1
859    #elif LINK_SIZE == 3 || LINK_SIZE == 4
860    #undef LINK_SIZE
861    #define LINK_SIZE 2
862    #else
863    #error LINK_SIZE must be either 2, 3, or 4
864    #endif
865    
866    #undef IMM2_SIZE
867    #define IMM2_SIZE 1
868    
869    #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
870    
871    #ifdef SUPPORT_PCRE16
872    static int buffer16_size = 0;
873    static pcre_uint16 *buffer16 = NULL;
874    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
875    #endif  /* SUPPORT_PCRE16 */
876    
877    #ifdef SUPPORT_PCRE32
878    static int buffer32_size = 0;
879    static pcre_uint32 *buffer32 = NULL;
880    static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
881    #endif  /* SUPPORT_PCRE32 */
882    
883    /* If we have 8-bit support, default to it; if there is also 16- or 32-bit support,
884    the mode can be changed by an option. If there is no 8-bit support, there must be
885    16- or 32-bit support, so default to 16-bit mode if available, otherwise 32-bit. */
886    
887    #if defined SUPPORT_PCRE8
888    static int pcre_mode = PCRE8_MODE;
889    #elif defined SUPPORT_PCRE16
890    static int pcre_mode = PCRE16_MODE;
891    #elif defined SUPPORT_PCRE32
892    static int pcre_mode = PCRE32_MODE;
893    #endif
894    
895    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
896    
897    static int jit_study_bits[] =
898      {
899      PCRE_STUDY_JIT_COMPILE,
900      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
901      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
902      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
903      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
904      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
905      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
906        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
907    };
908    
909    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
910      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
911    
912  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
913    
# Line 213  static const char *errtexts[] = { Line 922  static const char *errtexts[] = {
922    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
923    "match limit exceeded",    "match limit exceeded",
924    "callout error code",    "callout error code",
925    NULL,  /* BADUTF8 is handled specially */    NULL,  /* BADUTF8/16 is handled specially */
926    "bad UTF-8 offset",    NULL,  /* BADUTF8/16 offset is handled specially */
927    NULL,  /* PARTIAL is handled specially */    NULL,  /* PARTIAL is handled specially */
928    "not used - internal error",    "not used - internal error",
929    "internal error - pattern overwritten?",    "internal error - pattern overwritten?",
# Line 228  static const char *errtexts[] = { Line 937  static const char *errtexts[] = {
937    "not used - internal error",    "not used - internal error",
938    "invalid combination of newline options",    "invalid combination of newline options",
939    "bad offset value",    "bad offset value",
940    NULL,  /* SHORTUTF8 is handled specially */    NULL,  /* SHORTUTF8/16 is handled specially */
941    "nested recursion at the same subject position",    "nested recursion at the same subject position",
942    "JIT stack limit reached"    "JIT stack limit reached",
943      "pattern compiled in wrong mode: 8-bit/16-bit error",
944      "pattern compiled with other endianness",
945      "invalid data in workspace for DFA restart"
946  };  };
947    
948    
# Line 246  the L (locale) option also adjusts the t Line 958  the L (locale) option also adjusts the t
958  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
959  only ASCII characters. */  only ASCII characters. */
960    
961  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
962    
963  /* This table is a lower casing table. */  /* This table is a lower casing table. */
964    
# Line 419  graph, print, punct, and cntrl. Other cl Line 1131  graph, print, punct, and cntrl. Other cl
1131  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
1132  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
1133    
1134  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
1135  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
1136  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
1137  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 582  return sys_errlist[n]; Line 1294  return sys_errlist[n];
1294  #endif /* HAVE_STRERROR */  #endif /* HAVE_STRERROR */
1295    
1296    
1297    
1298    /*************************************************
1299    *       Print newline configuration              *
1300    *************************************************/
1301    
1302    /*
1303    Arguments:
1304      rc         the return code from PCRE_CONFIG_NEWLINE
1305      isc        TRUE if called from "-C newline"
1306    Returns:     nothing
1307    */
1308    
1309    static void
1310    print_newline_config(int rc, BOOL isc)
1311    {
1312    const char *s = NULL;
1313    if (!isc) printf("  Newline sequence is ");
1314    switch(rc)
1315      {
1316      case CHAR_CR: s = "CR"; break;
1317      case CHAR_LF: s = "LF"; break;
1318      case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1319      case -1: s = "ANY"; break;
1320      case -2: s = "ANYCRLF"; break;
1321    
1322      default:
1323      printf("a non-standard value: 0x%04x\n", rc);
1324      return;
1325      }
1326    
1327    printf("%s\n", s);
1328    }
1329    
1330    
1331    
1332  /*************************************************  /*************************************************
1333  *         JIT memory callback                    *  *         JIT memory callback                    *
1334  *************************************************/  *************************************************/
1335    
1336  static pcre_jit_stack* jit_callback(void *arg)  static pcre_jit_stack* jit_callback(void *arg)
1337  {  {
1338    jit_was_used = TRUE;
1339  return (pcre_jit_stack *)arg;  return (pcre_jit_stack *)arg;
1340  }  }
1341    
1342    
1343    #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1344    /*************************************************
1345    *            Convert UTF-8 string to value       *
1346    *************************************************/
1347    
1348    /* This function takes one or more bytes that represents a UTF-8 character,
1349    and returns the value of the character.
1350    
1351    Argument:
1352      utf8bytes   a pointer to the byte vector
1353      vptr        a pointer to an int to receive the value
1354    
1355    Returns:      >  0 => the number of bytes consumed
1356                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1357    */
1358    
1359    static int
1360    utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1361    {
1362    pcre_uint32 c = *utf8bytes++;
1363    pcre_uint32 d = c;
1364    int i, j, s;
1365    
1366    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1367      {
1368      if ((d & 0x80) == 0) break;
1369      d <<= 1;
1370      }
1371    
1372    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1373    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1374    
1375    /* i now has a value in the range 1-5 */
1376    
1377    s = 6*i;
1378    d = (c & utf8_table3[i]) << s;
1379    
1380    for (j = 0; j < i; j++)
1381      {
1382      c = *utf8bytes++;
1383      if ((c & 0xc0) != 0x80) return -(j+1);
1384      s -= 6;
1385      d |= (c & 0x3f) << s;
1386      }
1387    
1388    /* Check that encoding was the correct unique one */
1389    
1390    for (j = 0; j < utf8_table1_size; j++)
1391      if (d <= utf8_table1[j]) break;
1392    if (j != i) return -(i+1);
1393    
1394    /* Valid value */
1395    
1396    *vptr = d;
1397    return i+1;
1398    }
1399    #endif /* NOUTF || SUPPORT_PCRE16 */
1400    
1401    
1402    
1403    #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1404    /*************************************************
1405    *       Convert character value to UTF-8         *
1406    *************************************************/
1407    
1408    /* This function takes an integer value in the range 0 - 0x7fffffff
1409    and encodes it as a UTF-8 character in 0 to 6 bytes.
1410    
1411    Arguments:
1412      cvalue     the character value
1413      utf8bytes  pointer to buffer for result - at least 6 bytes long
1414    
1415    Returns:     number of characters placed in the buffer
1416    */
1417    
1418    static int
1419    ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1420    {
1421    register int i, j;
1422    if (cvalue > 0x7fffffffu)
1423      return -1;
1424    for (i = 0; i < utf8_table1_size; i++)
1425      if (cvalue <= utf8_table1[i]) break;
1426    utf8bytes += i;
1427    for (j = i; j > 0; j--)
1428     {
1429     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1430     cvalue >>= 6;
1431     }
1432    *utf8bytes = utf8_table2[i] | cvalue;
1433    return i + 1;
1434    }
1435    #endif
1436    
1437    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is replaced by a larger block
when it is too small for 2*len + 2 bytes. That is always enough: when the input
is copied byte by byte, each byte becomes one 16-bit unit; when it is decoded
as UTF-8, a character below 0x10000 becomes one unit and any other character
becomes two units.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

The input is copied byte by byte only for a non-UTF regex. In every other case
(UTF mode, or any data line) it is decoded as UTF-8, so that data lines can
contain values greater than 255.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;         /* Size in bytes, including the terminator */

if (buffer16_size < needed)
  {
  free(buffer16);               /* Harmless when buffer16 is still NULL */
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  pcre_uint32 c = 0;
  while (len > 0)
    {
    int chlen = utf82ord(p, &c);
    if (chlen <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += chlen;
    len -= chlen;
    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which exists only in UTF mode */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }
else
  {
  /* Non-UTF regex: widen each byte as it stands */
  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1516    
#ifdef SUPPORT_PCRE32
/*************************************************
*         Convert a string to 32-bit             *
*************************************************/

/* The result is always left in buffer32, which is replaced by a larger block
when it is too small for 4*len + 4 bytes. That is always enough, because every
32-bit unit that is produced consumes at least one input byte, whether the
input is copied byte by byte or decoded as UTF-8.

The input is copied byte by byte only for a non-UTF regex. In every other case
(UTF mode, or any data line) it is decoded as UTF-8, so that data lines can
contain values greater than 255.

In UTF mode a surrogate value is rejected in a regex but is let through in a
data line. Outside UTF mode no range checks are applied to the decoded values.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-32)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 32-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered in UTF mode
             OR -3 if a surrogate is encountered in a regex in UTF mode
*/

static int
to32(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint32 *out;
int needed = 4*len + 4;         /* Size in bytes, including the terminator */

if (buffer32_size < needed)
  {
  free(buffer32);               /* Harmless when buffer32 is still NULL */
  buffer32_size = needed;
  buffer32 = (pcre_uint32 *)malloc(buffer32_size);
  if (buffer32 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
    exit(1);
    }
  }

out = buffer32;

if (utf || data)
  {
  pcre_uint32 c = 0;
  while (len > 0)
    {
    int chlen = utf82ord(p, &c);
    if (chlen <= 0) return -1;
    if (utf)
      {
      if (c > 0x10ffff) return -2;
      if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
      }
    p += chlen;
    len -= chlen;
    *out++ = c;
    }
  }
else
  {
  /* Non-UTF regex: widen each byte as it stands */
  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer32;
}
#endif
1594    
1595  /*************************************************  /*************************************************
1596  *        Read or extend an input line            *  *        Read or extend an input line            *
1597  *************************************************/  *************************************************/
# Line 615  Returns:       pointer to the start of n Line 1615  Returns:       pointer to the start of n
1615                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1616  */  */
1617    
1618  static uschar *  static pcre_uint8 *
1619  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1620  {  {
1621  uschar *here = start;  pcre_uint8 *here = start;
1622    
1623  for (;;)  for (;;)
1624    {    {
1625    int rlen = (int)(buffer_size - (here - buffer));    size_t rlen = (size_t)(buffer_size - (here - buffer));
1626    
1627    if (rlen > 1000)    if (rlen > 1000)
1628      {      {
1629      int dlen;      int dlen;
1630    
1631      /* If libreadline support is required, use readline() to read a line if the      /* If libreadline or libedit support is required, use readline() to read a
1632      input is a terminal. Note that readline() removes the trailing newline, so      line if the input is a terminal. Note that readline() removes the trailing
1633      we must put it back again, to be compatible with fgets(). */      newline, so we must put it back again, to be compatible with fgets(). */
1634    
1635  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1636      if (isatty(fileno(f)))      if (isatty(fileno(f)))
1637        {        {
1638        size_t len;        size_t len;
# Line 665  for (;;) Line 1665  for (;;)
1665    else    else
1666      {      {
1667      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1668      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1669      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1670      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1671    
1672      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1673        {        {
# Line 698  return NULL;  /* Control never gets here Line 1698  return NULL;  /* Control never gets here
1698    
1699    
1700    
   
   
   
   
1701  /*************************************************  /*************************************************
1702  *          Read number from string               *  *          Read number from string               *
1703  *************************************************/  *************************************************/
# Line 718  Returns:        the unsigned long Line 1714  Returns:        the unsigned long
1714  */  */
1715    
1716  static int  static int
1717  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1718  {  {
1719  int result = 0;  int result = 0;
1720  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 729  return(result); Line 1725  return(result);
1725    
1726    
1727    
   
1728  /*************************************************  /*************************************************
1729  *            Convert UTF-8 string to value       *  *             Print one character                *
1730  *************************************************/  *************************************************/
1731    
1732  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
1733    
1734  Argument:  static int pchar(pcre_uint32 c, FILE *f)
1735    utf8bytes   a pointer to the byte vector  {
1736    vptr        a pointer to an int to receive the value  int n;
1737    if (PRINTOK(c))
1738      {
1739      if (f != NULL) fprintf(f, "%c", c);
1740      return 1;
1741      }
1742    
1743  Returns:      >  0 => the number of bytes consumed  if (c < 0x100)
1744                -6 to 0 => malformed UTF-8 character at offset = (-return)    {
1745  */    if (use_utf)
1746        {
1747        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1748        return 6;
1749        }
1750      else
1751        {
1752        if (f != NULL) fprintf(f, "\\x%02x", c);
1753        return 4;
1754        }
1755      }
1756    
1757  #if !defined NOUTF8  if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1758    return n >= 0 ? n : 0;
1759    }
1760    
1761  static int  
1762  utf82ord(unsigned char *utf8bytes, int *vptr)  
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Outputs a byte string one character at a time by means of pchar(). When
use_utf is set (and UTF support is compiled in), each valid UTF-8 sequence that
lies wholly within the string is output as one character; a byte that does not
start such a sequence is output on its own.

Arguments:
  p          the string
  length     number of bytes, or negative if the string is zero-terminated
  f          the output file, or NULL just to count without printing

Returns:     the sum of the values returned by pchar()
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
pcre_uint32 ch = 0;
int printed = 0;
int remaining = (length < 0)? (int)strlen((char *)p) : length;

for (; remaining > 0; remaining--)
  {
#if !defined NOUTF
  if (use_utf)
    {
    int used = utf82ord(p, &ch);
    if (used > 0 && used <= remaining)   /* Mustn't run over the end */
      {
      /* All but one of the bytes are accounted for here; the loop step
      accounts for the last one. */
      remaining -= used - 1;
      p += used;
      printed += pchar(ch, f);
      continue;
      }
    }
#endif
  ch = *p++;
  printed += pchar(ch, f);
  }

return printed;
}
#endif
1801    
 /* i now has a value in the range 1-5 */  
1802    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1803    
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Returns the number of 16-bit units that precede the terminating zero. */

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1816    
1817    
1818    
#ifdef SUPPORT_PCRE32
/*************************************************
*    Find length of 0-terminated 32-bit string   *
*************************************************/

/* Returns the number of 32-bit units that precede the terminating zero. */

static int strlen32(PCRE_SPTR32 p)
{
PCRE_SPTR32 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE32 */
1831    
1832    
1833    
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Outputs a string of 16-bit units one character at a time by means of
pchar(). When use_utf is set (and UTF support is compiled in), a high surrogate
(0xD800-0xDBFF) that is immediately followed, within the string, by a low
surrogate (0xDC00-0xDFFF) is combined with it and output as one character. A
surrogate that is not part of such a pair is output as it stands.

Arguments:
  p          the string
  length     number of 16-bit units, or negative if the string is
               zero-terminated
  f          the output file, or NULL just to count without printing

Returns:     the sum of the values returned by pchar()
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  pcre_uint32 c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The low surrogate range includes 0xDFFF itself. (The upper bound used
    to be tested with "<", which left pairs ending in 0xDFFF uncombined.) */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1870    
 for (j = 0; j < utf8_table1_size; j++)  
   if (d <= utf8_table1[j]) break;  
 if (j != i) return -(i+1);  
1871    
 /* Valid value */  
1872    
#ifdef SUPPORT_PCRE32
/*************************************************
*           Print 32-bit character string        *
*************************************************/

/* Outputs a string of 32-bit units by passing each unit to pchar() in turn.

Arguments:
  p          the string
  length     number of 32-bit units, or negative if the string is
               zero-terminated
  f          the output file, or NULL just to count without printing

Returns:     the sum of the values returned by pchar()
*/

static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
{
int total = 0;
int count = (length < 0)? strlen32(p) : length;
int i;

for (i = 0; i < count; i++)
  {
  pcre_uint32 c = p[i];
  total += pchar(c, f);
  }

return total;
}
#endif  /* SUPPORT_PCRE32 */
 #endif  
1897    
1898    
1899    
1900    #ifdef SUPPORT_PCRE8
1901  /*************************************************  /*************************************************
1902  *       Convert character value to UTF-8         *  *     Read a capture name (8-bit) and check it   *
1903  *************************************************/  *************************************************/
1904    
1905  /* This function takes an integer value in the range 0 - 0x7fffffff  static pcre_uint8 *
1906  and encodes it as a UTF-8 character in 0 to 6 bytes.  read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1907    {
1908    pcre_uint8 *npp = *pp;
1909    while (isalnum(*p)) *npp++ = *p++;
1910    *npp++ = 0;
1911    *npp = 0;
1912    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1913      {
1914      fprintf(outfile, "no parentheses with name \"");
1915      PCHARSV(*pp, 0, -1, outfile);
1916      fprintf(outfile, "\"\n");
1917      }
1918    
1919  Arguments:  *pp = npp;
1920    cvalue     the character value  return p;
1921    utf8bytes  pointer to buffer for result - at least 6 bytes long  }
1922    #endif  /* SUPPORT_PCRE8 */
1923    
 Returns:     number of characters placed in the buffer  
 */  
1924    
 #if !defined NOUTF8  
1925    
1926  static int  #ifdef SUPPORT_PCRE16
1927  ord2utf8(int cvalue, uschar *utf8bytes)  /*************************************************
1928    *     Read a capture name (16-bit) and check it  *
1929    *************************************************/
1930    
1931    /* Note that the text being read is 8-bit. */
1932    
1933    static pcre_uint8 *
1934    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1935  {  {
1936  register int i, j;  pcre_uint16 *npp = *pp;
1937  for (i = 0; i < utf8_table1_size; i++)  while (isalnum(*p)) *npp++ = *p++;
1938    if (cvalue <= utf8_table1[i]) break;  *npp++ = 0;
1939  utf8bytes += i;  *npp = 0;
1940  for (j = i; j > 0; j--)  if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1941   {    {
1942   *utf8bytes-- = 0x80 | (cvalue & 0x3f);    fprintf(outfile, "no parentheses with name \"");
1943   cvalue >>= 6;    PCHARSV(*pp, 0, -1, outfile);
1944   }    fprintf(outfile, "\"\n");
1945  *utf8bytes = utf8_table2[i] | cvalue;    }
1946  return i + 1;  *pp = npp;
1947    return p;
1948  }  }
1949    #endif  /* SUPPORT_PCRE16 */
 #endif  
1950    
1951    
1952    
1953    #ifdef SUPPORT_PCRE32
1954  /*************************************************  /*************************************************
1955  *             Print character string             *  *     Read a capture name (32-bit) and check it  *
1956  *************************************************/  *************************************************/
1957    
1958  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Note that the text being read is 8-bit. */
 mode. Yields number of characters printed. If handed a NULL file, just counts  
 chars without printing. */  
1959    
1960  static int pchars(unsigned char *p, int length, FILE *f)  static pcre_uint8 *
1961    read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
1962  {  {
1963  int c = 0;  pcre_uint32 *npp = *pp;
1964  int yield = 0;  while (isalnum(*p)) *npp++ = *p++;
1965    *npp++ = 0;
1966  while (length-- > 0)  *npp = 0;
1967    if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
1968    {    {
1969  #if !defined NOUTF8    fprintf(outfile, "no parentheses with name \"");
1970    if (use_utf8)    PCHARSV(*pp, 0, -1, outfile);
1971      {    fprintf(outfile, "\"\n");
     int rc = utf82ord(p, &c);  
   
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
       {  
       length -= rc - 1;  
       p += rc;  
       if (PRINTHEX(c))  
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
       }  
     }  
 #endif  
   
    /* Not UTF-8, or malformed UTF-8  */  
   
   c = *p++;  
   if (PRINTHEX(c))  
     {  
     if (f != NULL) fprintf(f, "%c", c);  
     yield++;  
     }  
   else  
     {  
     if (f != NULL) fprintf(f, "\\x%02x", c);  
     yield += 4;  
     }  
1972    }    }
1973    *pp = npp;
1974  return yield;  return p;
1975  }  }
1976    #endif  /* SUPPORT_PCRE32 */
1977    
1978    
1979    
# Line 916  if (callout_extra) Line 2002  if (callout_extra)
2002      else      else
2003        {        {
2004        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
2005        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
2006          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
2007        fprintf(f, "\n");        fprintf(f, "\n");
2008        }        }
# Line 929  printed lengths of the substrings. */ Line 2015  printed lengths of the substrings. */
2015    
2016  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
2017    
2018  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2019  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
2020    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
2021    
2022  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2023    
2024  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
2025    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
2026    
2027  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 974  first_callout = 0; Line 2060  first_callout = 0;
2060    
2061  if (cb->mark != last_callout_mark)  if (cb->mark != last_callout_mark)
2062    {    {
2063    fprintf(outfile, "Latest Mark: %s\n",    if (cb->mark == NULL)
2064      (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));      fprintf(outfile, "Latest Mark: <unset>\n");
2065      else
2066        {
2067        fprintf(outfile, "Latest Mark: ");
2068        PCHARSV(cb->mark, 0, -1, outfile);
2069        putc('\n', outfile);
2070        }
2071    last_callout_mark = cb->mark;    last_callout_mark = cb->mark;
2072    }    }
2073    
# Line 999  return (cb->callout_number != callout_fa Line 2091  return (cb->callout_number != callout_fa
2091  *************************************************/  *************************************************/
2092    
2093  /* Alternative malloc function, to test functionality and save the size of a  /* Alternative malloc function, to test functionality and save the size of a
2094  compiled re. The show_malloc variable is set only during matching. */  compiled re, which is the first store request that pcre_compile() makes. The
2095    show_malloc variable is set only during matching. */
2096    
2097  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
2098  {  {
2099  void *block = malloc(size);  void *block = malloc(size);
2100  gotten_store = size;  gotten_store = size;
2101    if (first_gotten_store == 0) first_gotten_store = size;
2102  if (show_malloc)  if (show_malloc)
2103    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
2104  return block;  return block;
# Line 1039  free(block); Line 2133  free(block);
2133  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
2134  *************************************************/  *************************************************/
2135    
2136  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
2137    one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2138    value, but the code is defensive.
2139    
2140    Arguments:
2141      re        compiled regex
2142      study     study data
2143      option    PCRE_INFO_xxx option
2144      ptr       where to put the data
2145    
2146    Returns:    0 when OK, < 0 on error
2147    */
2148    
2149  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
2150    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2151  {  {
2152  int rc;  int rc;
2153  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
2154    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (pcre_mode == PCRE32_MODE)
2155    #ifdef SUPPORT_PCRE32
2156      rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2157    #else
2158      rc = PCRE_ERROR_BADMODE;
2159    #endif
2160    else if (pcre_mode == PCRE16_MODE)
2161    #ifdef SUPPORT_PCRE16
2162      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2163    #else
2164      rc = PCRE_ERROR_BADMODE;
2165    #endif
2166    else
2167    #ifdef SUPPORT_PCRE8
2168      rc = pcre_fullinfo(re, study, option, ptr);
2169    #else
2170      rc = PCRE_ERROR_BADMODE;
2171    #endif
2172    
2173    if (rc < 0)
2174      {
2175      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2176        pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2177      if (rc == PCRE_ERROR_BADMODE)
2178        fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2179          "%d-bit mode\n", 8 * CHAR_SIZE,
2180          8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2181      }
2182    
2183    return rc;
2184  }  }
2185    
2186    
2187    
2188  /*************************************************  /*************************************************
2189  *         Byte flipping function                 *  *             Swap byte functions                *
2190  *************************************************/  *************************************************/
2191    
2192  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2193  byteflip(unsigned long int value, int n)  value, respectively.
2194    
2195    Arguments:
2196      value        any number
2197    
2198    Returns:       the byte swapped value
2199    */
2200    
2201    static pcre_uint32
2202    swap_uint32(pcre_uint32 value)
2203  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
2204  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
2205         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
2206         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
2207         ((value & 0xff000000) >> 24);         (value >> 24);
2208    }
2209    
2210    static pcre_uint16
2211    swap_uint16(pcre_uint16 value)
2212    {
2213    return (value >> 8) | (value << 8);
2214    }
2215    
2216    
2217    
2218    /*************************************************
2219    *        Flip bytes in a compiled pattern        *
2220    *************************************************/
2221    
2222    /* This function is called if the 'F' option was present on a pattern that is
2223    to be written to a file. We flip the bytes of all the integer fields in the
2224    regex data block and the study block. In 16-bit mode this also flips relevant
2225    bytes in the pattern itself. This is to make it possible to test PCRE's
2226    ability to reload byte-flipped patterns, e.g. those compiled on a different
2227    architecture. */
2228    
2229    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2230    static void
2231    regexflip8_or_16(pcre *ere, pcre_extra *extra)
2232    {
2233    real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2234    #ifdef SUPPORT_PCRE16
2235    int op;
2236    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2237    int length = re->name_count * re->name_entry_size;
2238    #ifdef SUPPORT_UTF
2239    BOOL utf = (re->options & PCRE_UTF16) != 0;
2240    BOOL utf16_char = FALSE;
2241    #endif /* SUPPORT_UTF */
2242    #endif /* SUPPORT_PCRE16 */
2243    
2244    /* Always flip the bytes in the main data block and study blocks. */
2245    
2246    re->magic_number = REVERSED_MAGIC_NUMBER;
2247    re->size = swap_uint32(re->size);
2248    re->options = swap_uint32(re->options);
2249    re->flags = swap_uint16(re->flags);
2250    re->top_bracket = swap_uint16(re->top_bracket);
2251    re->top_backref = swap_uint16(re->top_backref);
2252    re->first_char = swap_uint16(re->first_char);
2253    re->req_char = swap_uint16(re->req_char);
2254    re->name_table_offset = swap_uint16(re->name_table_offset);
2255    re->name_entry_size = swap_uint16(re->name_entry_size);
2256    re->name_count = swap_uint16(re->name_count);
2257    
2258    if (extra != NULL)
2259      {
2260      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2261      rsd->size = swap_uint32(rsd->size);
2262      rsd->flags = swap_uint32(rsd->flags);
2263      rsd->minlength = swap_uint32(rsd->minlength);
2264      }
2265    
2266    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2267    in the name table, if present, and then in the pattern itself. */
2268    
2269    #ifdef SUPPORT_PCRE16
2270    if (pcre_mode != PCRE16_MODE) return;
2271    
2272    while(TRUE)
2273      {
2274      /* Swap previous characters. */
2275      while (length-- > 0)
2276        {
2277        *ptr = swap_uint16(*ptr);
2278        ptr++;
2279        }
2280    #ifdef SUPPORT_UTF
2281      if (utf16_char)
2282        {
2283        if ((ptr[-1] & 0xfc00) == 0xd800)
2284          {
2285          /* We know that there is only one extra character in UTF-16. */
2286          *ptr = swap_uint16(*ptr);
2287          ptr++;
2288          }
2289        }
2290      utf16_char = FALSE;
2291    #endif /* SUPPORT_UTF */
2292    
2293      /* Get next opcode. */
2294    
2295      length = 0;
2296      op = *ptr;
2297      *ptr++ = swap_uint16(op);
2298    
2299      switch (op)
2300        {
2301        case OP_END:
2302        return;
2303    
2304    #ifdef SUPPORT_UTF
2305        case OP_CHAR:
2306        case OP_CHARI:
2307        case OP_NOT:
2308        case OP_NOTI:
2309        case OP_STAR:
2310        case OP_MINSTAR:
2311        case OP_PLUS:
2312        case OP_MINPLUS:
2313        case OP_QUERY:
2314        case OP_MINQUERY:
2315        case OP_UPTO:
2316        case OP_MINUPTO:
2317        case OP_EXACT:
2318        case OP_POSSTAR:
2319        case OP_POSPLUS:
2320        case OP_POSQUERY:
2321        case OP_POSUPTO:
2322        case OP_STARI:
2323        case OP_MINSTARI:
2324        case OP_PLUSI:
2325        case OP_MINPLUSI:
2326        case OP_QUERYI:
2327        case OP_MINQUERYI:
2328        case OP_UPTOI:
2329        case OP_MINUPTOI:
2330        case OP_EXACTI:
2331        case OP_POSSTARI:
2332        case OP_POSPLUSI:
2333        case OP_POSQUERYI:
2334        case OP_POSUPTOI:
2335        case OP_NOTSTAR:
2336        case OP_NOTMINSTAR:
2337        case OP_NOTPLUS:
2338        case OP_NOTMINPLUS:
2339        case OP_NOTQUERY:
2340        case OP_NOTMINQUERY:
2341        case OP_NOTUPTO:
2342        case OP_NOTMINUPTO:
2343        case OP_NOTEXACT:
2344        case OP_NOTPOSSTAR:
2345        case OP_NOTPOSPLUS:
2346        case OP_NOTPOSQUERY:
2347        case OP_NOTPOSUPTO:
2348        case OP_NOTSTARI:
2349        case OP_NOTMINSTARI:
2350        case OP_NOTPLUSI:
2351        case OP_NOTMINPLUSI:
2352        case OP_NOTQUERYI:
2353        case OP_NOTMINQUERYI:
2354        case OP_NOTUPTOI:
2355        case OP_NOTMINUPTOI:
2356        case OP_NOTEXACTI:
2357        case OP_NOTPOSSTARI:
2358        case OP_NOTPOSPLUSI:
2359        case OP_NOTPOSQUERYI:
2360        case OP_NOTPOSUPTOI:
2361        if (utf) utf16_char = TRUE;
2362    #endif
2363        /* Fall through. */
2364    
2365        default:
2366        length = OP_lengths16[op] - 1;
2367        break;
2368    
2369        case OP_CLASS:
2370        case OP_NCLASS:
2371        /* Skip the character bit map. */
2372        ptr += 32/sizeof(pcre_uint16);
2373        length = 0;
2374        break;
2375    
2376        case OP_XCLASS:
2377        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2378        if (LINK_SIZE > 1)
2379          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2380            - (1 + LINK_SIZE + 1));
2381        else
2382          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2383    
2384        /* Reverse the size of the XCLASS instance. */
2385        *ptr = swap_uint16(*ptr);
2386        ptr++;
2387        if (LINK_SIZE > 1)
2388          {
2389          *ptr = swap_uint16(*ptr);
2390          ptr++;
2391          }
2392    
2393        op = *ptr;
2394        *ptr = swap_uint16(op);
2395        ptr++;
2396        if ((op & XCL_MAP) != 0)
2397          {
2398          /* Skip the character bit map. */
2399          ptr += 32/sizeof(pcre_uint16);
2400          length -= 32/sizeof(pcre_uint16);
2401          }
2402        break;
2403        }
2404      }
2405    /* Control should never reach here in 16 bit mode. */
2406    #endif /* SUPPORT_PCRE16 */
2407    }
2408    #endif /* SUPPORT_PCRE[8|16] */
2409    
2410    
2411    
2412    #if defined SUPPORT_PCRE32
2413    static void
2414    regexflip_32(pcre *ere, pcre_extra *extra)
2415    {
2416    real_pcre32 *re = (real_pcre32 *)ere;
2417    int op;
2418    pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2419    int length = re->name_count * re->name_entry_size;
2420    #ifdef SUPPORT_UTF
2421    BOOL utf = (re->options & PCRE_UTF32) != 0;
2422    #endif /* SUPPORT_UTF */
2423    
2424    /* Always flip the bytes in the main data block and study blocks. */
2425    
2426    re->magic_number = REVERSED_MAGIC_NUMBER;
2427    re->size = swap_uint32(re->size);
2428    re->options = swap_uint32(re->options);
2429    re->flags = swap_uint16(re->flags);
2430    re->top_bracket = swap_uint16(re->top_bracket);
2431    re->top_backref = swap_uint16(re->top_backref);
2432    re->first_char = swap_uint32(re->first_char);
2433    re->req_char = swap_uint32(re->req_char);
2434    re->name_table_offset = swap_uint16(re->name_table_offset);
2435    re->name_entry_size = swap_uint16(re->name_entry_size);
2436    re->name_count = swap_uint16(re->name_count);
2437    
2438    if (extra != NULL)
2439      {
2440      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2441      rsd->size = swap_uint32(rsd->size);
2442      rsd->flags = swap_uint32(rsd->flags);
2443      rsd->minlength = swap_uint32(rsd->minlength);
2444      }
2445    
2446    /* In 32-bit mode we must swap bytes
2447    in the name table, if present, and then in the pattern itself. */
2448    
2449    while(TRUE)
2450      {
2451      /* Swap previous characters. */
2452      while (length-- > 0)
2453        {
2454        *ptr = swap_uint32(*ptr);
2455        ptr++;
2456        }
2457    
2458      /* Get next opcode. */
2459    
2460      length = 0;
2461      op = *ptr;
2462      *ptr++ = swap_uint32(op);
2463    
2464      switch (op)
2465        {
2466        case OP_END:
2467        return;
2468    
2469        default:
2470        length = OP_lengths32[op] - 1;
2471        break;
2472    
2473        case OP_CLASS:
2474        case OP_NCLASS:
2475        /* Skip the character bit map. */
2476        ptr += 32/sizeof(pcre_uint32);
2477        length = 0;
2478        break;
2479    
2480        case OP_XCLASS:
2481        /* LINK_SIZE can only be 1 in 32-bit mode. */
2482        length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2483    
2484        /* Reverse the size of the XCLASS instance. */
2485        *ptr = swap_uint32(*ptr);
2486        ptr++;
2487    
2488        op = *ptr;
2489        *ptr = swap_uint32(op);
2490        ptr++;
2491        if ((op & XCL_MAP) != 0)
2492          {
2493          /* Skip the character bit map. */
2494          ptr += 32/sizeof(pcre_uint32);
2495          length -= 32/sizeof(pcre_uint32);
2496          }
2497        break;
2498        }
2499      }
2500    /* Control should never reach here in 32 bit mode. */
2501  }  }
2502    
2503    #endif /* SUPPORT_PCRE32 */
2504    
2505    
2506    
2507    static void
2508    regexflip(pcre *ere, pcre_extra *extra)
2509    {
2510    #if defined SUPPORT_PCRE32
2511      if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2512        regexflip_32(ere, extra);
2513    #endif
2514    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2515      if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2516        regexflip8_or_16(ere, extra);
2517    #endif
2518    }
2519    
2520    
2521    
# Line 1072  return ((value & 0x000000ff) << 24) | Line 2524  return ((value & 0x000000ff) << 24) |
2524  *************************************************/  *************************************************/
2525    
2526  static int  static int
2527  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2528    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2529    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2530  {  {
# Line 1087  for (;;) Line 2539  for (;;)
2539    {    {
2540    *limit = mid;    *limit = mid;
2541    
2542    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2543      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2544    
2545    if (count == errnumber)    if (count == errnumber)
# Line 1132  Returns:    < 0, = 0, or > 0, according Line 2584  Returns:    < 0, = 0, or > 0, according
2584  */  */
2585    
2586  static int  static int
2587  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2588  {  {
2589  while (n--)  while (n--)
2590    {    {
# Line 1159  Returns:      appropriate PCRE_NEWLINE_x Line 2611  Returns:      appropriate PCRE_NEWLINE_x
2611  */  */
2612    
2613  static int  static int
2614  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2615  {  {
2616  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2617  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2618  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2619  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2620  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2621  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2622  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2623  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2624  return 0;  return 0;
2625  }  }
# Line 1183  usage(void) Line 2635  usage(void)
2635  {  {
2636  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2637  printf("Input and output default to stdin and stdout.\n");  printf("Input and output default to stdin and stdout.\n");
2638  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2639  printf("If input is a terminal, readline() is used to read from it.\n");  printf("If input is a terminal, readline() is used to read from it.\n");
2640  #else  #else
2641  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2642  #endif  #endif
2643  printf("\nOptions:\n");  printf("\nOptions:\n");
2644  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2645    printf("  -16      use the 16-bit library\n");
2646    #endif
2647    #ifdef SUPPORT_PCRE32
2648    printf("  -32      use the 32-bit library\n");
2649    #endif
2650    printf("  -b       show compiled code\n");
2651  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2652    printf("  -C arg   show a specific compile-time option\n");
2653    printf("           and exit with its value. The arg can be:\n");
2654    printf("     linksize     internal link size [2, 3, 4]\n");
2655    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2656    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2657    printf("     pcre32       32 bit library support enabled [0, 1]\n");
2658    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2659    printf("     ucp          Unicode Properties supported [0, 1]\n");
2660    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2661    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2662  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2663  #if !defined NODFA  #if !defined NODFA
2664  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 1207  printf("  -q       quiet: do not output Line 2675  printf("  -q       quiet: do not output
2675  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2676  printf("  -s       force each pattern to be studied at basic level\n"  printf("  -s       force each pattern to be studied at basic level\n"
2677         "  -s+      force each pattern to be studied, using JIT if available\n"         "  -s+      force each pattern to be studied, using JIT if available\n"
2678           "  -s++     ditto, verifying when JIT was actually used\n"
2679           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2680           "             where 1 <= n <= 7 selects JIT options\n"
2681           "  -s++n    ditto, verifying when JIT was actually used\n"
2682         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2683  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2684  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 1226  options, followed by a set of test data, Line 2698  options, followed by a set of test data,
2698  int main(int argc, char **argv)  int main(int argc, char **argv)
2699  {  {
2700  FILE *infile = stdin;  FILE *infile = stdin;
2701    const char *version;
2702  int options = 0;  int options = 0;
2703  int study_options = 0;  int study_options = 0;
2704  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 1240  int quiet = 0; Line 2713  int quiet = 0;
2713  int size_offsets = 45;  int size_offsets = 45;
2714  int size_offsets_max;  int size_offsets_max;
2715  int *offsets = NULL;  int *offsets = NULL;
 #if !defined NOPOSIX  
 int posix = 0;  
 #endif  
2716  int debug = 0;  int debug = 0;
2717  int done = 0;  int done = 0;
2718  int all_use_dfa = 0;  int all_use_dfa = 0;
2719    int verify_jit = 0;
2720  int yield = 0;  int yield = 0;
2721  int stack_size;  int stack_size;
2722    
2723  pcre_jit_stack *jit_stack = NULL;  #if !defined NOPOSIX
2724    int posix = 0;
2725    #endif
2726    #if !defined NODFA
2727    int *dfa_workspace = NULL;
2728    #endif
2729    
2730  /* These vectors store, end-to-end, a list of captured substring names. Assume  pcre_jit_stack *jit_stack = NULL;
 that 1024 is plenty long enough for the few names we'll be testing. */  
2731    
2732  uschar copynames[1024];  /* These vectors store, end-to-end, a list of zero-terminated captured
2733  uschar getnames[1024];  substring names, each list itself being terminated by an empty name. Assume
2734    that 1024 is plenty long enough for the few names we'll be testing. It is
2735    easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2736    for the actual memory, to ensure alignment. */
2737    
2738    pcre_uint32 copynames[1024];
2739    pcre_uint32 getnames[1024];
2740    
2741    #ifdef SUPPORT_PCRE32
2742    pcre_uint32 *cn32ptr;
2743    pcre_uint32 *gn32ptr;
2744    #endif
2745    
2746  uschar *copynamesptr;  #ifdef SUPPORT_PCRE16
2747  uschar *getnamesptr;  pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2748    pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2749    pcre_uint16 *cn16ptr;
2750    pcre_uint16 *gn16ptr;
2751    #endif
2752    
2753  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2754  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2755    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2756    pcre_uint8 *cn8ptr;
2757    pcre_uint8 *gn8ptr;
2758    #endif
2759    
2760  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2761  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-
2762  pbuffer = (unsigned char *)malloc(buffer_size);  and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2763    
2764    buffer = (pcre_uint8 *)malloc(buffer_size);
2765    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2766    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2767    
2768  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2769    
# Line 1281  it set 0x8000, but then I was advised th Line 2778  it set 0x8000, but then I was advised th
2778  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2779  #endif  #endif
2780    
2781    /* Get the version number: both pcre_version() and pcre16_version() give the
2782    same answer. We just need to ensure that we call one that is available. */
2783    
2784    #if defined SUPPORT_PCRE8
2785    version = pcre_version();
2786    #elif defined SUPPORT_PCRE16
2787    version = pcre16_version();
2788    #elif defined SUPPORT_PCRE32
2789    version = pcre32_version();
2790    #endif
2791    
2792  /* Scan options */  /* Scan options */
2793    
2794  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2795    {    {
2796    unsigned char *endptr;    pcre_uint8 *endptr;
2797      char *arg = argv[op];
2798    
2799      if (strcmp(arg, "-m") == 0) showstore = 1;
2800      else if (strcmp(arg, "-s") == 0) force_study = 0;
2801    
2802    if (strcmp(argv[op], "-m") == 0) showstore = 1;    else if (strncmp(arg, "-s+", 3) == 0)
   else if (strcmp(argv[op], "-s") == 0) force_study = 0;  
   else if (strcmp(argv[op], "-s+") == 0)  
2803      {      {
2804        arg += 3;
2805        if (*arg == '+') { arg++; verify_jit = TRUE; }
2806      force_study = 1;      force_study = 1;
2807      force_study_options = PCRE_STUDY_JIT_COMPILE;      if (*arg == 0)
2808          force_study_options = jit_study_bits[6];
2809        else if (*arg >= '1' && *arg <= '7')
2810          force_study_options = jit_study_bits[*arg - '1'];
2811        else goto BAD_ARG;
2812        }
2813      else if (strcmp(arg, "-16") == 0)
2814        {
2815    #ifdef SUPPORT_PCRE16
2816        pcre_mode = PCRE16_MODE;
2817    #else
2818        printf("** This version of PCRE was built without 16-bit support\n");
2819        exit(1);
2820    #endif
2821        }
2822      else if (strcmp(arg, "-32") == 0)
2823        {
2824    #ifdef SUPPORT_PCRE32
2825        pcre_mode = PCRE32_MODE;
2826    #else
2827        printf("** This version of PCRE was built without 32-bit support\n");
2828        exit(1);
2829    #endif
2830      }      }
2831    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(arg, "-q") == 0) quiet = 1;
2832    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(arg, "-b") == 0) debug = 1;
2833    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(arg, "-i") == 0) showinfo = 1;
2834    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2835    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;    else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2836  #if !defined NODFA  #if !defined NODFA
2837    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2838  #endif  #endif
2839    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2840        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2841          *endptr == 0))          *endptr == 0))
2842      {      {
2843      op++;      op++;
2844      argc--;      argc--;
2845      }      }
2846    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2847      {      {
2848      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2849      int temp;      int temp;
2850      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2851                       *endptr == 0))                       *endptr == 0))
2852        {        {
2853        timeitm = temp;        timeitm = temp;
# Line 1323  while (argc > 1 && argv[op][0] == '-') Line 2857  while (argc > 1 && argv[op][0] == '-')
2857      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2858      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2859      }      }
2860    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2861        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2862          *endptr == 0))          *endptr == 0))
2863      {      {
2864  #if defined(_WIN32) || defined(WIN32) || defined(__minix)  #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2865      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2866      exit(1);      exit(1);
2867  #else  #else
# Line 1346  while (argc > 1 && argv[op][0] == '-') Line 2880  while (argc > 1 && argv[op][0] == '-')
2880  #endif  #endif
2881      }      }
2882  #if !defined NOPOSIX  #if !defined NOPOSIX
2883    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2884  #endif  #endif
2885    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2886      {      {
2887      int rc;      int rc;
2888      unsigned long int lrc;      unsigned long int lrc;
2889      printf("PCRE version %s\n", pcre_version());  
2890        if (argc > 2)
2891          {
2892          if (strcmp(argv[op + 1], "linksize") == 0)
2893            {
2894            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2895            printf("%d\n", rc);
2896            yield = rc;
2897            }
2898          else if (strcmp(argv[op + 1], "pcre8") == 0)
2899            {
2900    #ifdef SUPPORT_PCRE8
2901            printf("1\n");
2902            yield = 1;
2903    #else
2904            printf("0\n");
2905            yield = 0;
2906    #endif
2907            }
2908          else if (strcmp(argv[op + 1], "pcre16") == 0)
2909            {
2910    #ifdef SUPPORT_PCRE16
2911            printf("1\n");
2912            yield = 1;
2913    #else
2914            printf("0\n");
2915            yield = 0;
2916    #endif
2917            }
2918          else if (strcmp(argv[op + 1], "pcre32") == 0)
2919            {
2920    #ifdef SUPPORT_PCRE32
2921            printf("1\n");
2922            yield = 1;
2923    #else
2924            printf("0\n");
2925            yield = 0;
2926    #endif
2927            goto EXIT;
2928            }
2929          if (strcmp(argv[op + 1], "utf") == 0)
2930            {
2931    #ifdef SUPPORT_PCRE8
2932            if (pcre_mode == PCRE8_MODE)
2933              (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2934    #endif
2935    #ifdef SUPPORT_PCRE16
2936            if (pcre_mode == PCRE16_MODE)
2937              (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2938    #endif
2939    #ifdef SUPPORT_PCRE32
2940            if (pcre_mode == PCRE32_MODE)
2941              (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2942    #endif
2943            printf("%d\n", rc);
2944            yield = rc;
2945            goto EXIT;
2946            }
2947          else if (strcmp(argv[op + 1], "ucp") == 0)
2948            {
2949            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2950            printf("%d\n", rc);
2951            yield = rc;
2952            }
2953          else if (strcmp(argv[op + 1], "jit") == 0)
2954            {
2955            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2956            printf("%d\n", rc);
2957            yield = rc;
2958            }
2959          else if (strcmp(argv[op + 1], "newline") == 0)
2960            {
2961            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2962            print_newline_config(rc, TRUE);
2963            }
2964          else if (strcmp(argv[op + 1], "ebcdic") == 0)
2965            {
2966    #ifdef EBCDIC
2967            printf("1\n");
2968            yield = 1;
2969    #else
2970            printf("0\n");
2971    #endif
2972            }
2973          else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2974            {
2975    #ifdef EBCDIC
2976            printf("0x%02x\n", CHAR_LF);
2977    #else
2978            printf("0\n");
2979    #endif
2980            }
2981          else
2982            {
2983            printf("Unknown -C option: %s\n", argv[op + 1]);
2984            }
2985          goto EXIT;
2986          }
2987    
2988        /* No argument for -C: output all configuration information. */
2989    
2990        printf("PCRE version %s\n", version);
2991      printf("Compiled with\n");      printf("Compiled with\n");
2992    
2993    #ifdef EBCDIC
2994        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2995    #endif
2996    
2997    /* At least one of SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 will be set.
2998    If more than one is set, either all of their UTFs are supported or none of them are. */
2999    
3000    #ifdef SUPPORT_PCRE8
3001        printf("  8-bit support\n");
3002      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3003      printf("  %sUTF-8 support\n", rc? "" : "No ");        printf ("  %sUTF-8 support\n", rc ? "" : "No ");
3004      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #endif
3005    #ifdef SUPPORT_PCRE16
3006        printf("  16-bit support\n");
3007        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3008        printf ("  %sUTF-16 support\n", rc ? "" : "No ");
3009    #endif
3010    #ifdef SUPPORT_PCRE32
3011        printf("  32-bit support\n");
3012        (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3013        printf ("  %sUTF-32 support\n", rc ? "" : "No ");
3014    #endif
3015    
3016        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3017      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
3018      (void)pcre_config(PCRE_CONFIG_JIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3019      if (rc)      if (rc)
3020        printf("  Just-in-time compiler support\n");        {
3021          const char *arch;
3022          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3023          printf("  Just-in-time compiler support: %s\n", arch);
3024          }
3025      else      else
3026        printf("  No just-in-time compiler support\n");        printf("  No just-in-time compiler support\n");
3027      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3028      /* Note that these values are always the ASCII values, even      print_newline_config(rc, FALSE);
3029      in EBCDIC environments. CR is 13 and NL is 10. */      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
     printf("  Newline sequence is %s\n", (rc == 13)? "CR" :  
       (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :  
       (rc == -2)? "ANYCRLF" :  
       (rc == -1)? "ANY" : "???");  
     (void)pcre_config(PCRE_CONFIG_BSR, &rc);  
3030      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3031                                       "all Unicode newlines");                                       "all Unicode newlines");
3032      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3033      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
3034      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3035      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
3036      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3037      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
3038      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3039      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
3040      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3041      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
3042        if (showstore)
3043          {
3044          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3045          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3046          }
3047        printf("\n");
3048      goto EXIT;      goto EXIT;
3049      }      }
3050    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
3051             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
3052      {      {
3053      usage();      usage();
3054      goto EXIT;      goto EXIT;
3055      }      }
3056    else    else
3057      {      {
3058      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
3059        printf("** Unknown or malformed option %s\n", arg);
3060      usage();      usage();
3061      yield = 1;      yield = 1;
3062      goto EXIT;      goto EXIT;
# Line 1440  if (argc > 2) Line 3103  if (argc > 2)
3103    
3104  /* Set alternative malloc function */  /* Set alternative malloc function */
3105    
3106    #ifdef SUPPORT_PCRE8
3107  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
3108  pcre_free = new_free;  pcre_free = new_free;
3109  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
3110  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
3111    #endif
3112    
3113    #ifdef SUPPORT_PCRE16
3114    pcre16_malloc = new_malloc;
3115    pcre16_free = new_free;
3116    pcre16_stack_malloc = stack_malloc;
3117    pcre16_stack_free = stack_free;
3118    #endif
3119    
3120    #ifdef SUPPORT_PCRE32
3121    pcre32_malloc = new_malloc;
3122    pcre32_free = new_free;
3123    pcre32_stack_malloc = stack_malloc;
3124    pcre32_stack_free = stack_free;
3125    #endif
3126    
3127  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
3128    
3129  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3130    
3131  /* Main loop */  /* Main loop */
3132    
# Line 1462  while (!done) Line 3141  while (!done)
3141  #endif  #endif
3142    
3143    const char *error;    const char *error;
3144    unsigned char *markptr;    pcre_uint8 *markptr;
3145    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
3146    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
3147    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
3148      unsigned long int get_options;
3149    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
3150    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
3151    int do_allcaps = 0;    int do_allcaps = 0;
# Line 1481  while (!done) Line 3161  while (!done)
3161    int do_flip = 0;    int do_flip = 0;
3162    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
3163    
3164    use_utf8 = 0;  #if !defined NODFA
3165      int dfa_matched = 0;
3166    #endif
3167    
3168      use_utf = 0;
3169    debug_lengths = 1;    debug_lengths = 1;
3170    
3171    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1496  while (!done) Line 3180  while (!done)
3180    
3181    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3182      {      {
3183      unsigned long int magic, get_options;      pcre_uint32 magic;
3184      uschar sbuf[8];      pcre_uint8 sbuf[8];
3185      FILE *f;      FILE *f;
3186    
3187      p++;      p++;
3188        if (*p == '!')
3189          {
3190          do_debug = TRUE;
3191          do_showinfo = TRUE;
3192          p++;
3193          }
3194    
3195      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
3196      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
3197      *pp = 0;      *pp = 0;
# Line 1512  while (!done) Line 3203  while (!done)
3203        continue;        continue;
3204        }        }
3205    
3206        first_gotten_store = 0;
3207      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3208    
3209      true_size =      true_size =
# Line 1519  while (!done) Line 3211  while (!done)
3211      true_study_size =      true_study_size =
3212        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3213    
3214      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
3215      regex_gotten_store = gotten_store;      if (re == NULL)
3216          {
3217          printf("** Failed to get %d bytes of memory for pcre object\n",
3218            (int)true_size);
3219          yield = 1;
3220          goto EXIT;
3221          }
3222        regex_gotten_store = first_gotten_store;
3223    
3224      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3225    
3226      magic = ((real_pcre *)re)->magic_number;      magic = REAL_PCRE_MAGIC(re);
3227      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
3228        {        {
3229        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
3230          {          {
3231          do_flip = 1;          do_flip = 1;
3232          }          }
3233        else        else
3234          {          {
3235          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3236            new_free(re);
3237          fclose(f);          fclose(f);
3238          continue;          continue;
3239          }          }
3240        }        }
3241    
3242        /* We hide the byte-invert info for little and big endian tests. */
3243      fprintf(outfile, "Compiled pattern%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3244        do_flip? " (byte-inverted)" : "", p);        do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
   
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
3245    
3246      /* Now see if there is any following study data. */      /* Now see if there is any following study data. */
3247    
# Line 1563  while (!done) Line 3259  while (!done)
3259          {          {
3260          FAIL_READ:          FAIL_READ:
3261          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
3262          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
3263          if (re != NULL) new_free(re);            {
3264              PCRE_FREE_STUDY(extra);
3265              }
3266            new_free(re);
3267          fclose(f);          fclose(f);
3268          continue;          continue;
3269          }          }
# Line 1573  while (!done) Line 3272  while (!done)
3272        }        }
3273      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
3274    
3275        /* Flip the necessary bytes. */
3276        if (do_flip)
3277          {
3278          int rc;
3279          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3280          if (rc == PCRE_ERROR_BADMODE)
3281            {
3282            /* Simulate the result of the function call below. */
3283            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3284              pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3285              PCRE_INFO_OPTIONS);
3286            fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3287              "%d-bit mode\n", 8 * CHAR_SIZE,
3288              8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3289            new_free(re);
3290            fclose(f);
3291            continue;
3292            }
3293          }
3294    
3295        /* Need to know if UTF-8 for printing data strings. */
3296    
3297        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3298          {
3299          new_free(re);
3300          fclose(f);
3301          continue;
3302          }
3303        use_utf = (get_options & PCRE_UTF8) != 0;
3304    
3305      fclose(f);      fclose(f);
3306      goto SHOW_INFO;      goto SHOW_INFO;
3307      }      }
3308    
3309    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
3310    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
3311    
3312    delimiter = *p++;    delimiter = *p++;
3313    
# Line 1629  while (!done) Line 3358  while (!done)
3358    /* Look for options after final delimiter */    /* Look for options after final delimiter */
3359    
3360    options = 0;    options = 0;
3361      study_options = force_study_options;
3362    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
3363    
3364    while (*pp != 0)    while (*pp != 0)
# Line 1665  while (!done) Line 3395  while (!done)
3395  #endif  #endif
3396    
3397        case 'S':        case 'S':
3398        if (do_study == 0)        do_study = 1;
3399          for (;;)
3400          {          {
3401          do_study = 1;          switch (*pp++)
         if (*pp == '+')  
3402            {            {
3403            study_options |= PCRE_STUDY_JIT_COMPILE;            case 'S':
3404            pp++;            do_study = 0;
3405              no_force_study = 1;
3406              break;
3407    
3408              case '!':
3409              study_options |= PCRE_STUDY_EXTRA_NEEDED;
3410              break;
3411    
3412              case '+':
3413              if (*pp == '+')
3414                {
3415                verify_jit = TRUE;
3416                pp++;
3417                }
3418              if (*pp >= '1' && *pp <= '7')
3419                study_options |= jit_study_bits[*pp++ - '1'];
3420              else
3421                study_options |= jit_study_bits[6];
3422              break;
3423    
3424              case '-':
3425              study_options &= ~PCRE_STUDY_ALLJIT;
3426              break;
3427    
3428              default:
3429              pp--;
3430              goto ENDLOOP;
3431            }            }
3432          }          }
3433        else        ENDLOOP:
         {  
         do_study = 0;  
         no_force_study = 1;  
         }  
3434        break;        break;
3435    
3436        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
# Line 1686  while (!done) Line 3438  while (!done)
3438        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
3439        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3440        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
3441        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
3442        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
3443    
3444        case 'T':        case 'T':
# Line 1720  while (!done) Line 3472  while (!done)
3472          goto SKIP_DATA;          goto SKIP_DATA;
3473          }          }
3474        locale_set = 1;        locale_set = 1;
3475        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
3476        pp = ppp;        pp = ppp;
3477        break;        break;
3478    
# Line 1733  while (!done) Line 3485  while (!done)
3485    
3486        case '<':        case '<':
3487          {          {
3488          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3489            {            {
3490            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
3491            pp += 3;            pp += 3;
# Line 1761  while (!done) Line 3513  while (!done)
3513    
3514    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
3515    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
3516    local character tables. */    local character tables. Neither does it have 16-bit or 32-bit support. */
3517    
3518  #if !defined NOPOSIX  #if !defined NOPOSIX
3519    if (posix || do_posix)    if (posix || do_posix)
# Line 1777  while (!done) Line 3529  while (!done)
3529      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3530      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3531    
3532        first_gotten_store = 0;
3533      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
3534    
3535      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1796  while (!done) Line 3549  while (!done)
3549  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
3550    
3551      {      {
3552      unsigned long int get_options;      /* In 16- or 32-bit mode, convert the input. */
3553    
3554    #ifdef SUPPORT_PCRE16
3555        if (pcre_mode == PCRE16_MODE)
3556          {
3557          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3558            {
3559            case -1:
3560            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3561              "converted to UTF-16\n");
3562            goto SKIP_DATA;
3563    
3564            case -2:
3565            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3566              "cannot be converted to UTF-16\n");
3567            goto SKIP_DATA;
3568    
3569            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3570            fprintf(outfile, "**Failed: character value greater than 0xffff "
3571              "cannot be converted to 16-bit in non-UTF mode\n");
3572            goto SKIP_DATA;
3573    
3574            default:
3575            break;
3576            }
3577          p = (pcre_uint8 *)buffer16;
3578          }
3579    #endif
3580    
3581    #ifdef SUPPORT_PCRE32
3582        if (pcre_mode == PCRE32_MODE)
3583          {
3584          switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3585            {
3586            case -1:
3587            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3588              "converted to UTF-32\n");
3589            goto SKIP_DATA;
3590    
3591            case -2:
3592            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3593              "cannot be converted to UTF-32\n");
3594            goto SKIP_DATA;
3595    
3596            case -3:
3597            fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3598            goto SKIP_DATA;
3599    
3600            default:
3601            break;
3602            }
3603          p = (pcre_uint8 *)buffer32;
3604          }
3605    #endif
3606    
3607        /* Compile many times when timing */
3608    
3609      if (timeit > 0)      if (timeit > 0)
3610        {        {
# Line 1805  while (!done) Line 3613  while (!done)
3613        clock_t start_time = clock();        clock_t start_time = clock();
3614        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
3615          {          {
3616          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3617          if (re != NULL) free(re);          if (re != NULL) free(re);
3618          }          }
3619        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1814  while (!done) Line 3622  while (!done)
3622            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
3623        }        }
3624    
3625      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
3626        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3627    
3628      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3629      if non-interactive. */      if non-interactive. */
# Line 1845  while (!done) Line 3654  while (!done)
3654      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
3655      lines. */      lines. */
3656    
3657      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3658      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;        goto SKIP_DATA;
3659        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
     /* Print information if required. There are now two info-returning  
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
   
     if (log_store)  
       fprintf(outfile, "Memory allocation (code space): %d\n",  
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3660    
3661      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3662      and remember the store that was got. */      and remember the store that was got. */
3663    
3664      true_size = ((real_pcre *)re)->size;      true_size = REAL_PCRE_SIZE(re);
3665      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3666    
3667        /* Output code size information if requested */
3668    
3669        if (log_store)
3670          {
3671          int name_count, name_entry_size, real_pcre_size;
3672    
3673          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3674          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3675    #ifdef SUPPORT_PCRE8
3676          if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3677            real_pcre_size = sizeof(real_pcre);
3678    #endif
3679    #ifdef SUPPORT_PCRE16
3680          if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3681            real_pcre_size = sizeof(real_pcre16);
3682    #endif
3683    #ifdef SUPPORT_PCRE32
3684          if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3685            real_pcre_size = sizeof(real_pcre32);
3686    #endif
3687          fprintf(outfile, "Memory allocation (code space): %d\n",
3688            (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3689          }
3690    
3691      /* If -s or /S was present, study the regex to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
3692      help with the matching, unless the pattern has the SS option, which      help with the matching, unless the pattern has the SS option, which
# Line 1877  while (!done) Line 3701  while (!done)
3701          clock_t time_taken;          clock_t time_taken;
3702          clock_t start_time = clock();          clock_t start_time = clock();
3703          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3704            extra = pcre_study(re, study_options | force_study_options, &error);            {
3705              PCRE_STUDY(extra, re, study_options, &error);
3706              }
3707          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3708          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
3709              {
3710              PCRE_FREE_STUDY(extra);
3711              }
3712          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3713            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3714              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3715          }          }
3716        extra = pcre_study(re, study_options | force_study_options, &error);        PCRE_STUDY(extra, re, study_options, &error);
3717        if (error != NULL)        if (error != NULL)
3718          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3719        else if (extra != NULL)        else if (extra != NULL)
3720            {
3721          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3722            if (log_store)
3723              {
3724              size_t jitsize;
3725              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3726                  jitsize != 0)
3727                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3728              }
3729            }
3730        }        }
3731    
3732      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1904  while (!done) Line 3742  while (!done)
3742        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
3743        }        }
3744    
3745      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
3746    
3747      SHOW_INFO:      SHOW_INFO:
3748    
3749      if (do_debug)      if (do_debug)
3750        {        {
3751        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3752        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3753        }        }
3754    
3755      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1956  while (!done) Line 3757  while (!done)
3757      if (do_showinfo)      if (do_showinfo)
3758        {        {
3759        unsigned long int all_options;        unsigned long int all_options;
3760  #if !defined NOINFOCHECK        pcre_uint32 first_char, need_char;
3761        int old_first_char, old_options, old_count;        int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
3762  #endif          hascrorlf, maxlookbehind;
       int count, backrefmax, first_char, need_char, okpartial, jchanged,  
         hascrorlf;  
3763        int nameentrysize, namecount;        int nameentrysize, namecount;
3764        const uschar *nametable;        const pcre_uint8 *nametable;
3765    
3766        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3767        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3768        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3769        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTLITERAL, &first_char) +
3770        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_FIRSTLITERALSET, &first_char_set) +
3771        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_LASTLITERAL2, &need_char) +
3772        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_LASTLITERAL2SET, &need_char_set) +
3773        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3774        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3775        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3776        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3777              new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3778  #if !defined NOINFOCHECK            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3779        old_count = pcre_info(re, &old_options, &old_first_char);            new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3780        if (count < 0) fprintf(outfile,            != 0)
3781          "Error %d from pcre_info()\n", count);          goto SKIP_DATA;
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3782    
3783        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3784          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 2009  while (!done) Line 3793  while (!done)
3793          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3794          while (namecount-- > 0)          while (namecount-- > 0)
3795            {            {
3796            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,            int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
3797              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int length = (int)STRLEN(nametable + imm2_size);
3798              GET2(nametable, 0));            fprintf(outfile, "  ");
3799            nametable += nameentrysize;            PCHARSV(nametable, imm2_size, length, outfile);
3800              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3801    #ifdef SUPPORT_PCRE32
3802              if (pcre_mode == PCRE32_MODE)
3803                fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
3804    #endif
3805    #ifdef SUPPORT_PCRE16
3806              if (pcre_mode == PCRE16_MODE)
3807                fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
3808    #endif
3809    #ifdef SUPPORT_PCRE8
3810              if (pcre_mode == PCRE8_MODE)
3811                fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
3812    #endif
3813              nametable += nameentrysize * CHAR_SIZE;
3814            }            }
3815          }          }
3816    
3817        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3818        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3819    
3820        all_options = ((real_pcre *)re)->options;        all_options = REAL_PCRE_OPTIONS(re);
3821        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3822    
3823        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3824          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 2036  while (!done) Line 3834  while (!done)
3834            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3835            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3836            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3837            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3838            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3839            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3840            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3841            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3842    
# Line 2070  while (!done) Line 3868  while (!done)
3868          break;          break;
3869          }          }
3870    
3871        if (first_char == -1)        if (first_char_set == 2)
3872          {          {
3873          fprintf(outfile, "First char at start or follows newline\n");          fprintf(outfile, "First char at start or follows newline\n");
3874          }          }
3875        else if (first_char < 0)        else if (first_char_set == 1)
3876          {          {
3877          fprintf(outfile, "No first char\n");          const char *caseless =
3878              ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
3879              "" : " (caseless)";
3880    
3881            if (PRINTOK(first_char))
3882              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3883            else
3884              {
3885              fprintf(outfile, "First char = ");
3886              pchar(first_char, outfile);
3887              fprintf(outfile, "%s\n", caseless);
3888              }
3889          }          }
3890        else        else
3891          {          {
3892          int ch = first_char & 255;          fprintf(outfile, "No first char\n");
         const char *caseless = ((first_char & REQ_CASELESS) == 0)?  
           "" : " (caseless)";  
         if (PRINTHEX(ch))  
           fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);  
         else  
           fprintf(outfile, "First char = %d%s\n", ch, caseless);  
3893          }          }
3894    
3895        if (need_char < 0)        if (need_char_set == 0)
3896          {          {
3897          fprintf(outfile, "No need char\n");          fprintf(outfile, "No need char\n");
3898          }          }
3899        else        else
3900          {          {
3901          int ch = need_char & 255;          const char *caseless =
3902          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
3903            "" : " (caseless)";            "" : " (caseless)";
3904          if (PRINTHEX(ch))  
3905            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3906              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3907          else          else
3908            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3909              fprintf(outfile, "Need char = ");
3910              pchar(need_char, outfile);
3911              fprintf(outfile, "%s\n", caseless);
3912              }
3913          }          }
3914    
3915          if (maxlookbehind > 0)
3916            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3917    
3918        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3919        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3920        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
# Line 2118  while (!done) Line 3929  while (!done)
3929            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3930          else          else
3931            {            {
3932            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3933            int minlength;            int minlength;
3934    
3935            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3936            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3937    
3938            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
3939              {              {
3940              int i;              if (start_bits == NULL)
3941              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3942              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3943                {                {
3944                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3945                  int c = 24;
3946                  fprintf(outfile, "Starting byte set: ");
3947                  for (i = 0; i < 256; i++)
3948                  {                  {
3949                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
3950                    {                    {
3951                    fprintf(outfile, "%c ", i);                    if (c > 75)
3952                    c += 2;                      {
3953                    }                      fprintf(outfile, "\n  ");
3954                  else                      c = 2;
3955                    {                      }
3956                    fprintf(outfile, "\\x%02x ", i);                    if (PRINTOK(i) && i != ' ')
3957                    c += 5;                      {
3958                        fprintf(outfile, "%c ", i);
3959                        c += 2;
3960                        }
3961                      else
3962                        {
3963                        fprintf(outfile, "\\x%02x ", i);
3964                        c += 5;
3965                        }
3966                    }                    }
3967                  }                  }
3968                  fprintf(outfile, "\n");
3969                }                }
             fprintf(outfile, "\n");  
3970              }              }
3971            }            }
3972    
3973          /* Show this only if the JIT was set by /S, not by -s. */          /* Show this only if the JIT was set by /S, not by -s. */
3974    
3975          if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)          if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3976                (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3977            {            {
3978            int jit;            int jit;
3979            new_info(re, extra, PCRE_INFO_JIT, &jit);            if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3980            if (jit)              {
3981              fprintf(outfile, "JIT study was successful\n");              if (jit)
3982            else                fprintf(outfile, "JIT study was successful\n");
3983                else
3984  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
3985              fprintf(outfile, "JIT study was not successful\n");                fprintf(outfile, "JIT study was not successful\n");
3986  #else  #else
3987              fprintf(outfile, "JIT support is not available in this version of PCRE\n");                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3988  #endif  #endif
3989                }
3990            }            }
3991          }          }
3992        }        }
# Line 2188  while (!done) Line 4004  while (!done)
4004          }          }
4005        else        else
4006          {          {
4007          uschar sbuf[8];          pcre_uint8 sbuf[8];
4008          sbuf[0] = (uschar)((true_size >> 24) & 255);  
4009          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
4010          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4011          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4012            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
4013          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
4014          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4015          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4016          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
4017            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4018    
4019          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
4020              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 2225  while (!done) Line 4042  while (!done)
4042          }          }
4043    
4044        new_free(re);        new_free(re);
4045        if (extra != NULL) pcre_free_study(extra);        if (extra != NULL)
4046            {
4047            PCRE_FREE_STUDY(extra);
4048            }
4049        if (locale_set)        if (locale_set)
4050          {          {
4051          new_free((void *)tables);          new_free((void *)tables);
# Line 2240  while (!done) Line 4060  while (!done)
4060    
4061    for (;;)    for (;;)
4062      {      {
4063      uschar *q;      pcre_uint8 *q;
4064      uschar *bptr;      pcre_uint8 *bptr;
4065      int *use_offsets = offsets;      int *use_offsets = offsets;
4066      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
4067      int callout_data = 0;      int callout_data = 0;
4068      int callout_data_set = 0;      int callout_data_set = 0;
4069      int count, c;      int count;
4070        pcre_uint32 c;
4071      int copystrings = 0;      int copystrings = 0;
4072      int find_match_limit = default_find_match_limit;      int find_match_limit = default_find_match_limit;
4073      int getstrings = 0;      int getstrings = 0;
# Line 2257  while (!done) Line 4078  while (!done)
4078      int g_notempty = 0;      int g_notempty = 0;
4079      int use_dfa = 0;      int use_dfa = 0;
4080    
     options = 0;  
   
4081      *copynames = 0;      *copynames = 0;
4082      *getnames = 0;      *getnames = 0;
4083    
4084      copynamesptr = copynames;  #ifdef SUPPORT_PCRE32
4085      getnamesptr = getnames;      cn32ptr = copynames;
4086        gn32ptr = getnames;
4087    #endif
4088    #ifdef SUPPORT_PCRE16
4089        cn16ptr = copynames16;
4090        gn16ptr = getnames16;
4091    #endif
4092    #ifdef SUPPORT_PCRE8
4093        cn8ptr = copynames8;
4094        gn8ptr = getnames8;
4095    #endif
4096    
4097      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
4098      first_callout = 1;      first_callout = 1;
4099      last_callout_mark = NULL;      last_callout_mark = NULL;
4100      callout_extra = 0;      callout_extra = 0;
# Line 2273  while (!done) Line 4102  while (!done)
4102      callout_fail_count = 999999;      callout_fail_count = 999999;
4103      callout_fail_id = -1;      callout_fail_id = -1;
4104      show_malloc = 0;      show_malloc = 0;
4105        options = 0;
4106    
4107      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
4108        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 2308  while (!done) Line 4138  while (!done)
4138        int i = 0;        int i = 0;
4139        int n = 0;        int n = 0;
4140    
4141        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4142          In non-UTF mode, allow the value of the byte to fall through to later,
4143          where values greater than 127 are turned into UTF-8 when running in
4144          16-bit or 32-bit mode. */
4145    
4146          if (c != '\\')
4147            {
4148            if (use_utf)
4149              {
4150              *q++ = c;
4151              continue;
4152              }
4153            }
4154    
4155          /* Handle backslash escapes */
4156    
4157          else switch ((c = *p++))
4158          {          {
4159          case 'a': c =    7; break;          case 'a': c =    7; break;
4160          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 2324  while (!done) Line 4170  while (!done)
4170          c -= '0';          c -= '0';
4171          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4172            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
4173          break;          break;
4174    
4175          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
4176          if (*p == '{')          if (*p == '{')
4177            {            {
4178            unsigned char *pt = p;            pcre_uint8 *pt = p;
4179            c = 0;            c = 0;
4180    
4181            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
# Line 2353  while (!done) Line 4184  while (!done)
4184            least one MacOS environment. */            least one MacOS environment. */
4185    
4186            for (pt++; isxdigit(*pt); pt++)            for (pt++; isxdigit(*pt); pt++)
4187              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);              {
4188                if (++i == 9)
4189                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4190                                   "using only the first eight.\n");
4191                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4192                }
4193            if (*pt == '}')            if (*pt == '}')
4194              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             if (use_utf8)  
               {  
               utn = ord2utf8(c, buff8);  
               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
               c = buff8[ii];   /* Last byte */  
               }  
             else  
              {  
              if (c > 255)  
                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "  
                  "UTF-8 mode is not enabled.\n"  
                  "** Truncation will probably give the wrong result.\n", c);  
              }  
4195              p = pt + 1;              p = pt + 1;
4196              break;              break;
4197              }              }
4198            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
4199            }            }
 #endif  
4200    
4201          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
4202            allows UTF-8 characters to be constructed byte by byte, and also allows
4203            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4204            Otherwise, pass it down to later code so that it can be turned into
4205            UTF-8 when running in 16/32-bit mode. */
4206    
4207          c = 0;          c = 0;
4208          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
# Line 2386  while (!done) Line 4210  while (!done)
4210            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4211            p++;            p++;
4212            }            }
4213            if (use_utf)
4214              {
4215              *q++ = c;
4216              continue;
4217              }
4218          break;          break;
4219    
4220          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 2418  while (!done) Line 4247  while (!done)
4247            }            }
4248          else if (isalnum(*p))          else if (isalnum(*p))
4249            {            {
4250            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
4251            }            }
4252          else if (*p == '+')          else if (*p == '+')
4253            {            {
# Line 2434  while (!done) Line 4256  while (!done)
4256            }            }
4257          else if (*p == '-')          else if (*p == '-')
4258            {            {
4259            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
4260            p++;            p++;
4261            }            }
4262          else if (*p == '!')          else if (*p == '!')
# Line 2488  while (!done) Line 4310  while (!done)
4310            }            }
4311          else if (isalnum(*p))          else if (isalnum(*p))
4312            {            {
4313            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)getnamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);  
           getnamesptr = npp;  
4314            }            }
4315          continue;          continue;
4316    
# Line 2505  while (!done) Line 4320  while (!done)
4320              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4321              && extra->executable_jit != NULL)              && extra->executable_jit != NULL)
4322            {            {
4323            if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);            if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4324            jit_stack = pcre_jit_stack_alloc(1, n * 1024);            jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4325            pcre_assign_jit_stack(extra, jit_callback, jit_stack);            PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4326            }            }
4327          continue;          continue;
4328    
# Line 2543  while (!done) Line 4358  while (!done)
4358            }            }
4359          use_size_offsets = n;          use_size_offsets = n;
4360          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
4361              else use_offsets = offsets + size_offsets_max - n;  /* To catch overruns */
4362          continue;          continue;
4363    
4364          case 'P':          case 'P':
# Line 2603  while (!done) Line 4419  while (!done)
4419            }            }
4420          continue;          continue;
4421          }          }
4422        *q++ = c;  
4423          /* We now have a character value in c that may be greater than 255. In
4424          16-bit or 32-bit mode, we always convert characters to UTF-8 so that
4425          values greater than 255 can be passed to non-UTF 16- or 32-bit strings.
4426          In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4427          than 127 in UTF mode must have come from \x{...} or octal constructs
4428          because values from \x.. get this far only in non-UTF mode. */
4429    
4430    #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
4431          if (pcre_mode != PCRE8_MODE || use_utf)
4432            {
4433            pcre_uint8 buff8[8];
4434            int ii, utn;
4435            utn = ord2utf8(c, buff8);
4436            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
4437            }
4438          else
4439    #endif
4440            {
4441            if (c > 255)
4442              {
4443              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4444                "and UTF-8 mode is not enabled.\n", c);
4445              fprintf(outfile, "** Truncation will probably give the wrong "
4446                "result.\n");
4447              }
4448            *q++ = c;
4449            }
4450        }        }
4451    
4452        /* Reached end of subject string */
4453    
4454      *q = 0;      *q = 0;
4455      len = (int)(q - dbuffer);      len = (int)(q - dbuffer);
4456    
# Line 2653  while (!done) Line 4499  while (!done)
4499          (void)regerror(rc, &preg, (char *)buffer, buffer_size);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4500          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4501          }          }
4502        else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)        else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
               != 0)  
4503          {          {
4504          fprintf(outfile, "Matched with REG_NOSUB\n");          fprintf(outfile, "Matched with REG_NOSUB\n");
4505          }          }
# Line 2666  while (!done) Line 4511  while (!done)
4511            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
4512              {              {
4513              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
4514              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
4515                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4516              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4517              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
4518                {                {
4519                fprintf(outfile, "%2d+ ", (int)i);                fprintf(outfile, "%2d+ ", (int)i);
4520                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4521                  outfile);                  outfile);
4522                fprintf(outfile, "\n");                fprintf(outfile, "\n");
4523                }                }
# Line 2680  while (!done) Line 4525  while (!done)
4525            }            }
4526          }          }
4527        free(pmatch);        free(pmatch);
4528          goto NEXT_DATA;
4529        }        }
4530    
4531    #endif  /* !defined NOPOSIX */
4532    
4533      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
4534    
4535      else  #ifdef SUPPORT_PCRE16
4536  #endif  /* !defined NOPOSIX */      if (pcre_mode == PCRE16_MODE)
4537          {
4538          len = to16(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF8, len);
4539          switch(len)
4540            {
4541            case -1:
4542            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4543              "converted to UTF-16\n");
4544            goto NEXT_DATA;
4545    
4546            case -2:
4547            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4548              "cannot be converted to UTF-16\n");
4549            goto NEXT_DATA;
4550    
4551            case -3:
4552            fprintf(outfile, "**Failed: character value greater than 0xffff "
4553              "cannot be converted to 16-bit in non-UTF mode\n");
4554            goto NEXT_DATA;
4555    
4556            default:
4557            break;
4558            }
4559          bptr = (pcre_uint8 *)buffer16;
4560          }
4561    #endif
4562    
4563    #ifdef SUPPORT_PCRE32
4564        if (pcre_mode == PCRE32_MODE)
4565          {
4566          len = to32(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF32, len);
4567          switch(len)
4568            {
4569            case -1:
4570            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
4571              "converted to UTF-32\n");
4572            goto NEXT_DATA;
4573    
4574            case -2:
4575            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4576              "cannot be converted to UTF-32\n");
4577            goto NEXT_DATA;
4578    
4579            case -3:
4580            fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4581            goto NEXT_DATA;
4582    
4583            default:
4584            break;
4585            }
4586          bptr = (pcre_uint8 *)buffer32;
4587          }
4588    #endif
4589    
4590        /* Ensure that there is a JIT callback if we want to verify that JIT was
4591        actually used. If jit_stack == NULL, no stack has yet been assigned. */
4592    
4593        if (verify_jit && jit_stack == NULL && extra != NULL)
4594           { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4595    
4596      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
4597        {        {
4598        markptr = NULL;        markptr = NULL;
4599          jit_was_used = FALSE;
4600    
4601        if (timeitm > 0)        if (timeitm > 0)
4602          {          {