/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 516 by ph10, Tue May 4 15:51:35 2010 UTC revision 1022 by ph10, Tue Aug 28 12:28:15 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 58  POSSIBILITY OF SUCH DAMAGE.
58  #include <locale.h>  #include <locale.h>
59  #include <errno.h>  #include <errno.h>
60    
61  #ifdef SUPPORT_LIBREADLINE  /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
68  #include <unistd.h>  #include <unistd.h>
69  #endif  #endif
70    #if defined(SUPPORT_LIBREADLINE)
71  #include <readline/readline.h>  #include <readline/readline.h>
72  #include <readline/history.h>  #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80  #endif  #endif
   
81    
82  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
83  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 79  input mode under Windows. */ Line 101  input mode under Windows. */
101  #define fileno _fileno  #define fileno _fileno
102  #endif  #endif
103    
104    /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106    #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
112  #else  #else
113  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
114  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 86  input mode under Windows. */ Line 116  input mode under Windows. */
116  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
117  #endif  #endif
118    
119    #define PRIV(name) name
120    
121  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
122  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 97  here before pcre_internal.h so that the Line 128  here before pcre_internal.h so that the
128  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
129    
130  #include "pcre.h"  #include "pcre.h"
131    
132    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
133    /* Configure internal macros to 16 bit mode. */
134    #define COMPILE_PCRE16
135    #endif
136    
137  #include "pcre_internal.h"  #include "pcre_internal.h"
138    
139    /* The pcre_printint() function, which prints the internal form of a compiled
140    regex, is held in a separate file so that (a) it can be compiled in either
141    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
142    when that is compiled in debug mode. */
143    
144    #ifdef SUPPORT_PCRE8
145    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
146    #endif
147    #ifdef SUPPORT_PCRE16
148    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
149    #endif
150    
151  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
152  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
153  external symbols to prevent clashes. */  external symbols to prevent clashes. */
154    
155  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
156    
157  #include "pcre_tables.c"  #include "pcre_tables.c"
158    
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
   
159  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
160  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
161  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
162  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
163  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
164    
165    #ifdef EBCDIC
166    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
167    #else
168    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
169    #endif
170    
171    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
172    
173  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* Posix support is disabled in 16 bit only mode. */
174    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
175    #define NOPOSIX
176    #endif
177    
178  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
179  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 140  Makefile. */ Line 183  Makefile. */
183  #include "pcreposix.h"  #include "pcreposix.h"
184  #endif  #endif
185    
186  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
187  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
188  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
189  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
190  UTF8 support if PCRE is built without it. */  
191    #ifndef SUPPORT_UTF
192  #ifndef SUPPORT_UTF8  #ifndef NOUTF
193  #ifndef NOUTF8  #define NOUTF
194  #define NOUTF8  #endif
195  #endif  #endif
196    
197    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
198    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
199    only from one place and is handled differently). I couldn't dream up any way of
200    using a single macro to do this in a generic way, because of the many different
201    argument requirements. We know that at least one of SUPPORT_PCRE8 and
202    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
203    use these in the definitions of generic macros.
204    
205    **** Special note about the PCHARSxxx macros: the address of the string to be
206    printed is always given as two arguments: a base address followed by an offset.
207    The base address is cast to the correct data size for 8 or 16 bit data; the
208    offset is in units of this size. If the string were given as base+offset in one
209    argument, the casting might be incorrectly applied. */
210    
211    #ifdef SUPPORT_PCRE8
212    
213    #define PCHARS8(lv, p, offset, len, f) \
214      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
215    
216    #define PCHARSV8(p, offset, len, f) \
217      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
218    
219    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
220      p = read_capture_name8(p, cn8, re)
221    
222    #define STRLEN8(p) ((int)strlen((char *)p))
223    
224    #define SET_PCRE_CALLOUT8(callout) \
225      pcre_callout = callout
226    
227    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
228       pcre_assign_jit_stack(extra, callback, userdata)
229    
230    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
231      re = pcre_compile((char *)pat, options, error, erroffset, tables)
232    
233    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
234        namesptr, cbuffer, size) \
235      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
236        (char *)namesptr, cbuffer, size)
237    
238    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
239      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
240    
241    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
242        offsets, size_offsets, workspace, size_workspace) \
243      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
244        offsets, size_offsets, workspace, size_workspace)
245    
246    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
247        offsets, size_offsets) \
248      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
249        offsets, size_offsets)
250    
251    #define PCRE_FREE_STUDY8(extra) \
252      pcre_free_study(extra)
253    
254    #define PCRE_FREE_SUBSTRING8(substring) \
255      pcre_free_substring(substring)
256    
257    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
258      pcre_free_substring_list(listptr)
259    
260    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
261        getnamesptr, subsptr) \
262      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
263        (char *)getnamesptr, subsptr)
264    
/* Look up the number of a named group via the 8-bit library and store it in
n. The second macro parameter is the compiled pattern. It is named "re" so
that the expansion uses the argument that was actually passed, instead of
silently picking up whatever variable called "re" is in scope at the point of
use. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)
267    
268    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
269      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
270    
271    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
272      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
273    
274    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
275      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
276    
277    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
278      pcre_printint(re, outfile, debug_lengths)
279    
280    #define PCRE_STUDY8(extra, re, options, error) \
281      extra = pcre_study(re, options, error)
282    
283    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
284      pcre_jit_stack_alloc(startsize, maxsize)
285    
286    #define PCRE_JIT_STACK_FREE8(stack) \
287      pcre_jit_stack_free(stack)
288    
289    #endif /* SUPPORT_PCRE8 */
290    
291    /* -----------------------------------------------------------*/
292    
293    #ifdef SUPPORT_PCRE16
294    
295    #define PCHARS16(lv, p, offset, len, f) \
296      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
297    
298    #define PCHARSV16(p, offset, len, f) \
299      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
300    
301    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
302      p = read_capture_name16(p, cn16, re)
303    
304    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
305    
306    #define SET_PCRE_CALLOUT16(callout) \
307      pcre16_callout = (int (*)(pcre16_callout_block *))callout
308    
309    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
310      pcre16_assign_jit_stack((pcre16_extra *)extra, \
311        (pcre16_jit_callback)callback, userdata)
312    
313    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
314      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
315        tables)
316    
/* Copy a (named or numbered) captured substring using the 16-bit library. The
buffer is cast to 16-bit units and the size is halved to match (presumably the
caller supplies the size in bytes - confirm at the call sites). The size
argument is parenthesized so that an expression such as "a + b" is halved as a
whole rather than only its last operand. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, (size)/2)
325    
326    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
327        offsets, size_offsets, workspace, size_workspace) \
328      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
329        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
330        workspace, size_workspace)
331    
332    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
333        offsets, size_offsets) \
334      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
335        len, start_offset, options, offsets, size_offsets)
336    
337    #define PCRE_FREE_STUDY16(extra) \
338      pcre16_free_study((pcre16_extra *)extra)
339    
340    #define PCRE_FREE_SUBSTRING16(substring) \
341      pcre16_free_substring((PCRE_SPTR16)substring)
342    
343    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
344      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
345    
346    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
347        getnamesptr, subsptr) \
348      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
349        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
350    
/* Look up the number of a named group via the 16-bit library and store it in
n. The second macro parameter is the compiled pattern. It is named "re" so
that the expansion uses the argument that was actually passed rather than a
variable captured from the calling scope, and it is cast to (pcre16 *) in the
same way as in the other 16-bit call macros. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
353    
354    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
355      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
356        (PCRE_SPTR16 *)(void*)subsptr)
357    
358    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
359      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
360        (PCRE_SPTR16 **)(void*)listptr)
361    
362    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
363      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
364        tables)
365    
366    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
367      pcre16_printint(re, outfile, debug_lengths)
368    
369    #define PCRE_STUDY16(extra, re, options, error) \
370      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
371    
372    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
373      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
374    
375    #define PCRE_JIT_STACK_FREE16(stack) \
376      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
377    
378    #endif /* SUPPORT_PCRE16 */
379    
380    
381    /* ----- Both modes are supported; a runtime test is needed, except for
382    pcre_config(), and the JIT stack functions, when it doesn't matter which
383    version is called. ----- */
384    
385    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
386    
/* Generic call macros for a build that supports both libraries: each one tests
use_pcre16 at run time and dispatches to the 16-bit or the 8-bit variant.

The statement-like macros are wrapped in do { ... } while (0) so that each use,
followed by its semicolon, is exactly one statement. A bare "if ... else ..."
expansion would capture a following "else", or fail to compile, when the macro
is used as the unbraced body of an if statement. The macros that yield a value
(CHAR_SIZE, STRLEN, PCRE_JIT_STACK_ALLOC, PCRE_MAKETABLES) remain plain
expressions. */

#define CHAR_SIZE (use_pcre16? 2:1)

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (use_pcre16) { \
      PCHARS16(lv, p, offset, len, f); \
    } else { \
      PCHARS8(lv, p, offset, len, f); \
    } \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (use_pcre16) { \
      PCHARSV16(p, offset, len, f); \
    } else { \
      PCHARSV8(p, offset, len, f); \
    } \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  do { \
    if (use_pcre16) { \
      READ_CAPTURE_NAME16(p, cn8, cn16, re); \
    } else { \
      READ_CAPTURE_NAME8(p, cn8, cn16, re); \
    } \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (use_pcre16) { \
      SET_PCRE_CALLOUT16(callout); \
    } else { \
      SET_PCRE_CALLOUT8(callout); \
    } \
  } while (0)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (use_pcre16) { \
      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
    } else { \
      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); \
    } \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) { \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    } else { \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
    } \
  } while (0)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (use_pcre16) { \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    } else { \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    } \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (use_pcre16) { \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    } else { \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
    } \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (use_pcre16) { \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } else { \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) { \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } else { \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) { \
      PCRE_FREE_STUDY16(extra); \
    } else { \
      PCRE_FREE_STUDY8(extra); \
    } \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (use_pcre16) { \
      PCRE_FREE_SUBSTRING16(substring); \
    } else { \
      PCRE_FREE_SUBSTRING8(substring); \
    } \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (use_pcre16) { \
      PCRE_FREE_SUBSTRING_LIST16(listptr); \
    } else { \
      PCRE_FREE_SUBSTRING_LIST8(listptr); \
    } \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (use_pcre16) { \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } else { \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (use_pcre16) { \
      PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
    } else { \
      PCRE_GET_STRINGNUMBER8(n, rc, ptr); \
    } \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (use_pcre16) { \
      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
    } else { \
      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
    } \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (use_pcre16) { \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    } else { \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
    } \
  } while (0)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (use_pcre16 ? \
     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
    :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (use_pcre16) { \
      PCRE_JIT_STACK_FREE16(stack); \
    } else { \
      PCRE_JIT_STACK_FREE8(stack); \
    } \
  } while (0)

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (use_pcre16) { \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
    } else { \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); \
    } \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (use_pcre16) { \
      PCRE_PRINTINT16(re, outfile, debug_lengths); \
    } else { \
      PCRE_PRINTINT8(re, outfile, debug_lengths); \
    } \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) { \
      PCRE_STUDY16(extra, re, options, error); \
    } else { \
      PCRE_STUDY8(extra, re, options, error); \
    } \
  } while (0)
538    
539    /* ----- Only 8-bit mode is supported ----- */
540    
541    #elif defined SUPPORT_PCRE8
542    #define CHAR_SIZE                 1
543    #define PCHARS                    PCHARS8
544    #define PCHARSV                   PCHARSV8
545    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
546    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
547    #define STRLEN                    STRLEN8
548    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
549    #define PCRE_COMPILE              PCRE_COMPILE8
550    #define PCRE_CONFIG               pcre_config
551    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
552    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
553    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
554    #define PCRE_EXEC                 PCRE_EXEC8
555    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
556    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
557    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
558    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
559    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
560    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
561    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
562    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
563    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
564    #define PCRE_MAKETABLES           pcre_maketables()
565    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
566    #define PCRE_PRINTINT             PCRE_PRINTINT8
567    #define PCRE_STUDY                PCRE_STUDY8
568    
569    /* ----- Only 16-bit mode is supported ----- */
570    
571    #else
572    #define CHAR_SIZE                 2
573    #define PCHARS                    PCHARS16
574    #define PCHARSV                   PCHARSV16
575    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
576    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
577    #define STRLEN                    STRLEN16
578    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
579    #define PCRE_COMPILE              PCRE_COMPILE16
580    #define PCRE_CONFIG               pcre16_config
581    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
582    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
583    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
584    #define PCRE_EXEC                 PCRE_EXEC16
585    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
586    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
587    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
588    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
589    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
590    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
591    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
592    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
593    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
594    #define PCRE_MAKETABLES           pcre16_maketables()
595    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
596    #define PCRE_PRINTINT             PCRE_PRINTINT16
597    #define PCRE_STUDY                PCRE_STUDY16
598  #endif  #endif
599    
600    /* ----- End of mode-specific function call macros ----- */
601    
602    
603  /* Other parameters */  /* Other parameters */
604    
# Line 163  UTF8 support if PCRE is built without it Line 610  UTF8 support if PCRE is built without it
610  #endif  #endif
611  #endif  #endif
612    
613    #if !defined NODFA
614    #define DFA_WS_DIMENSION 1000
615    #endif
616    
617  /* This is the default loop count for timing. */  /* This is the default loop count for timing. */
618    
619  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
# Line 177  static int callout_fail_count; Line 628  static int callout_fail_count;
628  static int callout_fail_id;  static int callout_fail_id;
629  static int debug_lengths;  static int debug_lengths;
630  static int first_callout;  static int first_callout;
631    static int jit_was_used;
632  static int locale_set = 0;  static int locale_set = 0;
633  static int show_malloc;  static int show_malloc;
634  static int use_utf8;  static int use_utf;
635  static size_t gotten_store;  static size_t gotten_store;
636    static size_t first_gotten_store = 0;
637    static const unsigned char *last_callout_mark = NULL;
638    
639  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
640    
641  static int buffer_size = 50000;  static int buffer_size = 50000;
642  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
643  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
644  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
645    
646    /* Another buffer is needed for translation to 16-bit character strings. It
647    will be obtained and extended as required. */
648    
649    #ifdef SUPPORT_PCRE16
650    static int buffer16_size = 0;
651    static pcre_uint16 *buffer16 = NULL;
652    
653    #ifdef SUPPORT_PCRE8
654    
655    /* We need the table of operator lengths that is used for 16-bit compiling, in
656    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
657    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
658    appropriately for the 16-bit world. Just as a safety check, make sure that
659    COMPILE_PCRE16 is *not* set. */
660    
661    #ifdef COMPILE_PCRE16
662    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
663    #endif
664    
665    #if LINK_SIZE == 2
666    #undef LINK_SIZE
667    #define LINK_SIZE 1
668    #elif LINK_SIZE == 3 || LINK_SIZE == 4
669    #undef LINK_SIZE
670    #define LINK_SIZE 2
671    #else
672    #error LINK_SIZE must be either 2, 3, or 4
673    #endif
674    
675    #undef IMM2_SIZE
676    #define IMM2_SIZE 1
677    
678    #endif /* SUPPORT_PCRE8 */
679    
680    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
681    #endif  /* SUPPORT_PCRE16 */
682    
683    /* If we have 8-bit support, default use_pcre16 to false; if there is also
684    16-bit support, it can be changed by an option. If there is no 8-bit support,
685    there must be 16-bit support, so default it to 1. */
686    
687    #ifdef SUPPORT_PCRE8
688    static int use_pcre16 = 0;
689    #else
690    static int use_pcre16 = 1;
691    #endif
692    
693    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
694    
695    static int jit_study_bits[] =
696      {
697      PCRE_STUDY_JIT_COMPILE,
698      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
699      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
700      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
701      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
702      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
703      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
704        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
705    };
706    
707    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
708      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
709    
710    /* Textual explanations for runtime error codes */
711    
/* Table of explanatory texts for runtime error codes. Entry 0 stands for "no
error"; the table appears to be indexed by the negated error code (confirm at
the place where it is used). A NULL entry marks a code that has no text here,
either because it is reported by separate code or because it is never returned
by the matching functions. Both the strings and the pointer array itself are
const, as the table is only meant to be read. */

static const char * const errtexts[] = {
  NULL,  /* 0 is no error */
  NULL,  /* NOMATCH is handled specially */
  "NULL argument passed",
  "bad option value",
  "magic number missing",
  "unknown opcode - pattern overwritten?",
  "no more memory",
  NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",
  "callout error code",
  NULL,  /* BADUTF8/16 is handled specially */
  NULL,  /* BADUTF8/16 offset is handled specially */
  NULL,  /* PARTIAL is handled specially */
  "not used - internal error",
  "internal error - pattern overwritten?",
  "bad count value",
  "item unsupported for DFA matching",
  "backreference condition or recursion test not supported for DFA matching",
  "match limit not supported for DFA matching",
  "workspace size exceeded in DFA matching",
  "too much recursion for DFA matching",
  "recursion limit exceeded",
  "not used - internal error",
  "invalid combination of newline options",
  "bad offset value",
  NULL,  /* SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",
  "JIT stack limit reached",
  "pattern compiled in wrong mode: 8-bit/16-bit error",
  "pattern compiled with other endianness",
  "invalid data in workspace for DFA restart"
};
745    
746    
747    /*************************************************
748    *         Alternate character tables             *
749    *************************************************/
750    
751    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
752    using the default tables of the library. However, the T option can be used to
753    select alternate sets of tables, for different kinds of testing. Note also that
754    the L (locale) option also adjusts the tables. */
755    
756    /* This is the set of tables distributed as default with PCRE. It recognizes
757    only ASCII characters. */
758    
759    static const pcre_uint8 tables0[] = {
760    
761    /* This table is a lower casing table. */
762    
763        0,  1,  2,  3,  4,  5,  6,  7,
764        8,  9, 10, 11, 12, 13, 14, 15,
765       16, 17, 18, 19, 20, 21, 22, 23,
766       24, 25, 26, 27, 28, 29, 30, 31,
767       32, 33, 34, 35, 36, 37, 38, 39,
768       40, 41, 42, 43, 44, 45, 46, 47,
769       48, 49, 50, 51, 52, 53, 54, 55,
770       56, 57, 58, 59, 60, 61, 62, 63,
771       64, 97, 98, 99,100,101,102,103,
772      104,105,106,107,108,109,110,111,
773      112,113,114,115,116,117,118,119,
774      120,121,122, 91, 92, 93, 94, 95,
775       96, 97, 98, 99,100,101,102,103,
776      104,105,106,107,108,109,110,111,
777      112,113,114,115,116,117,118,119,
778      120,121,122,123,124,125,126,127,
779      128,129,130,131,132,133,134,135,
780      136,137,138,139,140,141,142,143,
781      144,145,146,147,148,149,150,151,
782      152,153,154,155,156,157,158,159,
783      160,161,162,163,164,165,166,167,
784      168,169,170,171,172,173,174,175,
785      176,177,178,179,180,181,182,183,
786      184,185,186,187,188,189,190,191,
787      192,193,194,195,196,197,198,199,
788      200,201,202,203,204,205,206,207,
789      208,209,210,211,212,213,214,215,
790      216,217,218,219,220,221,222,223,
791      224,225,226,227,228,229,230,231,
792      232,233,234,235,236,237,238,239,
793      240,241,242,243,244,245,246,247,
794      248,249,250,251,252,253,254,255,
795    
796    /* This table is a case flipping table. */
797    
798        0,  1,  2,  3,  4,  5,  6,  7,
799        8,  9, 10, 11, 12, 13, 14, 15,
800       16, 17, 18, 19, 20, 21, 22, 23,
801       24, 25, 26, 27, 28, 29, 30, 31,
802       32, 33, 34, 35, 36, 37, 38, 39,
803       40, 41, 42, 43, 44, 45, 46, 47,
804       48, 49, 50, 51, 52, 53, 54, 55,
805       56, 57, 58, 59, 60, 61, 62, 63,
806       64, 97, 98, 99,100,101,102,103,
807      104,105,106,107,108,109,110,111,
808      112,113,114,115,116,117,118,119,
809      120,121,122, 91, 92, 93, 94, 95,
810       96, 65, 66, 67, 68, 69, 70, 71,
811       72, 73, 74, 75, 76, 77, 78, 79,
812       80, 81, 82, 83, 84, 85, 86, 87,
813       88, 89, 90,123,124,125,126,127,
814      128,129,130,131,132,133,134,135,
815      136,137,138,139,140,141,142,143,
816      144,145,146,147,148,149,150,151,
817      152,153,154,155,156,157,158,159,
818      160,161,162,163,164,165,166,167,
819      168,169,170,171,172,173,174,175,
820      176,177,178,179,180,181,182,183,
821      184,185,186,187,188,189,190,191,
822      192,193,194,195,196,197,198,199,
823      200,201,202,203,204,205,206,207,
824      208,209,210,211,212,213,214,215,
825      216,217,218,219,220,221,222,223,
826      224,225,226,227,228,229,230,231,
827      232,233,234,235,236,237,238,239,
828      240,241,242,243,244,245,246,247,
829      248,249,250,251,252,253,254,255,
830    
831    /* This table contains bit maps for various character classes. Each map is 32
832    bytes long and the bits run from the least significant end of each byte. The
833    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
834    graph, print, punct, and cntrl. Other classes are built from combinations. */
835    
836      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
837      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
838      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
839      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840    
841      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
842      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
843      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845    
846      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
847      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850    
851      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
852      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
853      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
855    
856      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
858      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
859      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
860    
861      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
862      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
863      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
864      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
865    
866      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
867      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
868      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
869      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
870    
871      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
872      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
873      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
874      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
875    
876      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
877      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
878      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
879      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
880    
881      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
882      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
883      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
884      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
885    
886    /* This table identifies various classes of character by individual bits:
887      0x01   white space character
888      0x02   letter
889      0x04   decimal digit
890      0x08   hexadecimal digit
891      0x10   alphanumeric or '_'
892      0x80   regular expression metacharacter or binary zero
893    */
894    
895      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
896      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
897      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
898      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
899      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
900      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
901      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
902      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
903      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
904      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
905      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
906      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
907      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
908      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
909      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
910      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
911      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
912      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
913      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
914      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
915      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
916      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
917      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
918      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
919      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
920      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
921      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
922      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
923      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
924      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
925      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
926      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
927    
928    /* This is a set of tables that came originally from a Windows user. It seems to
929    be at least an approximation of ISO 8859. In particular, there are characters
930    greater than 128 that are marked as spaces, letters, etc. */
931    
932    static const pcre_uint8 tables1[] = {
933    0,1,2,3,4,5,6,7,
934    8,9,10,11,12,13,14,15,
935    16,17,18,19,20,21,22,23,
936    24,25,26,27,28,29,30,31,
937    32,33,34,35,36,37,38,39,
938    40,41,42,43,44,45,46,47,
939    48,49,50,51,52,53,54,55,
940    56,57,58,59,60,61,62,63,
941    64,97,98,99,100,101,102,103,
942    104,105,106,107,108,109,110,111,
943    112,113,114,115,116,117,118,119,
944    120,121,122,91,92,93,94,95,
945    96,97,98,99,100,101,102,103,
946    104,105,106,107,108,109,110,111,
947    112,113,114,115,116,117,118,119,
948    120,121,122,123,124,125,126,127,
949    128,129,130,131,132,133,134,135,
950    136,137,138,139,140,141,142,143,
951    144,145,146,147,148,149,150,151,
952    152,153,154,155,156,157,158,159,
953    160,161,162,163,164,165,166,167,
954    168,169,170,171,172,173,174,175,
955    176,177,178,179,180,181,182,183,
956    184,185,186,187,188,189,190,191,
957    224,225,226,227,228,229,230,231,
958    232,233,234,235,236,237,238,239,
959    240,241,242,243,244,245,246,215,
960    248,249,250,251,252,253,254,223,
961    224,225,226,227,228,229,230,231,
962    232,233,234,235,236,237,238,239,
963    240,241,242,243,244,245,246,247,
964    248,249,250,251,252,253,254,255,
965    0,1,2,3,4,5,6,7,
966    8,9,10,11,12,13,14,15,
967    16,17,18,19,20,21,22,23,
968    24,25,26,27,28,29,30,31,
969    32,33,34,35,36,37,38,39,
970    40,41,42,43,44,45,46,47,
971    48,49,50,51,52,53,54,55,
972    56,57,58,59,60,61,62,63,
973    64,97,98,99,100,101,102,103,
974    104,105,106,107,108,109,110,111,
975    112,113,114,115,116,117,118,119,
976    120,121,122,91,92,93,94,95,
977    96,65,66,67,68,69,70,71,
978    72,73,74,75,76,77,78,79,
979    80,81,82,83,84,85,86,87,
980    88,89,90,123,124,125,126,127,
981    128,129,130,131,132,133,134,135,
982    136,137,138,139,140,141,142,143,
983    144,145,146,147,148,149,150,151,
984    152,153,154,155,156,157,158,159,
985    160,161,162,163,164,165,166,167,
986    168,169,170,171,172,173,174,175,
987    176,177,178,179,180,181,182,183,
988    184,185,186,187,188,189,190,191,
989    224,225,226,227,228,229,230,231,
990    232,233,234,235,236,237,238,239,
991    240,241,242,243,244,245,246,215,
992    248,249,250,251,252,253,254,223,
993    192,193,194,195,196,197,198,199,
994    200,201,202,203,204,205,206,207,
995    208,209,210,211,212,213,214,247,
996    216,217,218,219,220,221,222,255,
997    0,62,0,0,1,0,0,0,
998    0,0,0,0,0,0,0,0,
999    32,0,0,0,1,0,0,0,
1000    0,0,0,0,0,0,0,0,
1001    0,0,0,0,0,0,255,3,
1002    126,0,0,0,126,0,0,0,
1003    0,0,0,0,0,0,0,0,
1004    0,0,0,0,0,0,0,0,
1005    0,0,0,0,0,0,255,3,
1006    0,0,0,0,0,0,0,0,
1007    0,0,0,0,0,0,12,2,
1008    0,0,0,0,0,0,0,0,
1009    0,0,0,0,0,0,0,0,
1010    254,255,255,7,0,0,0,0,
1011    0,0,0,0,0,0,0,0,
1012    255,255,127,127,0,0,0,0,
1013    0,0,0,0,0,0,0,0,
1014    0,0,0,0,254,255,255,7,
1015    0,0,0,0,0,4,32,4,
1016    0,0,0,128,255,255,127,255,
1017    0,0,0,0,0,0,255,3,
1018    254,255,255,135,254,255,255,7,
1019    0,0,0,0,0,4,44,6,
1020    255,255,127,255,255,255,127,255,
1021    0,0,0,0,254,255,255,255,
1022    255,255,255,255,255,255,255,127,
1023    0,0,0,0,254,255,255,255,
1024    255,255,255,255,255,255,255,255,
1025    0,2,0,0,255,255,255,255,
1026    255,255,255,255,255,255,255,127,
1027    0,0,0,0,255,255,255,255,
1028    255,255,255,255,255,255,255,255,
1029    0,0,0,0,254,255,0,252,
1030    1,0,0,248,1,0,0,120,
1031    0,0,0,0,254,255,255,255,
1032    0,0,128,0,0,0,128,0,
1033    255,255,255,255,0,0,0,0,
1034    0,0,0,0,0,0,0,128,
1035    255,255,255,255,0,0,0,0,
1036    0,0,0,0,0,0,0,0,
1037    128,0,0,0,0,0,0,0,
1038    0,1,1,0,1,1,0,0,
1039    0,0,0,0,0,0,0,0,
1040    0,0,0,0,0,0,0,0,
1041    1,0,0,0,128,0,0,0,
1042    128,128,128,128,0,0,128,0,
1043    28,28,28,28,28,28,28,28,
1044    28,28,0,0,0,0,0,128,
1045    0,26,26,26,26,26,26,18,
1046    18,18,18,18,18,18,18,18,
1047    18,18,18,18,18,18,18,18,
1048    18,18,18,128,128,0,128,16,
1049    0,26,26,26,26,26,26,18,
1050    18,18,18,18,18,18,18,18,
1051    18,18,18,18,18,18,18,18,
1052    18,18,18,128,128,0,0,0,
1053    0,0,0,0,0,1,0,0,
1054    0,0,0,0,0,0,0,0,
1055    0,0,0,0,0,0,0,0,
1056    0,0,0,0,0,0,0,0,
1057    1,0,0,0,0,0,0,0,
1058    0,0,18,0,0,0,0,0,
1059    0,0,20,20,0,18,0,0,
1060    0,20,18,0,0,0,0,0,
1061    18,18,18,18,18,18,18,18,
1062    18,18,18,18,18,18,18,18,
1063    18,18,18,18,18,18,18,0,
1064    18,18,18,18,18,18,18,18,
1065    18,18,18,18,18,18,18,18,
1066    18,18,18,18,18,18,18,18,
1067    18,18,18,18,18,18,18,0,
1068    18,18,18,18,18,18,18,18
1069    };
1070    
1071    
1072    
1073    
1074    #ifndef HAVE_STRERROR
1075    /*************************************************
1076    *     Provide strerror() for non-ANSI libraries  *
1077    *************************************************/
1078    
1079    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1080    in their libraries, but can provide the same facility by this simple
1081    alternative function. */
1082    
1083    extern int   sys_nerr;
1084    extern char *sys_errlist[];
1085    
1086    char *
1087    strerror(int n)
1088    {
1089    if (n < 0 || n >= sys_nerr) return "unknown error number";
1090    return sys_errlist[n];
1091    }
1092    #endif /* HAVE_STRERROR */
1093    
1094    
1095    /*************************************************
1096    *         JIT memory callback                    *
1097    *************************************************/
1098    
1099    static pcre_jit_stack* jit_callback(void *arg)
1100    {
1101    jit_was_used = TRUE;
1102    return (pcre_jit_stack *)arg;
1103    }
1104    
1105    
1106    #if !defined NOUTF || defined SUPPORT_PCRE16
1107    /*************************************************
1108    *            Convert UTF-8 string to value       *
1109    *************************************************/
1110    
1111    /* This function takes one or more bytes that represents a UTF-8 character,
1112    and returns the value of the character.
1113    
1114    Argument:
1115      utf8bytes   a pointer to the byte vector
1116      vptr        a pointer to an int to receive the value
1117    
1118    Returns:      >  0 => the number of bytes consumed
1119                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1120    */
1121    
1122    static int
1123    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1124    {
1125    int c = *utf8bytes++;
1126    int d = c;
1127    int i, j, s;
1128    
1129    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1130      {
1131      if ((d & 0x80) == 0) break;
1132      d <<= 1;
1133      }
1134    
1135    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1136    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1137    
1138    /* i now has a value in the range 1-5 */
1139    
1140    s = 6*i;
1141    d = (c & utf8_table3[i]) << s;
1142    
1143    for (j = 0; j < i; j++)
1144      {
1145      c = *utf8bytes++;
1146      if ((c & 0xc0) != 0x80) return -(j+1);
1147      s -= 6;
1148      d |= (c & 0x3f) << s;
1149      }
1150    
1151    /* Check that encoding was the correct unique one */
1152    
1153    for (j = 0; j < utf8_table1_size; j++)
1154      if (d <= utf8_table1[j]) break;
1155    if (j != i) return -(i+1);
1156    
1157    /* Valid value */
1158    
1159    *vptr = d;
1160    return i+1;
1161    }
1162    #endif /* NOUTF || SUPPORT_PCRE16 */
1163    
1164    
1165    
1166    #if !defined NOUTF || defined SUPPORT_PCRE16
1167    /*************************************************
1168    *       Convert character value to UTF-8         *
1169    *************************************************/
1170    
1171    /* This function takes an integer value in the range 0 - 0x7fffffff
1172    and encodes it as a UTF-8 character in 0 to 6 bytes.
1173    
1174    Arguments:
1175      cvalue     the character value
1176      utf8bytes  pointer to buffer for result - at least 6 bytes long
1177    
1178    Returns:     number of characters placed in the buffer
1179    */
1180    
1181    static int
1182    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1183    {
1184    register int i, j;
1185    for (i = 0; i < utf8_table1_size; i++)
1186      if (cvalue <= utf8_table1[i]) break;
1187    utf8bytes += i;
1188    for (j = i; j > 0; j--)
1189     {
1190     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1191     cvalue >>= 6;
1192     }
1193    *utf8bytes = utf8_table2[i] | cvalue;
1194    return i + 1;
1195    }
1196    #endif
1197    
1198    
1199    #ifdef SUPPORT_PCRE16
1200    /*************************************************
1201    *         Convert a string to 16-bit             *
1202    *************************************************/
1203    
1204    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1205    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1206    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1207    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1208    result is always left in buffer16.
1209    
1210    Note that this function does not object to surrogate values. This is
1211    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1212    for the purpose of testing that they are correctly faulted.
1213    
1214    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1215    in UTF-8 so that values greater than 255 can be handled.
1216    
1217    Arguments:
1218      data       TRUE if converting a data line; FALSE for a regex
1219      p          points to a byte string
1220      utf        true if UTF-8 (to be converted to UTF-16)
1221      len        number of bytes in the string (excluding trailing zero)
1222    
1223    Returns:     number of 16-bit data items used (excluding trailing zero)
1224                 OR -1 if a UTF-8 string is malformed
1225                 OR -2 if a value > 0x10ffff is encountered
1226                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1227    */
1228    
1229    static int
1230    to16(int data, pcre_uint8 *p, int utf, int len)
1231    {
1232    pcre_uint16 *pp;
1233    
1234    if (buffer16_size < 2*len + 2)
1235      {
1236      if (buffer16 != NULL) free(buffer16);
1237      buffer16_size = 2*len + 2;
1238      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1239      if (buffer16 == NULL)
1240        {
1241        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1242        exit(1);
1243        }
1244      }
1245    
1246    pp = buffer16;
1247    
1248    if (!utf && !data)
1249      {
1250      while (len-- > 0) *pp++ = *p++;
1251      }
1252    
1253    else
1254      {
1255      int c = 0;
1256      while (len > 0)
1257        {
1258        int chlen = utf82ord(p, &c);
1259        if (chlen <= 0) return -1;
1260        if (c > 0x10ffff) return -2;
1261        p += chlen;
1262        len -= chlen;
1263        if (c < 0x10000) *pp++ = c; else
1264          {
1265          if (!utf) return -3;
1266          c -= 0x10000;
1267          *pp++ = 0xD800 | (c >> 10);
1268          *pp++ = 0xDC00 | (c & 0x3ff);
1269          }
1270        }
1271      }
1272    
1273    *pp = 0;
1274    return pp - buffer16;
1275    }
1276    #endif
1277    
1278    
1279  /*************************************************  /*************************************************
1280  *        Read or extend an input line            *  *        Read or extend an input line            *
1281  *************************************************/  *************************************************/
# Line 214  Returns:       pointer to the start of n Line 1299  Returns:       pointer to the start of n
1299                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1300  */  */
1301    
1302  static uschar *  static pcre_uint8 *
1303  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1304  {  {
1305  uschar *here = start;  pcre_uint8 *here = start;
1306    
1307  for (;;)  for (;;)
1308    {    {
1309    int rlen = buffer_size - (here - buffer);    size_t rlen = (size_t)(buffer_size - (here - buffer));
1310    
1311    if (rlen > 1000)    if (rlen > 1000)
1312      {      {
1313      int dlen;      int dlen;
1314    
1315      /* If libreadline support is required, use readline() to read a line if the      /* If libreadline or libedit support is required, use readline() to read a
1316      input is a terminal. Note that readline() removes the trailing newline, so      line if the input is a terminal. Note that readline() removes the trailing
1317      we must put it back again, to be compatible with fgets(). */      newline, so we must put it back again, to be compatible with fgets(). */
1318    
1319  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1320      if (isatty(fileno(f)))      if (isatty(fileno(f)))
1321        {        {
1322        size_t len;        size_t len;
# Line 264  for (;;) Line 1349  for (;;)
1349    else    else
1350      {      {
1351      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1352      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1353      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1354      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1355    
1356      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1357        {        {
# Line 297  return NULL;  /* Control never gets here Line 1382  return NULL;  /* Control never gets here
1382    
1383    
1384    
   
   
   
   
1385  /*************************************************  /*************************************************
1386  *          Read number from string               *  *          Read number from string               *
1387  *************************************************/  *************************************************/
# Line 317  Returns:        the unsigned long Line 1398  Returns:        the unsigned long
1398  */  */
1399    
1400  static int  static int
1401  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1402  {  {
1403  int result = 0;  int result = 0;
1404  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 328  return(result); Line 1409  return(result);
1409    
1410    
1411    
   
1412  /*************************************************  /*************************************************
1413  *            Convert UTF-8 string to value       *  *             Print one character                *
1414  *************************************************/  *************************************************/
1415    
1416  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
1417    
1418  static int  static int pchar(int c, FILE *f)
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1419  {  {
1420  int c = *utf8bytes++;  if (PRINTOK(c))
1421  int d = c;    {
1422  int i, j, s;    if (f != NULL) fprintf(f, "%c", c);
1423      return 1;
1424      }
1425    
1426  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1427    {    {
1428    if ((d & 0x80) == 0) break;    if (use_utf)
1429    d <<= 1;      {
1430        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1431        return 6;
1432        }
1433      else
1434        {
1435        if (f != NULL) fprintf(f, "\\x%02x", c);
1436        return 4;
1437        }
1438    }    }
1439    
1440  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1441  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return (c <= 0x000000ff)? 6 :
1442           (c <= 0x00000fff)? 7 :
1443           (c <= 0x0000ffff)? 8 :
1444           (c <= 0x000fffff)? 9 : 10;
1445    }
1446    
 /* i now has a value in the range 1-5 */  
1447    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1448    
1449  for (j = 0; j < i; j++)  #ifdef SUPPORT_PCRE8
1450    {  /*************************************************
1451    c = *utf8bytes++;  *         Print 8-bit character string           *
1452    if ((c & 0xc0) != 0x80) return -(j+1);  *************************************************/
   s -= 6;  
   d |= (c & 0x3f) << s;  
   }  
1453    
1454  /* Check that encoding was the correct unique one */  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1455    If handed a NULL file, just counts chars without printing. */
1456    
1457  for (j = 0; j < utf8_table1_size; j++)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1458    if (d <= utf8_table1[j]) break;  {
1459  if (j != i) return -(i+1);  int c = 0;
1460    int yield = 0;
1461    
1462  /* Valid value */  if (length < 0)
1463      length = strlen((char *)p);
1464    
1465  *vptr = d;  while (length-- > 0)
1466  return i+1;    {
1467  }  #if !defined NOUTF
1468      if (use_utf)
1469        {
1470        int rc = utf82ord(p, &c);
1471        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1472          {
1473          length -= rc - 1;
1474          p += rc;
1475          yield += pchar(c, f);
1476          continue;
1477          }
1478        }
1479    #endif
1480      c = *p++;
1481      yield += pchar(c, f);
1482      }
1483    
1484    return yield;
1485    }
1486  #endif  #endif
1487    
1488    
1489    
1490    #ifdef SUPPORT_PCRE16
1491  /*************************************************  /*************************************************
1492  *       Convert character value to UTF-8         *  *    Find length of 0-terminated 16-bit string   *
1493  *************************************************/  *************************************************/
1494    
1495  /* This function takes an integer value in the range 0 - 0x7fffffff  static int strlen16(PCRE_SPTR16 p)
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 ord2utf8(int cvalue, uschar *utf8bytes)  
1496  {  {
1497  register int i, j;  int len = 0;
1498  for (i = 0; i < utf8_table1_size; i++)  while (*p++ != 0) len++;
1499    if (cvalue <= utf8_table1[i]) break;  return len;
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1500  }  }
1501    #endif  /* SUPPORT_PCRE16 */
 #endif  
   
1502    
1503    
1504    #ifdef SUPPORT_PCRE16
1505  /*************************************************  /*************************************************
1506  *             Print character string             *  *           Print 16-bit character string        *
1507  *************************************************/  *************************************************/
1508    
1509  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1510  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1511    
1512  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1513  {  {
 int c = 0;  
1514  int yield = 0;  int yield = 0;
1515    
1516    if (length < 0)
1517      length = strlen16(p);
1518    
1519  while (length-- > 0)  while (length-- > 0)
1520    {    {
1521  #if !defined NOUTF8    int c = *p++ & 0xffff;
1522    if (use_utf8)  #if !defined NOUTF
1523      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1524      {      {
1525      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
1526        if (d >= 0xDC00 && d < 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
1527        {        {
1528        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1529        p += rc;        length--;
1530        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
1531        }        }
1532      }      }
1533  #endif  #endif
1534      yield += pchar(c, f);
1535      }
1536    
1537     /* Not UTF-8, or malformed UTF-8  */  return yield;
1538    }
1539    #endif  /* SUPPORT_PCRE16 */
1540    
1541    c = *p++;  
1542    if (PRINTHEX(c))  
1543      {  #ifdef SUPPORT_PCRE8
1544      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1545      yield++;  *     Read a capture name (8-bit) and check it   *
1546      }  *************************************************/
1547    else  
1548      {  static pcre_uint8 *
1549      if (f != NULL) fprintf(f, "\\x%02x", c);  read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1550      yield += 4;  {
1551      }  pcre_uint8 *npp = *pp;
1552    while (isalnum(*p)) *npp++ = *p++;
1553    *npp++ = 0;
1554    *npp = 0;
1555    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1556      {
1557      fprintf(outfile, "no parentheses with name \"");
1558      PCHARSV(*pp, 0, -1, outfile);
1559      fprintf(outfile, "\"\n");
1560    }    }
1561    
1562  return yield;  *pp = npp;
1563    return p;
1564    }
1565    #endif  /* SUPPORT_PCRE8 */
1566    
1567    
1568    
1569    #ifdef SUPPORT_PCRE16
1570    /*************************************************
1571    *     Read a capture name (16-bit) and check it  *
1572    *************************************************/
1573    
1574    /* Note that the text being read is 8-bit. */
1575    
1576    static pcre_uint8 *
1577    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1578    {
1579    pcre_uint16 *npp = *pp;
1580    while (isalnum(*p)) *npp++ = *p++;
1581    *npp++ = 0;
1582    *npp = 0;
1583    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1584      {
1585      fprintf(outfile, "no parentheses with name \"");
1586      PCHARSV(*pp, 0, -1, outfile);
1587      fprintf(outfile, "\"\n");
1588      }
1589    *pp = npp;
1590    return p;
1591  }  }
1592    #endif  /* SUPPORT_PCRE16 */
1593    
1594    
1595    
# Line 515  if (callout_extra) Line 1618  if (callout_extra)
1618      else      else
1619        {        {
1620        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1621        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1622          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1623        fprintf(f, "\n");        fprintf(f, "\n");
1624        }        }
# Line 528  printed lengths of the substrings. */ Line 1631  printed lengths of the substrings. */
1631    
1632  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1633    
1634  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1635  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1636    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1637    
1638  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1639    
1640  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1641    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1642    
1643  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 571  fprintf(outfile, "%.*s", (cb->next_item_ Line 1674  fprintf(outfile, "%.*s", (cb->next_item_
1674  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1675  first_callout = 0;  first_callout = 0;
1676    
1677    if (cb->mark != last_callout_mark)
1678      {
1679      if (cb->mark == NULL)
1680        fprintf(outfile, "Latest Mark: <unset>\n");
1681      else
1682        {
1683        fprintf(outfile, "Latest Mark: ");
1684        PCHARSV(cb->mark, 0, -1, outfile);
1685        putc('\n', outfile);
1686        }
1687      last_callout_mark = cb->mark;
1688      }
1689    
1690  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1691    {    {
1692    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 590  return (cb->callout_number != callout_fa Line 1706  return (cb->callout_number != callout_fa
1706  *            Local malloc functions              *  *            Local malloc functions              *
1707  *************************************************/  *************************************************/
1708    
1709  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1710  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1711    show_malloc variable is set only during matching. */
1712    
1713  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1714  {  {
1715  void *block = malloc(size);  void *block = malloc(size);
1716  gotten_store = size;  gotten_store = size;
1717    if (first_gotten_store == 0) first_gotten_store = size;
1718  if (show_malloc)  if (show_malloc)
1719    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1720  return block;  return block;
# Line 609  if (show_malloc) Line 1727  if (show_malloc)
1727  free(block);  free(block);
1728  }  }
1729    
   
1730  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1731    
1732  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 632  free(block); Line 1749  free(block);
1749  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1750  *************************************************/  *************************************************/
1751    
1752  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1753    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1754    value, but the code is defensive.
1755    
1756    Arguments:
1757      re        compiled regex
1758      study     study data
1759      option    PCRE_INFO_xxx option
1760      ptr       where to put the data
1761    
1762    Returns:    0 when OK, < 0 on error
1763    */
1764    
1765  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
1766    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1767  {  {
1768  int rc;  int rc;
1769  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1770    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1771    #ifdef SUPPORT_PCRE16
1772      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1773    #else
1774      rc = PCRE_ERROR_BADMODE;
1775    #endif
1776    else
1777    #ifdef SUPPORT_PCRE8
1778      rc = pcre_fullinfo(re, study, option, ptr);
1779    #else
1780      rc = PCRE_ERROR_BADMODE;
1781    #endif
1782    
1783    if (rc < 0)
1784      {
1785      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1786        use_pcre16? "16" : "", option);
1787      if (rc == PCRE_ERROR_BADMODE)
1788        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1789          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1790      }
1791    
1792    return rc;
1793  }  }
1794    
1795    
1796    
1797  /*************************************************  /*************************************************
1798  *         Byte flipping function                 *  *             Swap byte functions                *
1799  *************************************************/  *************************************************/
1800    
1801  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1802  byteflip(unsigned long int value, int n)  value, respectively.
1803    
1804    Arguments:
1805      value        any number
1806    
1807    Returns:       the byte swapped value
1808    */
1809    
1810    static pcre_uint32
1811    swap_uint32(pcre_uint32 value)
1812  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1813  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1814         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1815         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1816         ((value & 0xff000000) >> 24);         (value >> 24);
1817  }  }
1818    
1819    static pcre_uint16
1820    swap_uint16(pcre_uint16 value)
1821    {
1822    return (value >> 8) | (value << 8);
1823    }
1824    
1825    
1826    
1827    /*************************************************
1828    *        Flip bytes in a compiled pattern        *
1829    *************************************************/
1830    
1831    /* This function is called if the 'F' option was present on a pattern that is
1832    to be written to a file. We flip the bytes of all the integer fields in the
1833    regex data block and the study block. In 16-bit mode this also flips relevant
1834    bytes in the pattern itself. This is to make it possible to test PCRE's
1835    ability to reload byte-flipped patterns, e.g. those compiled on a different
1836    architecture. */
1837    
1838    static void
1839    regexflip(pcre *ere, pcre_extra *extra)
1840    {
1841    REAL_PCRE *re = (REAL_PCRE *)ere;
1842    #ifdef SUPPORT_PCRE16
1843    int op;
1844    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1845    int length = re->name_count * re->name_entry_size;
1846    #ifdef SUPPORT_UTF
1847    BOOL utf = (re->options & PCRE_UTF16) != 0;
1848    BOOL utf16_char = FALSE;
1849    #endif /* SUPPORT_UTF */
1850    #endif /* SUPPORT_PCRE16 */
1851    
1852    /* Always flip the bytes in the main data block and study blocks. */
1853    
1854    re->magic_number = REVERSED_MAGIC_NUMBER;
1855    re->size = swap_uint32(re->size);
1856    re->options = swap_uint32(re->options);
1857    re->flags = swap_uint16(re->flags);
1858    re->top_bracket = swap_uint16(re->top_bracket);
1859    re->top_backref = swap_uint16(re->top_backref);
1860    re->first_char = swap_uint16(re->first_char);
1861    re->req_char = swap_uint16(re->req_char);
1862    re->name_table_offset = swap_uint16(re->name_table_offset);
1863    re->name_entry_size = swap_uint16(re->name_entry_size);
1864    re->name_count = swap_uint16(re->name_count);
1865    
1866    if (extra != NULL)
1867      {
1868      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1869      rsd->size = swap_uint32(rsd->size);
1870      rsd->flags = swap_uint32(rsd->flags);
1871      rsd->minlength = swap_uint32(rsd->minlength);
1872      }
1873    
1874    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1875    in the name table, if present, and then in the pattern itself. */
1876    
1877    #ifdef SUPPORT_PCRE16
1878    if (!use_pcre16) return;
1879    
1880    while(TRUE)
1881      {
1882      /* Swap previous characters. */
1883      while (length-- > 0)
1884        {
1885        *ptr = swap_uint16(*ptr);
1886        ptr++;
1887        }
1888    #ifdef SUPPORT_UTF
1889      if (utf16_char)
1890        {
1891        if ((ptr[-1] & 0xfc00) == 0xd800)
1892          {
1893          /* We know that there is only one extra character in UTF-16. */
1894          *ptr = swap_uint16(*ptr);
1895          ptr++;
1896          }
1897        }
1898      utf16_char = FALSE;
1899    #endif /* SUPPORT_UTF */
1900    
1901      /* Get next opcode. */
1902    
1903      length = 0;
1904      op = *ptr;
1905      *ptr++ = swap_uint16(op);
1906    
1907      switch (op)
1908        {
1909        case OP_END:
1910        return;
1911    
1912    #ifdef SUPPORT_UTF
1913        case OP_CHAR:
1914        case OP_CHARI:
1915        case OP_NOT:
1916        case OP_NOTI:
1917        case OP_STAR:
1918        case OP_MINSTAR:
1919        case OP_PLUS:
1920        case OP_MINPLUS:
1921        case OP_QUERY:
1922        case OP_MINQUERY:
1923        case OP_UPTO:
1924        case OP_MINUPTO:
1925        case OP_EXACT:
1926        case OP_POSSTAR:
1927        case OP_POSPLUS:
1928        case OP_POSQUERY:
1929        case OP_POSUPTO:
1930        case OP_STARI:
1931        case OP_MINSTARI:
1932        case OP_PLUSI:
1933        case OP_MINPLUSI:
1934        case OP_QUERYI:
1935        case OP_MINQUERYI:
1936        case OP_UPTOI:
1937        case OP_MINUPTOI:
1938        case OP_EXACTI:
1939        case OP_POSSTARI:
1940        case OP_POSPLUSI:
1941        case OP_POSQUERYI:
1942        case OP_POSUPTOI:
1943        case OP_NOTSTAR:
1944        case OP_NOTMINSTAR:
1945        case OP_NOTPLUS:
1946        case OP_NOTMINPLUS:
1947        case OP_NOTQUERY:
1948        case OP_NOTMINQUERY:
1949        case OP_NOTUPTO:
1950        case OP_NOTMINUPTO:
1951        case OP_NOTEXACT:
1952        case OP_NOTPOSSTAR:
1953        case OP_NOTPOSPLUS:
1954        case OP_NOTPOSQUERY:
1955        case OP_NOTPOSUPTO:
1956        case OP_NOTSTARI:
1957        case OP_NOTMINSTARI:
1958        case OP_NOTPLUSI:
1959        case OP_NOTMINPLUSI:
1960        case OP_NOTQUERYI:
1961        case OP_NOTMINQUERYI:
1962        case OP_NOTUPTOI:
1963        case OP_NOTMINUPTOI:
1964        case OP_NOTEXACTI:
1965        case OP_NOTPOSSTARI:
1966        case OP_NOTPOSPLUSI:
1967        case OP_NOTPOSQUERYI:
1968        case OP_NOTPOSUPTOI:
1969        if (utf) utf16_char = TRUE;
1970    #endif
1971        /* Fall through. */
1972    
1973        default:
1974        length = OP_lengths16[op] - 1;
1975        break;
1976    
1977        case OP_CLASS:
1978        case OP_NCLASS:
1979        /* Skip the character bit map. */
1980        ptr += 32/sizeof(pcre_uint16);
1981        length = 0;
1982        break;
1983    
1984        case OP_XCLASS:
1985        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1986        if (LINK_SIZE > 1)
1987          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1988            - (1 + LINK_SIZE + 1));
1989        else
1990          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1991    
1992        /* Reverse the size of the XCLASS instance. */
1993        *ptr = swap_uint16(*ptr);
1994        ptr++;
1995        if (LINK_SIZE > 1)
1996          {
1997          *ptr = swap_uint16(*ptr);
1998          ptr++;
1999          }
2000    
2001        op = *ptr;
2002        *ptr = swap_uint16(op);
2003        ptr++;
2004        if ((op & XCL_MAP) != 0)
2005          {
2006          /* Skip the character bit map. */
2007          ptr += 32/sizeof(pcre_uint16);
2008          length -= 32/sizeof(pcre_uint16);
2009          }
2010        break;
2011        }
2012      }
2013    /* Control should never reach here in 16 bit mode. */
2014    #endif /* SUPPORT_PCRE16 */
2015    }
2016    
2017    
2018    
# Line 665  return ((value & 0x000000ff) << 24) | Line 2021  return ((value & 0x000000ff) << 24) |
2021  *************************************************/  *************************************************/
2022    
2023  static int  static int
2024  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2025    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2026    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2027  {  {
# Line 680  for (;;) Line 2036  for (;;)
2036    {    {
2037    *limit = mid;    *limit = mid;
2038    
2039    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2040      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2041    
2042    if (count == errnumber)    if (count == errnumber)
# Line 725  Returns:    < 0, = 0, or > 0, according Line 2081  Returns:    < 0, = 0, or > 0, according
2081  */  */
2082    
2083  static int  static int
2084  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2085  {  {
2086  while (n--)  while (n--)
2087    {    {
# Line 741  return 0; Line 2097  return 0;
2097  *         Check newline indicator                *  *         Check newline indicator                *
2098  *************************************************/  *************************************************/
2099    
2100  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2101  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
2102    
2103  Arguments:  Arguments:
2104    p           points after the leading '<'    p           points after the leading '<'
# Line 753  Returns:      appropriate PCRE_NEWLINE_x Line 2108  Returns:      appropriate PCRE_NEWLINE_x
2108  */  */
2109    
2110  static int  static int
2111  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2112  {  {
2113  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2114  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2115  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2116  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2117  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2118  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2119  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2120  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2121  return 0;  return 0;
2122  }  }
# Line 777  usage(void) Line 2132  usage(void)
2132  {  {
2133  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2134  printf("Input and output default to stdin and stdout.\n");  printf("Input and output default to stdin and stdout.\n");
2135  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2136  printf("If input is a terminal, readline() is used to read from it.\n");  printf("If input is a terminal, readline() is used to read from it.\n");
2137  #else  #else
2138  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2139  #endif  #endif
2140  printf("\nOptions:\n");  printf("\nOptions:\n");
2141  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2142    printf("  -16      use the 16-bit library\n");
2143    #endif
2144    printf("  -b       show compiled code\n");
2145  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2146    printf("  -C arg   show a specific compile-time option\n");
2147    printf("           and exit with its value. The arg can be:\n");
2148    printf("     linksize     internal link size [2, 3, 4]\n");
2149    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2150    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2151    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2152    printf("     ucp          Unicode Properties supported [0, 1]\n");
2153    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2154    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2155  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2156  #if !defined NODFA  #if !defined NODFA
2157  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 799  printf("  -p       use POSIX interface\n Line 2166  printf("  -p       use POSIX interface\n
2166  #endif  #endif
2167  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2168  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2169  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
2170           "  -s+      force each pattern to be studied, using JIT if available\n"
2171           "  -s++     ditto, verifying when JIT was actually used\n"
2172           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2173           "             where 1 <= n <= 7 selects JIT options\n"
2174           "  -s++n    ditto, verifying when JIT was actually used\n"
2175         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2176  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2177  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 819  options, followed by a set of test data, Line 2191  options, followed by a set of test data,
2191  int main(int argc, char **argv)  int main(int argc, char **argv)
2192  {  {
2193  FILE *infile = stdin;  FILE *infile = stdin;
2194    const char *version;
2195  int options = 0;  int options = 0;
2196  int study_options = 0;  int study_options = 0;
2197  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 827  int timeit = 0; Line 2200  int timeit = 0;
2200  int timeitm = 0;  int timeitm = 0;
2201  int showinfo = 0;  int showinfo = 0;
2202  int showstore = 0;  int showstore = 0;
2203    int force_study = -1;
2204    int force_study_options = 0;
2205  int quiet = 0;  int quiet = 0;
2206  int size_offsets = 45;  int size_offsets = 45;
2207  int size_offsets_max;  int size_offsets_max;
2208  int *offsets = NULL;  int *offsets = NULL;
 #if !defined NOPOSIX  
 int posix = 0;  
 #endif  
2209  int debug = 0;  int debug = 0;
2210  int done = 0;  int done = 0;
2211  int all_use_dfa = 0;  int all_use_dfa = 0;
2212    int verify_jit = 0;
2213  int yield = 0;  int yield = 0;
2214  int stack_size;  int stack_size;
2215    
2216  /* These vectors store, end-to-end, a list of captured substring names. Assume  #if !defined NOPOSIX
2217  that 1024 is plenty long enough for the few names we'll be testing. */  int posix = 0;
2218    #endif
2219    #if !defined NODFA
2220    int *dfa_workspace = NULL;
2221    #endif
2222    
2223  uschar copynames[1024];  pcre_jit_stack *jit_stack = NULL;
 uschar getnames[1024];  
2224    
2225  uschar *copynamesptr;  /* These vectors store, end-to-end, a list of zero-terminated captured
2226  uschar *getnamesptr;  substring names, each list itself being terminated by an empty name. Assume
2227    that 1024 is plenty long enough for the few names we'll be testing. It is
2228    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2229    for the actual memory, to ensure alignment. */
2230    
2231    pcre_uint16 copynames[1024];
2232    pcre_uint16 getnames[1024];
2233    
2234    #ifdef SUPPORT_PCRE16
2235    pcre_uint16 *cn16ptr;
2236    pcre_uint16 *gn16ptr;
2237    #endif
2238    
2239  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2240  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2241    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2242    pcre_uint8 *cn8ptr;
2243    pcre_uint8 *gn8ptr;
2244    #endif
2245    
2246  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2247  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2248  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2249    
2250    buffer = (pcre_uint8 *)malloc(buffer_size);
2251    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2252    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2253    
2254  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2255    
# Line 869  it set 0x8000, but then I was advised th Line 2264  it set 0x8000, but then I was advised th
2264  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2265  #endif  #endif
2266    
2267    /* Get the version number: both pcre_version() and pcre16_version() give the
2268    same answer. We just need to ensure that we call one that is available. */
2269    
2270    #ifdef SUPPORT_PCRE8
2271    version = pcre_version();
2272    #else
2273    version = pcre16_version();
2274    #endif
2275    
2276  /* Scan options */  /* Scan options */
2277    
2278  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2279    {    {
2280    unsigned char *endptr;    pcre_uint8 *endptr;
2281      char *arg = argv[op];
2282    
2283      if (strcmp(arg, "-m") == 0) showstore = 1;
2284      else if (strcmp(arg, "-s") == 0) force_study = 0;
2285    
2286    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    else if (strncmp(arg, "-s+", 3) == 0)
2287      showstore = 1;      {
2288    else if (strcmp(argv[op], "-q") == 0) quiet = 1;      arg += 3;
2289    else if (strcmp(argv[op], "-b") == 0) debug = 1;      if (*arg == '+') { arg++; verify_jit = TRUE; }
2290    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      force_study = 1;
2291    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      if (*arg == 0)
2292    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;        force_study_options = jit_study_bits[6];
2293        else if (*arg >= '1' && *arg <= '7')
2294          force_study_options = jit_study_bits[*arg - '1'];
2295        else goto BAD_ARG;
2296        }
2297      else if (strcmp(arg, "-16") == 0)
2298        {
2299    #ifdef SUPPORT_PCRE16
2300        use_pcre16 = 1;
2301    #else
2302        printf("** This version of PCRE was built without 16-bit support\n");
2303        exit(1);
2304    #endif
2305        }
2306      else if (strcmp(arg, "-q") == 0) quiet = 1;
2307      else if (strcmp(arg, "-b") == 0) debug = 1;
2308      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2309      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2310      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2311  #if !defined NODFA  #if !defined NODFA
2312    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2313  #endif  #endif
2314    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2315        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2316          *endptr == 0))          *endptr == 0))
2317      {      {
2318      op++;      op++;
2319      argc--;      argc--;
2320      }      }
2321    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2322      {      {
2323      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2324      int temp;      int temp;
2325      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2326                       *endptr == 0))                       *endptr == 0))
2327        {        {
2328        timeitm = temp;        timeitm = temp;
# Line 906  while (argc > 1 && argv[op][0] == '-') Line 2332  while (argc > 1 && argv[op][0] == '-')
2332      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2333      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2334      }      }
2335    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2336        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2337          *endptr == 0))          *endptr == 0))
2338      {      {
2339  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2340      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2341      exit(1);      exit(1);
2342  #else  #else
# Line 929  while (argc > 1 && argv[op][0] == '-') Line 2355  while (argc > 1 && argv[op][0] == '-')
2355  #endif  #endif
2356      }      }
2357  #if !defined NOPOSIX  #if !defined NOPOSIX
2358    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2359  #endif  #endif
2360    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2361      {      {
2362      int rc;      int rc;
2363      unsigned long int lrc;      unsigned long int lrc;
2364      printf("PCRE version %s\n", pcre_version());  
2365        if (argc > 2)
2366          {
2367          if (strcmp(argv[op + 1], "linksize") == 0)
2368            {
2369            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2370            printf("%d\n", rc);
2371            yield = rc;
2372            goto EXIT;
2373            }
2374          if (strcmp(argv[op + 1], "pcre8") == 0)
2375            {
2376    #ifdef SUPPORT_PCRE8
2377            printf("1\n");
2378            yield = 1;
2379    #else
2380            printf("0\n");
2381            yield = 0;
2382    #endif
2383            goto EXIT;
2384            }
2385          if (strcmp(argv[op + 1], "pcre16") == 0)
2386            {
2387    #ifdef SUPPORT_PCRE16
2388            printf("1\n");
2389            yield = 1;
2390    #else
2391            printf("0\n");
2392            yield = 0;
2393    #endif
2394            goto EXIT;
2395            }
2396          if (strcmp(argv[op + 1], "utf") == 0)
2397            {
2398    #ifdef SUPPORT_PCRE8
2399            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2400            printf("%d\n", rc);
2401            yield = rc;
2402    #else
2403            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2404            printf("%d\n", rc);
2405            yield = rc;
2406    #endif
2407            goto EXIT;
2408            }
2409          if (strcmp(argv[op + 1], "ucp") == 0)
2410            {
2411            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2412            printf("%d\n", rc);
2413            yield = rc;
2414            goto EXIT;
2415            }
2416          if (strcmp(argv[op + 1], "jit") == 0)
2417            {
2418            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2419            printf("%d\n", rc);
2420            yield = rc;
2421            goto EXIT;
2422            }
2423          if (strcmp(argv[op + 1], "newline") == 0)
2424            {
2425            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2426            /* Note that these values are always the ASCII values, even
2427            in EBCDIC environments. CR is 13 and NL is 10. */
2428            printf("%s\n", (rc == 13)? "CR" :
2429              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2430              (rc == -2)? "ANYCRLF" :
2431              (rc == -1)? "ANY" : "???");
2432            goto EXIT;
2433            }
2434          printf("Unknown -C option: %s\n", argv[op + 1]);
2435          goto EXIT;
2436          }
2437    
2438        printf("PCRE version %s\n", version);
2439      printf("Compiled with\n");      printf("Compiled with\n");
2440    
2441    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2442    are set, either both UTFs are supported or both are not supported. */
2443    
2444    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2445        printf("  8-bit and 16-bit support\n");
2446        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2447        if (rc)
2448          printf("  UTF-8 and UTF-16 support\n");
2449        else
2450          printf("  No UTF-8 or UTF-16 support\n");
2451    #elif defined SUPPORT_PCRE8
2452        printf("  8-bit support only\n");
2453      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2454      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2455      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2456        printf("  16-bit support only\n");
2457        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2458        printf("  %sUTF-16 support\n", rc? "" : "No ");
2459    #endif
2460    
2461        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2462      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2463      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2464        if (rc)
2465          {
2466          const char *arch;
2467          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2468          printf("  Just-in-time compiler support: %s\n", arch);
2469          }
2470        else
2471          printf("  No just-in-time compiler support\n");
2472        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2473      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
2474      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
2475      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2476        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2477        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2478        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2479      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2480      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2481                                       "all Unicode newlines");                                       "all Unicode newlines");
2482      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2483      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2484      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2485      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2486      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2487      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
2488      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2489      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
2490      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2491      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2492        if (showstore)
2493          {
2494          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2495          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2496          }
2497        printf("\n");
2498      goto EXIT;      goto EXIT;
2499      }      }
2500    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
2501             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
2502      {      {
2503      usage();      usage();
2504      goto EXIT;      goto EXIT;
2505      }      }
2506    else    else
2507      {      {
2508      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
2509        printf("** Unknown or malformed option %s\n", arg);
2510      usage();      usage();
2511      yield = 1;      yield = 1;
2512      goto EXIT;      goto EXIT;
# Line 1018  if (argc > 2) Line 2553  if (argc > 2)
2553    
2554  /* Set alternative malloc function */  /* Set alternative malloc function */
2555    
2556    #ifdef SUPPORT_PCRE8
2557  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2558  pcre_free = new_free;  pcre_free = new_free;
2559  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2560  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2561    #endif
2562    
2563    #ifdef SUPPORT_PCRE16
2564    pcre16_malloc = new_malloc;
2565    pcre16_free = new_free;
2566    pcre16_stack_malloc = stack_malloc;
2567    pcre16_stack_free = stack_free;
2568    #endif
2569    
2570  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2571    
2572  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2573    
2574  /* Main loop */  /* Main loop */
2575    
# Line 1040  while (!done) Line 2584  while (!done)
2584  #endif  #endif
2585    
2586    const char *error;    const char *error;
2587    unsigned char *markptr;    pcre_uint8 *markptr;
2588    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
2589    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
2590    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
2591      unsigned long int get_options;
2592    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2593    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2594      int do_allcaps = 0;
2595    int do_mark = 0;    int do_mark = 0;
2596    int do_study = 0;    int do_study = 0;
2597      int no_force_study = 0;
2598    int do_debug = debug;    int do_debug = debug;
2599    int do_G = 0;    int do_G = 0;
2600    int do_g = 0;    int do_g = 0;
2601    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2602    int do_showrest = 0;    int do_showrest = 0;
2603      int do_showcaprest = 0;
2604    int do_flip = 0;    int do_flip = 0;
2605    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2606    
2607    use_utf8 = 0;  #if !defined NODFA
2608      int dfa_matched = 0;
2609    #endif
2610    
2611      use_utf = 0;
2612    debug_lengths = 1;    debug_lengths = 1;
2613    
2614    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1071  while (!done) Line 2623  while (!done)
2623    
2624    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2625      {      {
2626      unsigned long int magic, get_options;      pcre_uint32 magic;
2627      uschar sbuf[8];      pcre_uint8 sbuf[8];
2628      FILE *f;      FILE *f;
2629    
2630      p++;      p++;
2631        if (*p == '!')
2632          {
2633          do_debug = TRUE;
2634          do_showinfo = TRUE;
2635          p++;
2636          }
2637    
2638      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2639      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2640      *pp = 0;      *pp = 0;
# Line 1087  while (!done) Line 2646  while (!done)
2646        continue;        continue;
2647        }        }
2648    
2649        first_gotten_store = 0;
2650      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2651    
2652      true_size =      true_size =
# Line 1094  while (!done) Line 2654  while (!done)
2654      true_study_size =      true_study_size =
2655        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2656    
2657      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2658      regex_gotten_store = gotten_store;      if (re == NULL)
2659          {
2660          printf("** Failed to get %d bytes of memory for pcre object\n",
2661            (int)true_size);
2662          yield = 1;
2663          goto EXIT;
2664          }
2665        regex_gotten_store = first_gotten_store;
2666    
2667      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2668    
2669      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2670      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2671        {        {
2672        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2673          {          {
2674          do_flip = 1;          do_flip = 1;
2675          }          }
2676        else        else
2677          {          {
2678          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2679            new_free(re);
2680          fclose(f);          fclose(f);
2681          continue;          continue;
2682          }          }
2683        }        }
2684    
2685      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
2686        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2687          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
     /* Need to know if UTF-8 for printing data strings */  
2688    
2689      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      /* Now see if there is any following study data. */
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
     /* Now see if there is any following study data */  
2690    
2691      if (true_study_size != 0)      if (true_study_size != 0)
2692        {        {
# Line 1138  while (!done) Line 2702  while (!done)
2702          {          {
2703          FAIL_READ:          FAIL_READ:
2704          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2705          if (extra != NULL) new_free(extra);          if (extra != NULL)
2706          if (re != NULL) new_free(re);            {
2707              PCRE_FREE_STUDY(extra);
2708              }
2709            new_free(re);
2710          fclose(f);          fclose(f);
2711          continue;          continue;
2712          }          }
# Line 1148  while (!done) Line 2715  while (!done)
2715        }        }
2716      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2717    
2718        /* Flip the necessary bytes. */
2719        if (do_flip)
2720          {
2721          int rc;
2722          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2723          if (rc == PCRE_ERROR_BADMODE)
2724            {
2725            /* Simulate the result of the function call below. */
2726            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2727              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2728            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2729              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2730            new_free(re);
2731            fclose(f);
2732            continue;
2733            }
2734          }
2735    
2736        /* Need to know if UTF-8 for printing data strings. */
2737    
2738        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2739          {
2740          new_free(re);
2741          fclose(f);
2742          continue;
2743          }
2744        use_utf = (get_options & PCRE_UTF8) != 0;
2745    
2746      fclose(f);      fclose(f);
2747      goto SHOW_INFO;      goto SHOW_INFO;
2748      }      }
2749    
2750    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2751    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2752    
2753    delimiter = *p++;    delimiter = *p++;
2754    
# Line 1164  while (!done) Line 2759  while (!done)
2759      }      }
2760    
2761    pp = p;    pp = p;
2762    poffset = p - buffer;    poffset = (int)(p - buffer);
2763    
2764    for(;;)    for(;;)
2765      {      {
# Line 1204  while (!done) Line 2799  while (!done)
2799    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2800    
2801    options = 0;    options = 0;
2802    study_options = 0;    study_options = force_study_options;
2803    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
2804    
2805    while (*pp != 0)    while (*pp != 0)
# Line 1218  while (!done) Line 2813  while (!done)
2813        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2814        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2815    
2816        case '+': do_showrest = 1; break;        case '+':
2817          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2818          break;
2819    
2820          case '=': do_allcaps = 1; break;
2821        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2822        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2823        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1232  while (!done) Line 2831  while (!done)
2831        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2832        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2833    
2834  #if !defined NOPOSIX  #if !defined NOPOSIX
2835        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2836  #endif  #endif
2837    
2838          case 'S':
2839          do_study = 1;
2840          for (;;)
2841            {
2842            switch (*pp++)
2843              {
2844              case 'S':
2845              do_study = 0;
2846              no_force_study = 1;
2847              break;
2848    
2849              case '!':
2850              study_options |= PCRE_STUDY_EXTRA_NEEDED;
2851              break;
2852    
2853              case '+':
2854              if (*pp == '+')
2855                {
2856                verify_jit = TRUE;
2857                pp++;
2858                }
2859              if (*pp >= '1' && *pp <= '7')
2860                study_options |= jit_study_bits[*pp++ - '1'];
2861              else
2862                study_options |= jit_study_bits[6];
2863              break;
2864    
2865              case '-':
2866              study_options &= ~PCRE_STUDY_ALLJIT;
2867              break;
2868    
2869              default:
2870              pp--;
2871              goto ENDLOOP;
2872              }
2873            }
2874          ENDLOOP:
2875          break;
2876    
       case 'S': do_study = 1; break;  
2877        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2878          case 'W': options |= PCRE_UCP; break;
2879        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2880          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2881        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2882        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2883        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2884    
2885          case 'T':
2886          switch (*pp++)
2887            {
2888            case '0': tables = tables0; break;
2889            case '1': tables = tables1; break;
2890    
2891            case '\r':
2892            case '\n':
2893            case ' ':
2894            case 0:
2895            fprintf(outfile, "** Missing table number after /T\n");
2896            goto SKIP_DATA;
2897    
2898            default:
2899            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2900            goto SKIP_DATA;
2901            }
2902          break;
2903    
2904        case 'L':        case 'L':
2905        ppp = pp;        ppp = pp;
2906        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1255  while (!done) Line 2913  while (!done)
2913          goto SKIP_DATA;          goto SKIP_DATA;
2914          }          }
2915        locale_set = 1;        locale_set = 1;
2916        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2917        pp = ppp;        pp = ppp;
2918        break;        break;
2919    
# Line 1268  while (!done) Line 2926  while (!done)
2926    
2927        case '<':        case '<':
2928          {          {
2929          if (strncmp((char *)pp, "JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2930            {            {
2931            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2932            pp += 3;            pp += 3;
# Line 1296  while (!done) Line 2954  while (!done)
2954    
2955    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2956    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2957    local character tables. */    local character tables. Neither does it have 16-bit support. */
2958    
2959  #if !defined NOPOSIX  #if !defined NOPOSIX
2960    if (posix || do_posix)    if (posix || do_posix)
# Line 1309  while (!done) Line 2967  while (!done)
2967      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2968      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2969      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2970        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2971      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2972    
2973        first_gotten_store = 0;
2974      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2975    
2976      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1330  while (!done) Line 2990  while (!done)
2990  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2991    
2992      {      {
2993      unsigned long int get_options;      /* In 16-bit mode, convert the input. */
2994    
2995    #ifdef SUPPORT_PCRE16
2996        if (use_pcre16)
2997          {
2998          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2999            {
3000            case -1:
3001            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3002              "converted to UTF-16\n");
3003            goto SKIP_DATA;
3004    
3005            case -2:
3006            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3007              "cannot be converted to UTF-16\n");
3008            goto SKIP_DATA;
3009    
3010            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3011            fprintf(outfile, "**Failed: character value greater than 0xffff "
3012              "cannot be converted to 16-bit in non-UTF mode\n");
3013            goto SKIP_DATA;
3014    
3015            default:
3016            break;
3017            }
3018          p = (pcre_uint8 *)buffer16;
3019          }
3020    #endif
3021    
3022        /* Compile many times when timing */
3023    
3024      if (timeit > 0)      if (timeit > 0)
3025        {        {
# Line 1339  while (!done) Line 3028  while (!done)
3028        clock_t start_time = clock();        clock_t start_time = clock();
3029        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
3030          {          {
3031          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3032          if (re != NULL) free(re);          if (re != NULL) free(re);
3033          }          }
3034        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1348  while (!done) Line 3037  while (!done)
3037            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
3038        }        }
3039    
3040      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
3041        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3042    
3043      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3044      if non-interactive. */      if non-interactive. */
# Line 1379  while (!done) Line 3069  while (!done)
3069      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
3070      lines. */      lines. */
3071    
3072      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3073      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;        goto SKIP_DATA;
3074        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
     /* Print information if required. There are now two info-returning  
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
   
     if (log_store)  
       fprintf(outfile, "Memory allocation (code space): %d\n",  
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3075    
3076      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3077      and remember the store that was got. */      and remember the store that was got. */
3078    
3079      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
3080      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3081    
3082        /* Output code size information if requested */
3083    
3084      /* If /S was present, study the regexp to generate additional info to      if (log_store)
3085      help with the matching. */        fprintf(outfile, "Memory allocation (code space): %d\n",
3086            (int)(first_gotten_store -
3087                  sizeof(REAL_PCRE) -
3088                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3089    
3090        /* If -s or /S was present, study the regex to generate additional info to
3091        help with the matching, unless the pattern has the SS option, which
3092        suppresses the effect of /S (used for a few test patterns where studying is
3093        never sensible). */
3094    
3095      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
3096        {        {
3097        if (timeit > 0)        if (timeit > 0)
3098          {          {
# Line 1409  while (!done) Line 3100  while (!done)
3100          clock_t time_taken;          clock_t time_taken;
3101          clock_t start_time = clock();          clock_t start_time = clock();
3102          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3103            extra = pcre_study(re, study_options, &error);            {
3104              PCRE_STUDY(extra, re, study_options, &error);
3105              }
3106          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3107          if (extra != NULL) free(extra);          if (extra != NULL)
3108              {
3109              PCRE_FREE_STUDY(extra);
3110              }
3111          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3112            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3113              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3114          }          }
3115        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options, &error);
3116        if (error != NULL)        if (error != NULL)
3117          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3118        else if (extra != NULL)        else if (extra != NULL)
3119            {
3120          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3121            if (log_store)
3122              {
3123              size_t jitsize;
3124              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3125                  jitsize != 0)
3126                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3127              }
3128            }
3129        }        }
3130    
3131      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1436  while (!done) Line 3141  while (!done)
3141        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
3142        }        }
3143    
3144      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
3145    
3146      SHOW_INFO:      SHOW_INFO:
3147    
3148      if (do_debug)      if (do_debug)
3149        {        {
3150        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3151        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3152        }        }
3153    
3154      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1488  while (!done) Line 3156  while (!done)
3156      if (do_showinfo)      if (do_showinfo)
3157        {        {
3158        unsigned long int all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3159        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3160          hascrorlf;          hascrorlf, maxlookbehind;
3161        int nameentrysize, namecount;        int nameentrysize, namecount;
3162        const uschar *nametable;        const pcre_uint8 *nametable;
3163    
3164        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3165        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3166        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3167        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3168        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3169        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3170        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3171        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3172        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3173        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3174        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3175              new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3176  #if !defined NOINFOCHECK            != 0)
3177        old_count = pcre_info(re, &old_options, &old_first_char);          goto SKIP_DATA;
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3178    
3179        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3180          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1541  while (!done) Line 3189  while (!done)
3189          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3190          while (namecount-- > 0)          while (namecount-- > 0)
3191            {            {
3192            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3193              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3194              GET2(nametable, 0));  #else
3195              int imm2_size = IMM2_SIZE;
3196    #endif
3197              int length = (int)STRLEN(nametable + imm2_size);
3198              fprintf(outfile, "  ");
3199              PCHARSV(nametable, imm2_size, length, outfile);
3200              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3201    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3202              fprintf(outfile, "%3d\n", use_pcre16?
3203                 (int)(((PCRE_SPTR16)nametable)[0])
3204                :((int)nametable[0] << 8) | (int)nametable[1]);
3205              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3206    #else
3207              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3208    #ifdef SUPPORT_PCRE8
3209            nametable += nameentrysize;            nametable += nameentrysize;
3210    #else
3211              nametable += nameentrysize * 2;
3212    #endif
3213    #endif
3214            }            }
3215          }          }
3216    
3217        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3218        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3219    
3220        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3221        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3222    
3223        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3224          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3225            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3226            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3227            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1568  while (!done) Line 3234  while (!done)
3234            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3235            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3236            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3237            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3238            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3239              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3240              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3241            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3242    
3243        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1610  while (!done) Line 3278  while (!done)
3278          }          }
3279        else        else
3280          {          {
3281          int ch = first_char & 255;          const char *caseless =
3282          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3283            "" : " (caseless)";            "" : " (caseless)";
3284          if (PRINTHEX(ch))  
3285            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3286              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3287          else          else
3288            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3289              fprintf(outfile, "First char = ");
3290              pchar(first_char, outfile);
3291              fprintf(outfile, "%s\n", caseless);
3292              }
3293          }          }
3294    
3295        if (need_char < 0)        if (need_char < 0)
# Line 1625  while (!done) Line 3298  while (!done)
3298          }          }
3299        else        else
3300          {          {
3301          int ch = need_char & 255;          const char *caseless =
3302          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3303            "" : " (caseless)";            "" : " (caseless)";
3304          if (PRINTHEX(ch))  
3305            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3306              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3307          else          else
3308            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3309              fprintf(outfile, "Need char = ");
3310              pchar(need_char, outfile);
3311              fprintf(outfile, "%s\n", caseless);
3312              }
3313          }          }
3314    
3315          if (maxlookbehind > 0)
3316            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3317    
3318        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3319        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3320        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3321        flipped.) */        flipped.) If study was forced by an external -s, don't show this
3322          information unless -i or -d was also present. This means that, except
3323          when auto-callouts are involved, the output from runs with and without
3324          -s should be identical. */
3325    
3326        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3327          {          {
3328          if (extra == NULL)          if (extra == NULL)
3329            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3330          else          else
3331            {            {
3332            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3333            int minlength;            int minlength;
3334    
3335            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3336            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3337    
3338            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
3339              {              {
3340              int i;              if (start_bits == NULL)
3341              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3342              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3343                {                {
3344                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3345                  int c = 24;
3346                  fprintf(outfile, "Starting byte set: ");
3347                  for (i = 0; i < 256; i++)
3348                  {                  {
3349                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
3350                    {                    {
3351                    fprintf(outfile, "\n  ");                    if (c > 75)
3352                    c = 2;                      {
3353                    }                      fprintf(outfile, "\n  ");
3354                  if (PRINTHEX(i) && i != ' ')                      c = 2;
3355                    {                      }
3356                    fprintf(outfile, "%c ", i);                    if (PRINTOK(i) && i != ' ')
3357                    c += 2;                      {
3358                    }                      fprintf(outfile, "%c ", i);
3359                  else                      c += 2;
3360                    {                      }
3361                    fprintf(outfile, "\\x%02x ", i);                    else
3362                    c += 5;                      {
3363                        fprintf(outfile, "\\x%02x ", i);
3364                        c += 5;
3365                        }
3366                    }                    }
3367                  }                  }
3368                  fprintf(outfile, "\n");
3369                }                }
3370              fprintf(outfile, "\n");              }
3371              }
3372    
3373            /* Show this only if the JIT was set by /S, not by -s. */
3374    
3375            if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3376                (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3377              {
3378              int jit;
3379              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3380                {
3381                if (jit)
3382                  fprintf(outfile, "JIT study was successful\n");
3383                else
3384    #ifdef SUPPORT_JIT
3385                  fprintf(outfile, "JIT study was not successful\n");
3386    #else
3387                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3388    #endif
3389              }              }
3390            }            }
3391          }          }
# Line 1699  while (!done) Line 3404  while (!done)
3404          }          }
3405        else        else
3406          {          {
3407          uschar sbuf[8];          pcre_uint8 sbuf[8];
3408          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3409          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3410          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3411          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3412            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3413          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3414          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3415          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3416          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3417            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3418    
3419          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3420              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1717  while (!done) Line 3423  while (!done)
3423            }            }
3424          else          else
3425            {            {
3426            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3427    
3428              /* If there is study data, write it. */
3429    
3430            if (extra != NULL)            if (extra != NULL)
3431              {              {
3432              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1727  while (!done) Line 3436  while (!done)
3436                  strerror(errno));                  strerror(errno));
3437                }                }
3438              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
3439              }              }
3440            }            }
3441          fclose(f);          fclose(f);
3442          }          }
3443    
3444        new_free(re);        new_free(re);
3445        if (extra != NULL) new_free(extra);        if (extra != NULL)
3446        if (tables != NULL) new_free((void *)tables);          {
3447            PCRE_FREE_STUDY(extra);
3448            }
3449          if (locale_set)
3450            {
3451            new_free((void *)tables);
3452            setlocale(LC_CTYPE, "C");
3453            locale_set = 0;
3454            }
3455        continue;  /* With next regex */        continue;  /* With next regex */
3456        }        }
3457      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1744  while (!done) Line 3460  while (!done)
3460    
3461    for (;;)    for (;;)
3462      {      {
3463      uschar *q;      pcre_uint8 *q;
3464      uschar *bptr;      pcre_uint8 *bptr;
3465      int *use_offsets = offsets;      int *use_offsets = offsets;
3466      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3467      int callout_data = 0;      int callout_data = 0;
# Line 1757  while (!done) Line 3473  while (!done)
3473      int getlist = 0;      int getlist = 0;
3474      int gmatched = 0;      int gmatched = 0;
3475      int start_offset = 0;      int start_offset = 0;
3476        int start_offset_sign = 1;
3477      int g_notempty = 0;      int g_notempty = 0;
3478      int use_dfa = 0;      int use_dfa = 0;
3479    
     options = 0;  
   
3480      *copynames = 0;      *copynames = 0;
3481      *getnames = 0;      *getnames = 0;
3482    
3483      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3484      getnamesptr = getnames;      cn16ptr = copynames;
3485        gn16ptr = getnames;
3486    #endif
3487    #ifdef SUPPORT_PCRE8
3488        cn8ptr = copynames8;
3489        gn8ptr = getnames8;
3490    #endif
3491    
3492      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3493      first_callout = 1;      first_callout = 1;
3494        last_callout_mark = NULL;
3495      callout_extra = 0;      callout_extra = 0;
3496      callout_count = 0;      callout_count = 0;
3497      callout_fail_count = 999999;      callout_fail_count = 999999;
3498      callout_fail_id = -1;      callout_fail_id = -1;
3499      show_malloc = 0;      show_malloc = 0;
3500        options = 0;
3501    
3502      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3503        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 1784  while (!done) Line 3507  while (!done)
3507        {        {
3508        if (extend_inputline(infile, buffer + len, "data> ") == NULL)        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3509          {          {
3510          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
3511              {
3512              fprintf(outfile, "\n");
3513              break;
3514              }
3515          done = 1;          done = 1;
3516          goto CONTINUE;          goto CONTINUE;
3517          }          }
# Line 1806  while (!done) Line 3533  while (!done)
3533        int i = 0;        int i = 0;
3534        int n = 0;        int n = 0;
3535    
3536        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3537          In non-UTF mode, allow the value of the byte to fall through to later,
3538          where values greater than 127 are turned into UTF-8 when running in
3539          16-bit mode. */
3540    
3541          if (c != '\\')
3542            {
3543            if (use_utf)
3544              {
3545              *q++ = c;
3546              continue;
3547              }
3548            }
3549    
3550          /* Handle backslash escapes */
3551    
3552          else switch ((c = *p++))
3553          {          {
3554          case 'a': c =    7; break;          case 'a': c =    7; break;
3555          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 1822  while (!done) Line 3565  while (!done)
3565          c -= '0';          c -= '0';
3566          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3567            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3568          break;          break;
3569    
3570          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3571          if (*p == '{')          if (*p == '{')
3572            {            {
3573            unsigned char *pt = p;            pcre_uint8 *pt = p;
3574            c = 0;            c = 0;
3575            while (isxdigit(*(++pt)))  
3576              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3577              when isxdigit() is a macro that refers to its argument more than
3578              once. This is banned by the C Standard, but apparently happens in at
3579              least one MacOS environment. */
3580    
3581              for (pt++; isxdigit(*pt); pt++)
3582                {
3583                if (++i == 9)
3584                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3585                                   "using only the first eight.\n");
3586                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3587                }
3588            if (*pt == '}')            if (*pt == '}')
3589              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             if (use_utf8)  
               {  
               utn = ord2utf8(c, buff8);  
               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
               c = buff8[ii];   /* Last byte */  
               }  
             else  
              {  
              if (c > 255)  
                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "  
                  "UTF-8 mode is not enabled.\n"  
                  "** Truncation will probably give the wrong result.\n", c);  
              }  
3590              p = pt + 1;              p = pt + 1;
3591              break;              break;
3592              }              }
3593            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3594            }            }
 #endif  
3595    
3596          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3597            allows UTF-8 characters to be constructed byte by byte, and also allows
3598            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3599            Otherwise, pass it down to later code so that it can be turned into
3600            UTF-8 when running in 16-bit mode. */
3601    
3602          c = 0;          c = 0;
3603          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3604            {            {
3605            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3606            p++;            p++;
3607            }            }
3608            if (use_utf)
3609              {
3610              *q++ = c;
3611              continue;
3612              }
3613          break;          break;
3614    
3615          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 1885  while (!done) Line 3617  while (!done)
3617          continue;          continue;
3618    
3619          case '>':          case '>':
3620            if (*p == '-')
3621              {
3622              start_offset_sign = -1;
3623              p++;
3624              }
3625          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3626            start_offset *= start_offset_sign;
3627          continue;          continue;
3628    
3629          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1904  while (!done) Line 3642  while (!done)
3642            }            }
3643          else if (isalnum(*p))          else if (isalnum(*p))
3644            {            {
3645            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3646            }            }
3647          else if (*p == '+')          else if (*p == '+')
3648            {            {
# Line 1920  while (!done) Line 3651  while (!done)
3651            }            }
3652          else if (*p == '-')          else if (*p == '-')
3653            {            {
3654            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3655            p++;            p++;
3656            }            }
3657          else if (*p == '!')          else if (*p == '!')
# Line 1958  while (!done) Line 3689  while (!done)
3689  #endif  #endif
3690            use_dfa = 1;            use_dfa = 1;
3691          continue;          continue;
3692    #endif
3693    
3694    #if !defined NODFA
3695          case 'F':          case 'F':
3696          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
3697          continue;          continue;
# Line 1972  while (!done) Line 3705  while (!done)
3705            }            }
3706          else if (isalnum(*p))          else if (isalnum(*p))
3707            {            {
3708            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3709            while (isalnum(*p)) *npp++ = *p++;            }
3710            *npp++ = 0;          continue;
3711            *npp = 0;  
3712            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3713            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3714              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3715            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3716                && extra->executable_jit != NULL)
3717              {
3718              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3719              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3720              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3721            }            }
3722          continue;          continue;
3723    
# Line 2015  while (!done) Line 3753  while (!done)
3753            }            }
3754          use_size_offsets = n;          use_size_offsets = n;
3755          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
3756              else use_offsets = offsets + size_offsets_max - n;  /* To catch overruns */
3757          continue;          continue;
3758    
3759          case 'P':          case 'P':
# Line 2075  while (!done) Line 3814  while (!done)
3814            }            }
3815          continue;          continue;
3816          }          }
3817        *q++ = c;  
3818          /* We now have a character value in c that may be greater than 255. In
3819          16-bit mode, we always convert characters to UTF-8 so that values greater
3820          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3821          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3822          mode must have come from \x{...} or octal constructs because values from
3823          \x.. get this far only in non-UTF mode. */
3824    
3825    #if !defined NOUTF || defined SUPPORT_PCRE16
3826          if (use_pcre16 || use_utf)
3827            {
3828            pcre_uint8 buff8[8];
3829            int ii, utn;
3830            utn = ord2utf8(c, buff8);
3831            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3832            }
3833          else
3834    #endif
3835            {
3836            if (c > 255)
3837              {
3838              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3839                "and UTF-8 mode is not enabled.\n", c);
3840              fprintf(outfile, "** Truncation will probably give the wrong "
3841                "result.\n");
3842              }
3843            *q++ = c;
3844            }
3845        }        }
3846    
3847        /* Reached end of subject string */
3848    
3849      *q = 0;      *q = 0;
3850      len = q - dbuffer;      len = (int)(q - dbuffer);
3851    
3852      /* Move the data to the end of the buffer so that a read over the end of      /* Move the data to the end of the buffer so that a read over the end of
3853      the buffer will be seen by valgrind, even if it doesn't cause a crash. If      the buffer will be seen by valgrind, even if it doesn't cause a crash. If
# Line 2138  while (!done) Line 3907  while (!done)
3907            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3908              {              {
3909              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3910              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3911                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3912              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3913              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3914                {                {
3915                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3916                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3917                  outfile);                  outfile);
3918                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3919                }                }
# Line 2152  while (!done) Line 3921  while (!done)
3921            }            }
3922          }          }
3923        free(pmatch);        free(pmatch);
3924          goto NEXT_DATA;
3925        }        }
3926    
3927    #endif  /* !defined NOPOSIX */
3928    
3929      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3930    
3931      else  #ifdef SUPPORT_PCRE16
3932  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3933          {
3934          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3935          switch(len)
3936            {
3937            case -1:
3938            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3939              "converted to UTF-16\n");
3940            goto NEXT_DATA;
3941    
3942            case -2:
3943            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3944              "cannot be converted to UTF-16\n");
3945            goto NEXT_DATA;
3946    
3947            case -3:
3948            fprintf(outfile, "**Failed: character value greater than 0xffff "
3949              "cannot be converted to 16-bit in non-UTF mode\n");
3950            goto NEXT_DATA;
3951    
3952            default:
3953            break;
3954            }
3955          bptr = (pcre_uint8 *)buffer16;
3956          }
3957    #endif
3958    
3959        /* Ensure that there is a JIT callback if we want to verify that JIT was
3960        actually used. If jit_stack == NULL, no stack has yet been assigned. */
3961    
3962        if (verify_jit && jit_stack == NULL && extra != NULL)
3963           { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3964    
3965      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3966        {        {
3967        markptr = NULL;        markptr = NULL;
3968          jit_was_used = FALSE;
3969    
3970        if (timeitm > 0)        if (timeitm > 0)
3971          {          {
# Line 2172  while (!done) Line 3976  while (!done)
3976  #if !defined NODFA  #if !defined NODFA
3977          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
3978            {            {
3979            int workspace[1000];            if ((options & PCRE_DFA_RESTART) != 0)
3980                {
3981                fprintf(outfile, "Timing DFA restarts is not supported\n");
3982                break;
3983                }
3984              if (dfa_workspace == NULL)
3985                dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
3986            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3987              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,              {
3988                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3989                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets,
3990                  dfa_workspace, DFA_WS_DIMENSION);
3991                }
3992            }            }
3993          else          else
3994  #endif  #endif
3995    
3996          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3997            count = pcre_exec(re, extra, (char *)bptr, len,            {
3998              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3999                (options | g_notempty), use_offsets, use_size_offsets);
4000              }
4001          time_taken = clock() - start_time;          time_taken = clock() - start_time;
4002          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
4003            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2193  while (!done) Line 4006  while (!done)
4006    
4007        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
4008        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
4009        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
4010          running of pcre_exec(), so disable the JIT optimization. This makes it
4011          possible to run the same set of tests with and without JIT externally
4012          requested. */
4013    
4014        if (find_match_limit)        if (find_match_limit)
4015          {          {
4016          if (extra == NULL)          if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4017            {          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4018            extra = (pcre_extra *)malloc(sizeof(pcre_extra));          extra->flags = 0;
           extra->flags = 0;  
           }  
4019    
4020          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
4021            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2225  while (!done) Line 4039  while (!done)
4039            }            }
4040          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4041          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
4042          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4043            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
4044          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4045          }          }
# Line 2236  while (!done) Line 4050  while (!done)
4050  #if !defined NODFA  #if !defined NODFA
4051        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
4052          {          {
4053          int workspace[1000];          if (dfa_workspace == NULL)
4054          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,            dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4055            options | g_notempty, use_offsets, use_size_offsets, workspace,          if (dfa_matched++ == 0)
4056            sizeof(workspace)/sizeof(int));            dfa_workspace[0] = -1;  /* To catch bad restart */
4057            PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4058              (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4059              DFA_WS_DIMENSION);
4060          if (count == 0)          if (count == 0)
4061            {            {
4062            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2250  while (!done) Line 4067  while (!done)
4067    
4068        else        else
4069          {          {
4070          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4071            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
4072          if (count == 0)          if (count == 0)
4073            {            {
4074            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 2264  while (!done) Line 4081  while (!done)
4081        if (count >= 0)        if (count >= 0)
4082          {          {
4083          int i, maxcount;          int i, maxcount;
4084            void *cnptr, *gnptr;
4085    
4086  #if !defined NODFA  #if !defined NODFA
4087          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2285  while (!done) Line 4103  while (!done)
4103              }              }
4104            }            }
4105    
4106            /* do_allcaps requests showing of all captures in the pattern, to check
4107            unset ones at the end. */
4108    
4109            if (do_allcaps)
4110              {
4111              if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4112                goto SKIP_DATA;
4113              count++;   /* Allow for full match */
4114              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4115              }
4116    
4117            /* Output the captured substrings */
4118    
4119          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
4120            {            {
4121            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
4122                {
4123                if (use_offsets[i] != -1)
4124                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4125                    use_offsets[i], i);
4126                if (use_offsets[i+1] != -1)
4127                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4128                    use_offsets[i+1], i+1);
4129              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
4130                }
4131            else            else
4132              {              {
4133              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
4134              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr, use_offsets[i],
4135                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
4136                if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4137              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4138              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
4139                {                {
4140                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
4141                  {                PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4142                  fprintf(outfile, " 0+ ");                  outfile);
4143                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
4144                }                }
4145              }              }
4146            }            }
4147    
4148          if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);          if (markptr != NULL)
4149              {
4150              fprintf(outfile, "MK: ");
4151              PCHARSV(markptr, 0, -1, outfile);
4152              fprintf(outfile, "\n");
4153              }
4154    
4155          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4156            {            {
4157            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
4158              {              {
4159                int rc;
4160              char copybuffer[256];              char copybuffer[256];
4161              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4162                i, copybuffer, sizeof(copybuffer));                copybuffer, sizeof(copybuffer));
4163              if (rc < 0)              if (rc < 0)
4164                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4165              else              else
4166                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);                {
4167                  fprintf(outfile, "%2dC ", i);
4168                  PCHARSV(copybuffer, 0, rc, outfile);
4169                  fprintf(outfile, " (%d)\n", rc);
4170                  }
4171              }              }
4172            }            }
4173    
4174          for (copynamesptr = copynames;          cnptr = copynames;
4175               *copynamesptr != 0;          for (;;)
              copynamesptr += (int)strlen((char*)copynamesptr) + 1)  
4176            {            {
4177              int rc;
4178            char copybuffer[256];            char copybuffer[256];
4179            int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,  
4180              count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));            if (use_pcre16)
4181                {
4182                if (*(pcre_uint16 *)cnptr == 0) break;
4183                }
4184              else
4185                {
4186                if (*(pcre_uint8 *)cnptr == 0) break;
4187                }
4188    
4189              PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4190                cnptr, copybuffer, sizeof(copybuffer));
4191    
4192            if (rc < 0)            if (rc < 0)
4193              fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);              {
4194                fprintf(outfile, "copy substring ");
4195                PCHARSV(cnptr, 0, -1, outfile);
4196                fprintf(outfile, " failed %d\n", rc);
4197                }
4198            else            else
4199              fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);              {
4200                fprintf(outfile, "  C ");
4201                PCHARSV(copybuffer, 0, rc, outfile);
4202                fprintf(outfile, " (%d) ", rc);
4203                PCHARSV(cnptr, 0, -1, outfile);
4204                putc('\n', outfile);
4205                }
4206    
4207              cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4208            }            }
4209    
4210          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4211            {            {
4212            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
4213              {              {
4214                int rc;
4215              const char *substring;              const char *substring;
4216              int rc = pcre_get_substring((char *)bptr, use_offsets, count,              PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
               i, &substring);  
4217              if (rc < 0)              if (rc < 0)
4218                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
4219              else              else
4220                {                {
4221                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG ", i);
4222                pcre_free_substring(substring);                PCHARSV(substring, 0, rc, outfile);
4223                  fprintf(outfile, " (%d)\n", rc);
4224                  PCRE_FREE_SUBSTRING(substring);
4225                }                }
4226              }              }
4227            }            }
4228    
4229          for (getnamesptr = getnames;          gnptr = getnames;
4230               *getnamesptr != 0;          for (;;)
              getnamesptr += (int)strlen((char*)getnamesptr) + 1)  
4231            {            {
4232              int rc;
4233            const char *substring;            const char *substring;
4234            int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,  
4235              count, (char *)getnamesptr, &substring);            if (use_pcre16)
4236                {
4237                if (*(pcre_uint16 *)gnptr == 0) break;
4238                }
4239              else
4240                {
4241                if (*(pcre_uint8 *)gnptr == 0) break;
4242                }
4243    
4244              PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4245                gnptr, &substring);
4246            if (rc < 0)            if (rc < 0)
4247              fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);              {
4248                fprintf(outfile, "get substring ");
4249                PCHARSV(gnptr, 0, -1, outfile);
4250                fprintf(outfile, " failed %d\n", rc);
4251                }
4252            else            else
4253              {              {
4254              fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);              fprintf(outfile, "  G ");
4255              pcre_free_substring(substring);              PCHARSV(substring, 0, rc, outfile);
4256                fprintf(outfile, " (%d) ", rc);
4257                PCHARSV(gnptr, 0, -1, outfile);
4258                PCRE_FREE_SUBSTRING(substring);
4259                putc('\n', outfile);
4260              }              }
4261    
4262              gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4263            }            }
4264    
4265          if (getlist)          if (getlist)
4266            {            {
4267              int rc;
4268            const char **stringlist;            const char **stringlist;
4269            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,            PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
             &stringlist);  
4270            if (rc < 0)            if (rc < 0)
4271              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
4272            else            else
4273              {              {
4274              for (i = 0; i < count; i++)              for (i = 0; i < count; i++)
4275                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                {
4276                  fprintf(outfile, "%2dL ", i);
4277                  PCHARSV(stringlist[i], 0, -1, outfile);
4278                  putc('\n', outfile);
4279                  }
4280              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
4281                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
4282              /* free((void *)stringlist); */              PCRE_FREE_SUBSTRING_LIST(stringlist);
             pcre_free_substring_list(stringlist);  
4283              }              }
4284            }            }
4285          }          }
# Line 2394  while (!done) Line 4289  while (!done)
4289        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
4290          {          {
4291          if (markptr == NULL) fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
4292            else fprintf(outfile, "Partial match, mark=%s", markptr);          else
4293              {
4294              fprintf(outfile, "Partial match, mark=");
4295              PCHARSV(markptr, 0, -1, outfile);
4296              }
4297          if (use_size_offsets > 1)          if (use_size_offsets > 1)
4298            {            {
4299            fprintf(outfile, ": ");            fprintf(outfile, ": ");
4300            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4301              outfile);              outfile);
4302            }            }
4303            if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4304          fprintf(outfile, "\n");          fprintf(outfile, "\n");
4305          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
4306          }          }
# Line 2410  while (!done) Line 4310  while (!done)
4310        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the