/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 336 by ph10, Sat Apr 12 15:59:03 2008 UTC revision 936 by ph10, Sat Feb 25 17:02:23 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
51  #include "config.h"  #include "config.h"
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 59  POSSIBILITY OF SUCH DAMAGE.
59  #include <locale.h>  #include <locale.h>
60  #include <errno.h>  #include <errno.h>
61    
62  #ifdef SUPPORT_LIBREADLINE  /* Both libreadline and libedit are optionally supported. The user-supplied
63    original patch uses readline/readline.h for libedit, but in at least one system
64    it is installed as editline/readline.h, so the configuration code now looks for
65    that first, falling back to readline/readline.h. */
66    
67    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
68    #ifdef HAVE_UNISTD_H
69  #include <unistd.h>  #include <unistd.h>
70    #endif
71    #if defined(SUPPORT_LIBREADLINE)
72  #include <readline/readline.h>  #include <readline/readline.h>
73  #include <readline/history.h>  #include <readline/history.h>
74    #else
75    #if defined(HAVE_EDITLINE_READLINE_H)
76    #include <editline/readline.h>
77    #else
78    #include <readline/readline.h>
79    #endif
80    #endif
81  #endif  #endif
   
82    
83  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
84  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 69  input mode under Windows. */ Line 94  input mode under Windows. */
94  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
95  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
96    
97    #ifndef isatty
98    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
99    #endif                         /* though in some environments they seem to   */
100                                   /* be already defined, hence the #ifndefs.    */
101    #ifndef fileno
102    #define fileno _fileno
103    #endif
104    
105    /* A user sent this fix for Borland Builder 5 under Windows. */
106    
107    #ifdef __BORLANDC__
108    #define _setmode(handle, mode) setmode(handle, mode)
109    #endif
110    
111    /* Not Windows */
112    
113  #else  #else
114  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
115  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 76  input mode under Windows. */ Line 117  input mode under Windows. */
117  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
118  #endif  #endif
119    
120    #define PRIV(name) name
121    
122  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
123  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 87  here before pcre_internal.h so that the Line 129  here before pcre_internal.h so that the
129  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
130    
131  #include "pcre.h"  #include "pcre.h"
132    
133    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
134    /* Configure internal macros to 16 bit mode. */
135    #define COMPILE_PCRE16
136    #endif
137    
138  #include "pcre_internal.h"  #include "pcre_internal.h"
139    
140  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* The pcre_printint() function, which prints the internal form of a compiled
141  two copies, we include the source file here, changing the names of the external  regex, is held in a separate file so that (a) it can be compiled in either
142  symbols to prevent clashes. */  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
143    when that is compiled in debug mode. */
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
144    
145  #include "pcre_tables.c"  #ifdef SUPPORT_PCRE8
146    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
147    #endif
148    #ifdef SUPPORT_PCRE16
149    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
150    #endif
151    
152  /* We also need the pcre_printint() function for printing out compiled  /* We need access to some of the data tables that PCRE uses. So as not to have
153  patterns. This function is in a separate file so that it can be included in  to keep two copies, we include the source file here, changing the names of the
154  pcre_compile.c when that module is compiled with debugging enabled.  external symbols to prevent clashes. */
155    
156  The definition of the macro PRINTABLE, which determines whether to print an  #define PCRE_INCLUDED
157    
158    #include "pcre_tables.c"
159    
160    /* The definition of the macro PRINTABLE, which determines whether to print an
161  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
162  contained in this file. We use it here also, in cases when the locale has not  the same as in the printint.src file. We use it here in cases when the locale
163  been explicitly changed, so as to get consistent output from systems that  has not been explicitly changed, so as to get consistent output from systems
164  differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
165    
166  #include "pcre_printint.src"  #ifdef EBCDIC
167    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
168    #else
169    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
170    #endif
171    
172  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
173    
174    /* Posix support is disabled in 16 bit only mode. */
175    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
176    #define NOPOSIX
177    #endif
178    
179  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
180  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 128  Makefile. */ Line 184  Makefile. */
184  #include "pcreposix.h"  #include "pcreposix.h"
185  #endif  #endif
186    
187  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
188  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
189  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
190  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
191  UTF8 support if PCRE is built without it. */  
192    #ifndef SUPPORT_UTF
193  #ifndef SUPPORT_UTF8  #ifndef NOUTF
194  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
195  #endif  #endif
196  #endif  #endif
197    
198    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
199    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
200    only from one place and is handled differently). I couldn't dream up any way of
201    using a single macro to do this in a generic way, because of the many different
202    argument requirements. We know that at least one of SUPPORT_PCRE8 and
203    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
204    use these in the definitions of generic macros.
205    
206    **** Special note about the PCHARSxxx macros: the address of the string to be
207    printed is always given as two arguments: a base address followed by an offset.
208    The base address is cast to the correct data size for 8 or 16 bit data; the
209    offset is in units of this size. If the string were given as base+offset in one
210    argument, the casting might be incorrectly applied. */
211    
212    #ifdef SUPPORT_PCRE8
213    
214    #define PCHARS8(lv, p, offset, len, f) \
215      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
216    
217    #define PCHARSV8(p, offset, len, f) \
218      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
219    
220    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
221      p = read_capture_name8(p, cn8, re)
222    
223    #define STRLEN8(p) ((int)strlen((char *)p))
224    
225    #define SET_PCRE_CALLOUT8(callout) \
226      pcre_callout = callout
227    
228    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
229       pcre_assign_jit_stack(extra, callback, userdata)
230    
231    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
232      re = pcre_compile((char *)pat, options, error, erroffset, tables)
233    
234    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
235        namesptr, cbuffer, size) \
236      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
237        (char *)namesptr, cbuffer, size)
238    
239    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
240      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
241    
242    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
243        offsets, size_offsets, workspace, size_workspace) \
244      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
245        offsets, size_offsets, workspace, size_workspace)
246    
247    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
248        offsets, size_offsets) \
249      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
250        offsets, size_offsets)
251    
252    #define PCRE_FREE_STUDY8(extra) \
253      pcre_free_study(extra)
254    
255    #define PCRE_FREE_SUBSTRING8(substring) \
256      pcre_free_substring(substring)
257    
258    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
259      pcre_free_substring_list(listptr)
260    
261    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
262        getnamesptr, subsptr) \
263      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
264        (char *)getnamesptr, subsptr)
265    
266    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
267      n = pcre_get_stringnumber(re, (char *)ptr)
268    
269    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
270      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
271    
272    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
273      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
274    
275    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
276      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
277    
278    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
279      pcre_printint(re, outfile, debug_lengths)
280    
281    #define PCRE_STUDY8(extra, re, options, error) \
282      extra = pcre_study(re, options, error)
283    
284    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
285      pcre_jit_stack_alloc(startsize, maxsize)
286    
287    #define PCRE_JIT_STACK_FREE8(stack) \
288      pcre_jit_stack_free(stack)
289    
290    #endif /* SUPPORT_PCRE8 */
291    
292    /* -----------------------------------------------------------*/
293    
294    #ifdef SUPPORT_PCRE16
295    
296    #define PCHARS16(lv, p, offset, len, f) \
297      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
298    
299    #define PCHARSV16(p, offset, len, f) \
300      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
301    
302    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
303      p = read_capture_name16(p, cn16, re)
304    
305    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
306    
307    #define SET_PCRE_CALLOUT16(callout) \
308      pcre16_callout = (int (*)(pcre16_callout_block *))callout
309    
310    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
311      pcre16_assign_jit_stack((pcre16_extra *)extra, \
312        (pcre16_jit_callback)callback, userdata)
313    
314    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
315      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
316        tables)
317    
318    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
319        namesptr, cbuffer, size) \
320      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
321        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
322    
323    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
324      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
325        (PCRE_UCHAR16 *)cbuffer, size/2)
326    
327    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
328        offsets, size_offsets, workspace, size_workspace) \
329      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
330        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
331        workspace, size_workspace)
332    
333    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
334        offsets, size_offsets) \
335      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
336        len, start_offset, options, offsets, size_offsets)
337    
338    #define PCRE_FREE_STUDY16(extra) \
339      pcre16_free_study((pcre16_extra *)extra)
340    
341    #define PCRE_FREE_SUBSTRING16(substring) \
342      pcre16_free_substring((PCRE_SPTR16)substring)
343    
344    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
345      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
346    
347    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
348        getnamesptr, subsptr) \
349      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
350        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
351    
352    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
353      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
354    
355    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
356      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
357        (PCRE_SPTR16 *)(void*)subsptr)
358    
359    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
360      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
361        (PCRE_SPTR16 **)(void*)listptr)
362    
363    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
364      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
365        tables)
366    
367    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
368      pcre16_printint(re, outfile, debug_lengths)
369    
370    #define PCRE_STUDY16(extra, re, options, error) \
371      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
372    
373    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
374      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
375    
376    #define PCRE_JIT_STACK_FREE16(stack) \
377      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
378    
379    #endif /* SUPPORT_PCRE16 */
380    
381    
382    /* ----- Both modes are supported; a runtime test is needed, except for
383    pcre_config(), where it doesn't matter which version is called. The JIT
384    stack macros below are dispatched at run time like the rest. ----- */
385    
386    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
387    
388    #define CHAR_SIZE (use_pcre16? 2:1)
389    
390    #define PCHARS(lv, p, offset, len, f) \
391      if (use_pcre16) \
392        PCHARS16(lv, p, offset, len, f); \
393      else \
394        PCHARS8(lv, p, offset, len, f)
395    
396    #define PCHARSV(p, offset, len, f) \
397      if (use_pcre16) \
398        PCHARSV16(p, offset, len, f); \
399      else \
400        PCHARSV8(p, offset, len, f)
401    
402    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
403      if (use_pcre16) \
404        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
405      else \
406        READ_CAPTURE_NAME8(p, cn8, cn16, re)
407    
408    #define SET_PCRE_CALLOUT(callout) \
409      if (use_pcre16) \
410        SET_PCRE_CALLOUT16(callout); \
411      else \
412        SET_PCRE_CALLOUT8(callout)
413    
414    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
415    
416    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
417      if (use_pcre16) \
418        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
419      else \
420        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
421    
422    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
423      if (use_pcre16) \
424        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
425      else \
426        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
427    
428    #define PCRE_CONFIG pcre_config
429    
430    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
431        namesptr, cbuffer, size) \
432      if (use_pcre16) \
433        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
434          namesptr, cbuffer, size); \
435      else \
436        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
437          namesptr, cbuffer, size)
438    
439    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
440      if (use_pcre16) \
441        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
442      else \
443        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
444    
445    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
446        offsets, size_offsets, workspace, size_workspace) \
447      if (use_pcre16) \
448        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
449          offsets, size_offsets, workspace, size_workspace); \
450      else \
451        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
452          offsets, size_offsets, workspace, size_workspace)
453    
454    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
455        offsets, size_offsets) \
456      if (use_pcre16) \
457        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
458          offsets, size_offsets); \
459      else \
460        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
461          offsets, size_offsets)
462    
463    #define PCRE_FREE_STUDY(extra) \
464      if (use_pcre16) \
465        PCRE_FREE_STUDY16(extra); \
466      else \
467        PCRE_FREE_STUDY8(extra)
468    
469    #define PCRE_FREE_SUBSTRING(substring) \
470      if (use_pcre16) \
471        PCRE_FREE_SUBSTRING16(substring); \
472      else \
473        PCRE_FREE_SUBSTRING8(substring)
474    
475    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
476      if (use_pcre16) \
477        PCRE_FREE_SUBSTRING_LIST16(listptr); \
478      else \
479        PCRE_FREE_SUBSTRING_LIST8(listptr)
480    
481    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
482        getnamesptr, subsptr) \
483      if (use_pcre16) \
484        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
485          getnamesptr, subsptr); \
486      else \
487        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
488          getnamesptr, subsptr)
489    
490    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
491      if (use_pcre16) \
492        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
493      else \
494        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
495    
496    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
497      if (use_pcre16) \
498        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
499      else \
500        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
501    
502    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
503      if (use_pcre16) \
504        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
505      else \
506        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
507    
508    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
509      (use_pcre16 ? \
510         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
511        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
512    
513    #define PCRE_JIT_STACK_FREE(stack) \
514      if (use_pcre16) \
515        PCRE_JIT_STACK_FREE16(stack); \
516      else \
517        PCRE_JIT_STACK_FREE8(stack)
518    
519    #define PCRE_MAKETABLES \
520      (use_pcre16? pcre16_maketables() : pcre_maketables())
521    
522    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
523      if (use_pcre16) \
524        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
525      else \
526        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
527    
528    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
529      if (use_pcre16) \
530        PCRE_PRINTINT16(re, outfile, debug_lengths); \
531      else \
532        PCRE_PRINTINT8(re, outfile, debug_lengths)
533    
534    #define PCRE_STUDY(extra, re, options, error) \
535      if (use_pcre16) \
536        PCRE_STUDY16(extra, re, options, error); \
537      else \
538        PCRE_STUDY8(extra, re, options, error)
539    
540    /* ----- Only 8-bit mode is supported ----- */
541    
542    #elif defined SUPPORT_PCRE8
543    #define CHAR_SIZE                 1
544    #define PCHARS                    PCHARS8
545    #define PCHARSV                   PCHARSV8
546    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
547    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
548    #define STRLEN                    STRLEN8
549    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
550    #define PCRE_COMPILE              PCRE_COMPILE8
551    #define PCRE_CONFIG               pcre_config
552    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
553    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
554    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
555    #define PCRE_EXEC                 PCRE_EXEC8
556    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
557    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
558    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
559    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
560    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
561    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
562    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
563    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
564    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
565    #define PCRE_MAKETABLES           pcre_maketables()
566    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
567    #define PCRE_PRINTINT             PCRE_PRINTINT8
568    #define PCRE_STUDY                PCRE_STUDY8
569    
570    /* ----- Only 16-bit mode is supported ----- */
571    
572    #else
573    #define CHAR_SIZE                 2
574    #define PCHARS                    PCHARS16
575    #define PCHARSV                   PCHARSV16
576    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
577    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
578    #define STRLEN                    STRLEN16
579    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
580    #define PCRE_COMPILE              PCRE_COMPILE16
581    #define PCRE_CONFIG               pcre16_config
582    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
583    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
584    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
585    #define PCRE_EXEC                 PCRE_EXEC16
586    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
587    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
588    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
589    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
590    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
591    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
592    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
593    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
594    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
595    #define PCRE_MAKETABLES           pcre16_maketables()
596    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
597    #define PCRE_PRINTINT             PCRE_PRINTINT16
598    #define PCRE_STUDY                PCRE_STUDY16
599    #endif
600    
601    /* ----- End of mode-specific function call macros ----- */
602    
603    
604  /* Other parameters */  /* Other parameters */
605    
# Line 165  static int callout_fail_count; Line 625  static int callout_fail_count;
625  static int callout_fail_id;  static int callout_fail_id;
626  static int debug_lengths;  static int debug_lengths;
627  static int first_callout;  static int first_callout;
628    static int jit_was_used;
629  static int locale_set = 0;  static int locale_set = 0;
630  static int show_malloc;  static int show_malloc;
631  static int use_utf8;  static int use_utf;
632  static size_t gotten_store;  static size_t gotten_store;
633    static size_t first_gotten_store = 0;
634    static const unsigned char *last_callout_mark = NULL;
635    
636  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
637    
638  static int buffer_size = 50000;  static int buffer_size = 50000;
639  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
640  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
641  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
642    
643    /* Another buffer is needed for translation to 16-bit character strings. It
644    will be obtained and extended as required. */
645    
646    #ifdef SUPPORT_PCRE16
647    static int buffer16_size = 0;
648    static pcre_uint16 *buffer16 = NULL;
649    
650    #ifdef SUPPORT_PCRE8
651    
652    /* We need the table of operator lengths that is used for 16-bit compiling, in
653    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
654    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
655    appropriately for the 16-bit world. Just as a safety check, make sure that
656    COMPILE_PCRE16 is *not* set. */
657    
658    #ifdef COMPILE_PCRE16
659    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
660    #endif
661    
662    #if LINK_SIZE == 2
663    #undef LINK_SIZE
664    #define LINK_SIZE 1
665    #elif LINK_SIZE == 3 || LINK_SIZE == 4
666    #undef LINK_SIZE
667    #define LINK_SIZE 2
668    #else
669    #error LINK_SIZE must be either 2, 3, or 4
670    #endif
671    
672    #undef IMM2_SIZE
673    #define IMM2_SIZE 1
674    
675    #endif /* SUPPORT_PCRE8 */
676    
677    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
678    #endif  /* SUPPORT_PCRE16 */
679    
680    /* If we have 8-bit support, default use_pcre16 to false; if there is also
681    16-bit support, it can be changed by an option. If there is no 8-bit support,
682    there must be 16-bit support, so default it to 1. */
683    
684    #ifdef SUPPORT_PCRE8
685    static int use_pcre16 = 0;
686    #else
687    static int use_pcre16 = 1;
688    #endif
689    
690    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
691    
692    static int jit_study_bits[] =
693      {
694      PCRE_STUDY_JIT_COMPILE,
695      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
696      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
697      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
698      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
699      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
700      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
701        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
702    };
703    
/* Textual explanations for runtime error codes. The position of each entry is
shown alongside it; a NULL entry is a code that has no fixed text in this
table (see the individual notes). */

static const char *errtexts[] = {
  /*  0 */  NULL,  /* 0 is no error */
  /*  1 */  NULL,  /* NOMATCH is handled specially */
  /*  2 */  "NULL argument passed",
  /*  3 */  "bad option value",
  /*  4 */  "magic number missing",
  /*  5 */  "unknown opcode - pattern overwritten?",
  /*  6 */  "no more memory",
  /*  7 */  NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */  "match limit exceeded",
  /*  9 */  "callout error code",
  /* 10 */  NULL,  /* BADUTF8/16 is handled specially */
  /* 11 */  NULL,  /* BADUTF8/16 offset is handled specially */
  /* 12 */  NULL,  /* PARTIAL is handled specially */
  /* 13 */  "not used - internal error",
  /* 14 */  "internal error - pattern overwritten?",
  /* 15 */  "bad count value",
  /* 16 */  "item unsupported for DFA matching",
  /* 17 */  "backreference condition or recursion test not supported for DFA matching",
  /* 18 */  "match limit not supported for DFA matching",
  /* 19 */  "workspace size exceeded in DFA matching",
  /* 20 */  "too much recursion for DFA matching",
  /* 21 */  "recursion limit exceeded",
  /* 22 */  "not used - internal error",
  /* 23 */  "invalid combination of newline options",
  /* 24 */  "bad offset value",
  /* 25 */  NULL,  /* SHORTUTF8/16 is handled specially */
  /* 26 */  "nested recursion at the same subject position",
  /* 27 */  "JIT stack limit reached",
  /* 28 */  "pattern compiled in wrong mode: 8-bit/16-bit error"
};
737    
738    
739    /*************************************************
740    *         Alternate character tables             *
741    *************************************************/
742    
743    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
744    using the default tables of the library. However, the T option can be used to
745    select alternate sets of tables, for different kinds of testing. Note also that
746    the L (locale) option also adjusts the tables. */
747    
748    /* This is the set of tables distributed as default with PCRE. It recognizes
749    only ASCII characters. The array holds four tables laid end to end: a lower
       casing table (256 bytes), a case flipping table (256 bytes), ten 32-byte
       character class bit maps (320 bytes), and a character type table (256
       bytes). */
750    
751    static const pcre_uint8 tables0[] = {
752    
753    /* This table is a lower casing table. */
754    
755        0,  1,  2,  3,  4,  5,  6,  7,
756        8,  9, 10, 11, 12, 13, 14, 15,
757       16, 17, 18, 19, 20, 21, 22, 23,
758       24, 25, 26, 27, 28, 29, 30, 31,
759       32, 33, 34, 35, 36, 37, 38, 39,
760       40, 41, 42, 43, 44, 45, 46, 47,
761       48, 49, 50, 51, 52, 53, 54, 55,
762       56, 57, 58, 59, 60, 61, 62, 63,
763       64, 97, 98, 99,100,101,102,103,
764      104,105,106,107,108,109,110,111,
765      112,113,114,115,116,117,118,119,
766      120,121,122, 91, 92, 93, 94, 95,
767       96, 97, 98, 99,100,101,102,103,
768      104,105,106,107,108,109,110,111,
769      112,113,114,115,116,117,118,119,
770      120,121,122,123,124,125,126,127,
771      128,129,130,131,132,133,134,135,
772      136,137,138,139,140,141,142,143,
773      144,145,146,147,148,149,150,151,
774      152,153,154,155,156,157,158,159,
775      160,161,162,163,164,165,166,167,
776      168,169,170,171,172,173,174,175,
777      176,177,178,179,180,181,182,183,
778      184,185,186,187,188,189,190,191,
779      192,193,194,195,196,197,198,199,
780      200,201,202,203,204,205,206,207,
781      208,209,210,211,212,213,214,215,
782      216,217,218,219,220,221,222,223,
783      224,225,226,227,228,229,230,231,
784      232,233,234,235,236,237,238,239,
785      240,241,242,243,244,245,246,247,
786      248,249,250,251,252,253,254,255,
787    
788    /* This table is a case flipping table. */
789    
790        0,  1,  2,  3,  4,  5,  6,  7,
791        8,  9, 10, 11, 12, 13, 14, 15,
792       16, 17, 18, 19, 20, 21, 22, 23,
793       24, 25, 26, 27, 28, 29, 30, 31,
794       32, 33, 34, 35, 36, 37, 38, 39,
795       40, 41, 42, 43, 44, 45, 46, 47,
796       48, 49, 50, 51, 52, 53, 54, 55,
797       56, 57, 58, 59, 60, 61, 62, 63,
798       64, 97, 98, 99,100,101,102,103,
799      104,105,106,107,108,109,110,111,
800      112,113,114,115,116,117,118,119,
801      120,121,122, 91, 92, 93, 94, 95,
802       96, 65, 66, 67, 68, 69, 70, 71,
803       72, 73, 74, 75, 76, 77, 78, 79,
804       80, 81, 82, 83, 84, 85, 86, 87,
805       88, 89, 90,123,124,125,126,127,
806      128,129,130,131,132,133,134,135,
807      136,137,138,139,140,141,142,143,
808      144,145,146,147,148,149,150,151,
809      152,153,154,155,156,157,158,159,
810      160,161,162,163,164,165,166,167,
811      168,169,170,171,172,173,174,175,
812      176,177,178,179,180,181,182,183,
813      184,185,186,187,188,189,190,191,
814      192,193,194,195,196,197,198,199,
815      200,201,202,203,204,205,206,207,
816      208,209,210,211,212,213,214,215,
817      216,217,218,219,220,221,222,223,
818      224,225,226,227,228,229,230,231,
819      232,233,234,235,236,237,238,239,
820      240,241,242,243,244,245,246,247,
821      248,249,250,251,252,253,254,255,
822    
823    /* This table contains bit maps for various character classes. Each map is 32
824    bytes long and the bits run from the least significant end of each byte. The
825    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
826    graph, print, punct, and cntrl. Other classes are built from combinations. The
       maps appear below in that order, one blank-line-separated group per class. */
827    
828      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,  /* space */
829      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
832    
833      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* xdigit */
834      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
835      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
837    
838      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* digit */
839      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
842    
843      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* upper */
844      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
845      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
847    
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* lower */
849      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
850      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
852    
853      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* word */
854      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
855      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
856      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857    
858      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,  /* graph */
859      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
860      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
861      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
862    
863      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,  /* print */
864      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
865      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
866      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
867    
868      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,  /* punct */
869      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
870      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
871      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
872    
873      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,  /* cntrl */
874      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
875      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
876      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
877    
878    /* This table identifies various classes of character by individual bits:
879      0x01   white space character
880      0x02   letter
881      0x04   decimal digit
882      0x08   hexadecimal digit
883      0x10   alphanumeric or '_'
884      0x80   regular expression metacharacter or binary zero
885    */
886    
887      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
888      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
889      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
890      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
891      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
892      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
893      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
894      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
895      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
896      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
897      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
898      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
899      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
900      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
901      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
902      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
903      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
904      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
905      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
906      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
907      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
908      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
909      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
910      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
911      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
912      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
913      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
914      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
915      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
916      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
917      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
918      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
919    
920    /* This is a set of tables that came originally from a Windows user. It seems to
921    be at least an approximation of ISO 8859. In particular, there are characters
922    greater than 128 that are marked as spaces, letters, etc. The byte counts
       match the four-part layout of tables0 (256 + 256 + 320 + 256); the section
       labels below assume the same order of tables and class bit maps. */
923    
924    static const pcre_uint8 tables1[] = {
925    0,1,2,3,4,5,6,7,                 /* lower casing table (256 bytes) */
926    8,9,10,11,12,13,14,15,
927    16,17,18,19,20,21,22,23,
928    24,25,26,27,28,29,30,31,
929    32,33,34,35,36,37,38,39,
930    40,41,42,43,44,45,46,47,
931    48,49,50,51,52,53,54,55,
932    56,57,58,59,60,61,62,63,
933    64,97,98,99,100,101,102,103,
934    104,105,106,107,108,109,110,111,
935    112,113,114,115,116,117,118,119,
936    120,121,122,91,92,93,94,95,
937    96,97,98,99,100,101,102,103,
938    104,105,106,107,108,109,110,111,
939    112,113,114,115,116,117,118,119,
940    120,121,122,123,124,125,126,127,
941    128,129,130,131,132,133,134,135,
942    136,137,138,139,140,141,142,143,
943    144,145,146,147,148,149,150,151,
944    152,153,154,155,156,157,158,159,
945    160,161,162,163,164,165,166,167,
946    168,169,170,171,172,173,174,175,
947    176,177,178,179,180,181,182,183,
948    184,185,186,187,188,189,190,191,
949    224,225,226,227,228,229,230,231,
950    232,233,234,235,236,237,238,239,
951    240,241,242,243,244,245,246,215,
952    248,249,250,251,252,253,254,223,
953    224,225,226,227,228,229,230,231,
954    232,233,234,235,236,237,238,239,
955    240,241,242,243,244,245,246,247,
956    248,249,250,251,252,253,254,255,
957    0,1,2,3,4,5,6,7,                 /* case flipping table (256 bytes) */
958    8,9,10,11,12,13,14,15,
959    16,17,18,19,20,21,22,23,
960    24,25,26,27,28,29,30,31,
961    32,33,34,35,36,37,38,39,
962    40,41,42,43,44,45,46,47,
963    48,49,50,51,52,53,54,55,
964    56,57,58,59,60,61,62,63,
965    64,97,98,99,100,101,102,103,
966    104,105,106,107,108,109,110,111,
967    112,113,114,115,116,117,118,119,
968    120,121,122,91,92,93,94,95,
969    96,65,66,67,68,69,70,71,
970    72,73,74,75,76,77,78,79,
971    80,81,82,83,84,85,86,87,
972    88,89,90,123,124,125,126,127,
973    128,129,130,131,132,133,134,135,
974    136,137,138,139,140,141,142,143,
975    144,145,146,147,148,149,150,151,
976    152,153,154,155,156,157,158,159,
977    160,161,162,163,164,165,166,167,
978    168,169,170,171,172,173,174,175,
979    176,177,178,179,180,181,182,183,
980    184,185,186,187,188,189,190,191,
981    224,225,226,227,228,229,230,231,
982    232,233,234,235,236,237,238,239,
983    240,241,242,243,244,245,246,215,
984    248,249,250,251,252,253,254,223,
985    192,193,194,195,196,197,198,199,
986    200,201,202,203,204,205,206,207,
987    208,209,210,211,212,213,214,247,
988    216,217,218,219,220,221,222,255,
989    0,62,0,0,1,0,0,0,                /* class bit maps (10 x 32 bytes): space */
990    0,0,0,0,0,0,0,0,
991    32,0,0,0,1,0,0,0,
992    0,0,0,0,0,0,0,0,
993    0,0,0,0,0,0,255,3,               /* xdigit */
994    126,0,0,0,126,0,0,0,
995    0,0,0,0,0,0,0,0,
996    0,0,0,0,0,0,0,0,
997    0,0,0,0,0,0,255,3,               /* digit */
998    0,0,0,0,0,0,0,0,
999    0,0,0,0,0,0,12,2,
1000    0,0,0,0,0,0,0,0,
1001    0,0,0,0,0,0,0,0,                 /* upper */
1002    254,255,255,7,0,0,0,0,
1003    0,0,0,0,0,0,0,0,
1004    255,255,127,127,0,0,0,0,
1005    0,0,0,0,0,0,0,0,                 /* lower */
1006    0,0,0,0,254,255,255,7,
1007    0,0,0,0,0,4,32,4,
1008    0,0,0,128,255,255,127,255,
1009    0,0,0,0,0,0,255,3,               /* word */
1010    254,255,255,135,254,255,255,7,
1011    0,0,0,0,0,4,44,6,
1012    255,255,127,255,255,255,127,255,
1013    0,0,0,0,254,255,255,255,         /* graph */
1014    255,255,255,255,255,255,255,127,
1015    0,0,0,0,254,255,255,255,
1016    255,255,255,255,255,255,255,255,
1017    0,2,0,0,255,255,255,255,         /* print */
1018    255,255,255,255,255,255,255,127,
1019    0,0,0,0,255,255,255,255,
1020    255,255,255,255,255,255,255,255,
1021    0,0,0,0,254,255,0,252,           /* punct */
1022    1,0,0,248,1,0,0,120,
1023    0,0,0,0,254,255,255,255,
1024    0,0,128,0,0,0,128,0,
1025    255,255,255,255,0,0,0,0,         /* cntrl */
1026    0,0,0,0,0,0,0,128,
1027    255,255,255,255,0,0,0,0,
1028    0,0,0,0,0,0,0,0,
1029    128,0,0,0,0,0,0,0,               /* character type table (256 bytes) */
1030    0,1,1,0,1,1,0,0,
1031    0,0,0,0,0,0,0,0,
1032    0,0,0,0,0,0,0,0,
1033    1,0,0,0,128,0,0,0,
1034    128,128,128,128,0,0,128,0,
1035    28,28,28,28,28,28,28,28,
1036    28,28,0,0,0,0,0,128,
1037    0,26,26,26,26,26,26,18,
1038    18,18,18,18,18,18,18,18,
1039    18,18,18,18,18,18,18,18,
1040    18,18,18,128,128,0,128,16,
1041    0,26,26,26,26,26,26,18,
1042    18,18,18,18,18,18,18,18,
1043    18,18,18,18,18,18,18,18,
1044    18,18,18,128,128,0,0,0,
1045    0,0,0,0,0,1,0,0,
1046    0,0,0,0,0,0,0,0,
1047    0,0,0,0,0,0,0,0,
1048    0,0,0,0,0,0,0,0,
1049    1,0,0,0,0,0,0,0,
1050    0,0,18,0,0,0,0,0,
1051    0,0,20,20,0,18,0,0,
1052    0,20,18,0,0,0,0,0,
1053    18,18,18,18,18,18,18,18,
1054    18,18,18,18,18,18,18,18,
1055    18,18,18,18,18,18,18,0,
1056    18,18,18,18,18,18,18,18,
1057    18,18,18,18,18,18,18,18,
1058    18,18,18,18,18,18,18,18,
1059    18,18,18,18,18,18,18,0,
1060    18,18,18,18,18,18,18,18
1061    };
1062    
1063    
1064    
1065    
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for old systems (SunOS4 is one example) whose C library has no
strerror(). The message is looked up in the library's own sys_errlist[] table,
which has sys_nerr entries; any number outside that table gets a fixed
"unknown" text. */

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1085    
1086    
1087    /*************************************************
1088    *         JIT memory callback                    *
1089    *************************************************/
1090    
1091    static pcre_jit_stack* jit_callback(void *arg)
1092    {
1093    jit_was_used = TRUE;
1094    return (pcre_jit_stack *)arg;
1095    }
1096    
1097    
1098    #if !defined NOUTF || defined SUPPORT_PCRE16
1099    /*************************************************
1100    *            Convert UTF-8 string to value       *
1101    *************************************************/
1102    
1103    /* This function takes one or more bytes that represents a UTF-8 character,
1104    and returns the value of the character.
1105    
1106    Argument:
1107      utf8bytes   a pointer to the byte vector
1108      vptr        a pointer to an int to receive the value
1109    
1110    Returns:      >  0 => the number of bytes consumed
1111                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1112    */
1113    
1114    static int
1115    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1116    {
1117    int c = *utf8bytes++;
1118    int d = c;
1119    int i, j, s;
1120    
1121    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1122      {
1123      if ((d & 0x80) == 0) break;
1124      d <<= 1;
1125      }
1126    
1127    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1128    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1129    
1130    /* i now has a value in the range 1-5 */
1131    
1132    s = 6*i;
1133    d = (c & utf8_table3[i]) << s;
1134    
1135    for (j = 0; j < i; j++)
1136      {
1137      c = *utf8bytes++;
1138      if ((c & 0xc0) != 0x80) return -(j+1);
1139      s -= 6;
1140      d |= (c & 0x3f) << s;
1141      }
1142    
1143    /* Check that encoding was the correct unique one */
1144    
1145    for (j = 0; j < utf8_table1_size; j++)
1146      if (d <= utf8_table1[j]) break;
1147    if (j != i) return -(i+1);
1148    
1149    /* Valid value */
1150    
1151    *vptr = d;
1152    return i+1;
1153    }
1154    #endif /* NOUTF || SUPPORT_PCRE16 */
1155    
1156    
1157    
1158    #if !defined NOUTF || defined SUPPORT_PCRE16
1159    /*************************************************
1160    *       Convert character value to UTF-8         *
1161    *************************************************/
1162    
1163    /* This function takes an integer value in the range 0 - 0x7fffffff
1164    and encodes it as a UTF-8 character in 0 to 6 bytes.
1165    
1166    Arguments:
1167      cvalue     the character value
1168      utf8bytes  pointer to buffer for result - at least 6 bytes long
1169    
1170    Returns:     number of characters placed in the buffer
1171    */
1172    
1173    static int
1174    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1175    {
1176    register int i, j;
1177    for (i = 0; i < utf8_table1_size; i++)
1178      if (cvalue <= utf8_table1[i]) break;
1179    utf8bytes += i;
1180    for (j = i; j > 0; j--)
1181     {
1182     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1183     cvalue >>= 6;
1184     }
1185    *utf8bytes = utf8_table2[i] | cvalue;
1186    return i + 1;
1187    }
1188    #endif
1189    
1190    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is replaced by a larger block
whenever it holds fewer than 2*len + 2 bytes. In non-UTF mode the space needed
for a 16-bit string is exactly double the 8-bit size. For a UTF-8 string, the
size needed for UTF-16 is no more than double: a value up to 0xffff takes at
least one byte in UTF-8 and two bytes in UTF-16, and a higher value takes 4
bytes in UTF-8 and 4 bytes in UTF-16.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;

/* Make sure the shared output buffer is big enough. */

if (2*len + 2 > buffer16_size)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  int c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;
    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which is only allowed in UTF mode. */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: each byte becomes one 16-bit unit. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1269    
1270    
1271  /*************************************************  /*************************************************
# Line 202  Returns:       pointer to the start of n Line 1291  Returns:       pointer to the start of n
1291                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1292  */  */
1293    
1294  static uschar *  static pcre_uint8 *
1295  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1296  {  {
1297  uschar *here = start;  pcre_uint8 *here = start;
1298    
1299  for (;;)  for (;;)
1300    {    {
1301    int rlen = buffer_size - (here - buffer);    size_t rlen = (size_t)(buffer_size - (here - buffer));
1302    
1303    if (rlen > 1000)    if (rlen > 1000)
1304      {      {
1305      int dlen;      int dlen;
1306    
1307      /* If libreadline support is required, use readline() to read a line if the      /* If libreadline or libedit support is required, use readline() to read a
1308      input is a terminal. Note that readline() removes the trailing newline, so      line if the input is a terminal. Note that readline() removes the trailing
1309      we must put it back again, to be compatible with fgets(). */      newline, so we must put it back again, to be compatible with fgets(). */
1310    
1311  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1312      if (isatty(fileno(f)))      if (isatty(fileno(f)))
1313        {        {
1314        size_t len;        size_t len;
# Line 239  for (;;) Line 1328  for (;;)
1328      /* Read the next line by normal means, prompting if the file is stdin. */      /* Read the next line by normal means, prompting if the file is stdin. */
1329    
1330        {        {
1331        if (f == stdin) printf(prompt);        if (f == stdin) printf("%s", prompt);
1332        if (fgets((char *)here, rlen,  f) == NULL)        if (fgets((char *)here, rlen,  f) == NULL)
1333          return (here == start)? NULL : start;          return (here == start)? NULL : start;
1334        }        }
# Line 252  for (;;) Line 1341  for (;;)
1341    else    else
1342      {      {
1343      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1344      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1345      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1346      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1347    
1348      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1349        {        {
# Line 285  return NULL;  /* Control never gets here Line 1374  return NULL;  /* Control never gets here
1374    
1375    
1376    
   
   
   
   
1377  /*************************************************  /*************************************************
1378  *          Read number from string               *  *          Read number from string               *
1379  *************************************************/  *************************************************/
# Line 305  Returns:        the unsigned long Line 1390  Returns:        the unsigned long
1390  */  */
1391    
1392  static int  static int
1393  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1394  {  {
1395  int result = 0;  int result = 0;
1396  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 316  return(result); Line 1401  return(result);
1401    
1402    
1403    
   
1404  /*************************************************  /*************************************************
1405  *            Convert UTF-8 string to value       *  *             Print one character                *
1406  *************************************************/  *************************************************/
1407    
1408  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
1409    
1410  Argument:  static int pchar(int c, FILE *f)
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1411  {  {
1412  int c = *utf8bytes++;  if (PRINTOK(c))
1413  int d = c;    {
1414  int i, j, s;    if (f != NULL) fprintf(f, "%c", c);
1415      return 1;
1416      }
1417    
1418  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1419    {    {
1420    if ((d & 0x80) == 0) break;    if (use_utf)
1421    d <<= 1;      {
1422        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1423        return 6;
1424        }
1425      else
1426        {
1427        if (f != NULL) fprintf(f, "\\x%02x", c);
1428        return 4;
1429        }
1430    }    }
1431    
1432  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1433  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return (c <= 0x000000ff)? 6 :
1434           (c <= 0x00000fff)? 7 :
1435           (c <= 0x0000ffff)? 8 :
1436           (c <= 0x000fffff)? 9 : 10;
1437    }
1438    
 /* i now has a value in the range 1-5 */  
1439    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1440    
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Print a byte string one character at a time through pchar(). In UTF mode a
well-formed UTF-8 sequence that fits in the remaining length is printed as one
character; anything else is printed byte by byte. A negative length means the
string is zero-terminated. Yields number of characters printed. If handed a
NULL file, just counts chars without printing. */

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int ch = 0;
int total = 0;
int remaining = length;

if (remaining < 0)
  remaining = strlen((char *)p);

while (remaining > 0)
  {
  int used = 0;
#if !defined NOUTF
  if (use_utf)
    {
    used = utf82ord(p, &ch);
    if (used <= 0 || used > remaining) used = 0;   /* Mustn't run over the end */
    }
#endif
  if (used == 0)
    {
    /* Not UTF mode, or no usable UTF-8 sequence here: take a single byte. */
    ch = *p;
    used = 1;
    }
  p += used;
  remaining -= used;
  total += pchar(ch, f);
  }

return total;
}
#endif
1479    
1480    
1481    
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Returns the number of 16-bit units that come before the terminating zero. */

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
 #endif  
   
1494    
1495    
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Print a string of 16-bit units one character at a time through pchar(). In
UTF mode a high surrogate (0xD800-0xDBFF) that is followed, within the given
length, by a low surrogate (0xDC00-0xDFFF) is combined into one character; any
other unit is printed as it stands. A negative length means the string is
zero-terminated. Yields number of characters printed. If handed a NULL file,
just counts chars without printing. */

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;
    if (d >= 0xDC00 && d <= 0xDFFF)   /* the low surrogate range includes 0xDFFF */
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1532    
1533    c = *p++;  
1534    if (PRINTHEX(c))  
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy the run of alphanumeric characters at p into the buffer at *pp and
follow it with two zero bytes. If the pattern has no group of that name, a
message is written to outfile; the name is kept either way. On return *pp
points at the second zero, and the result is the first input character that is
not part of the name. */

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *dst = name;

for (; isalnum(*p); p++) *dst++ = *p;
*dst++ = 0;
*dst = 0;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = dst;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1558    
1559    
1560    
1561    #ifdef SUPPORT_PCRE16
1562    /*************************************************
1563    *     Read a capture name (16-bit) and check it  *
1564    *************************************************/
1565    
1566    /* Note that the text being read is 8-bit. */
1567    
1568    static pcre_uint8 *
1569    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1570    {
1571    pcre_uint16 *npp = *pp;
1572    while (isalnum(*p)) *npp++ = *p++;
1573    *npp++ = 0;
1574    *npp = 0;
1575    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1576      {
1577      fprintf(outfile, "no parentheses with name \"");
1578      PCHARSV(*pp, 0, -1, outfile);
1579      fprintf(outfile, "\"\n");
1580      }
1581    *pp = npp;
1582    return p;
1583  }  }
1584    #endif  /* SUPPORT_PCRE16 */
1585    
1586    
1587    
# Line 503  if (callout_extra) Line 1610  if (callout_extra)
1610      else      else
1611        {        {
1612        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1613        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1614          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1615        fprintf(f, "\n");        fprintf(f, "\n");
1616        }        }
# Line 516  printed lengths of the substrings. */ Line 1623  printed lengths of the substrings. */
1623    
1624  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1625    
1626  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1627  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1628    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1629    
1630  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1631    
1632  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1633    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1634    
1635  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 559  fprintf(outfile, "%.*s", (cb->next_item_ Line 1666  fprintf(outfile, "%.*s", (cb->next_item_
1666  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1667  first_callout = 0;  first_callout = 0;
1668    
1669    if (cb->mark != last_callout_mark)
1670      {
1671      if (cb->mark == NULL)
1672        fprintf(outfile, "Latest Mark: <unset>\n");
1673      else
1674        {
1675        fprintf(outfile, "Latest Mark: ");
1676        PCHARSV(cb->mark, 0, -1, outfile);
1677        putc('\n', outfile);
1678        }
1679      last_callout_mark = cb->mark;
1680      }
1681    
1682  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1683    {    {
1684    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 578  return (cb->callout_number != callout_fa Line 1698  return (cb->callout_number != callout_fa
1698  *            Local malloc functions              *  *            Local malloc functions              *
1699  *************************************************/  *************************************************/
1700    
1701  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1702  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1703    show_malloc variable is set only during matching. */
1704    
1705  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1706  {  {
1707  void *block = malloc(size);  void *block = malloc(size);
1708  gotten_store = size;  gotten_store = size;
1709    if (first_gotten_store == 0) first_gotten_store = size;
1710  if (show_malloc)  if (show_malloc)
1711    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1712  return block;  return block;
# Line 597  if (show_malloc) Line 1719  if (show_malloc)
1719  free(block);  free(block);
1720  }  }
1721    
   
1722  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1723    
1724  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 620  free(block); Line 1741  free(block);
1741  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1742  *************************************************/  *************************************************/
1743    
1744  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1745    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1746    value, but the code is defensive.
1747    
1748    Arguments:
1749      re        compiled regex
1750      study     study data
1751      option    PCRE_INFO_xxx option
1752      ptr       where to put the data
1753    
1754    Returns:    0 when OK, < 0 on error
1755    */
1756    
1757  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
1758    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1759  {  {
1760  int rc;  int rc;
1761  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1762    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1763    #ifdef SUPPORT_PCRE16
1764      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1765    #else
1766      rc = PCRE_ERROR_BADMODE;
1767    #endif
1768    else
1769    #ifdef SUPPORT_PCRE8
1770      rc = pcre_fullinfo(re, study, option, ptr);
1771    #else
1772      rc = PCRE_ERROR_BADMODE;
1773    #endif
1774    
1775    if (rc < 0)
1776      {
1777      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1778        use_pcre16? "16" : "", option);
1779      if (rc == PCRE_ERROR_BADMODE)
1780        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1781          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1782      }
1783    
1784    return rc;
1785  }  }
1786    
1787    
1788    
1789  /*************************************************  /*************************************************
1790  *         Byte flipping function                 *  *             Swap byte functions                *
1791  *************************************************/  *************************************************/
1792    
1793  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1794  byteflip(unsigned long int value, int n)  value, respectively.
1795    
1796    Arguments:
1797      value        any number
1798    
1799    Returns:       the byte swapped value
1800    */
1801    
1802    static pcre_uint32
1803    swap_uint32(pcre_uint32 value)
1804  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1805  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1806         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1807         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1808         ((value & 0xff000000) >> 24);         (value >> 24);
1809    }
1810    
1811    static pcre_uint16
1812    swap_uint16(pcre_uint16 value)
1813    {
1814    return (value >> 8) | (value << 8);
1815  }  }
1816    
1817    
1818    
1819    /*************************************************
1820    *        Flip bytes in a compiled pattern        *
1821    *************************************************/
1822    
1823    /* This function is called if the 'F' option was present on a pattern that is
1824    to be written to a file. We flip the bytes of all the integer fields in the
1825    regex data block and the study block. In 16-bit mode this also flips relevant
1826    bytes in the pattern itself. This is to make it possible to test PCRE's
1827    ability to reload byte-flipped patterns, e.g. those compiled on a different
1828    architecture. */
1829    
1830    static void
1831    regexflip(pcre *ere, pcre_extra *extra)
1832    {
1833    REAL_PCRE *re = (REAL_PCRE *)ere;
1834    #ifdef SUPPORT_PCRE16
1835    int op;
1836    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1837    int length = re->name_count * re->name_entry_size;
1838    #ifdef SUPPORT_UTF
1839    BOOL utf = (re->options & PCRE_UTF16) != 0;
1840    BOOL utf16_char = FALSE;
1841    #endif /* SUPPORT_UTF */
1842    #endif /* SUPPORT_PCRE16 */
1843    
1844    /* Always flip the bytes in the main data block and study blocks. */
1845    
1846    re->magic_number = REVERSED_MAGIC_NUMBER;
1847    re->size = swap_uint32(re->size);
1848    re->options = swap_uint32(re->options);
1849    re->flags = swap_uint16(re->flags);
1850    re->top_bracket = swap_uint16(re->top_bracket);
1851    re->top_backref = swap_uint16(re->top_backref);
1852    re->first_char = swap_uint16(re->first_char);
1853    re->req_char = swap_uint16(re->req_char);
1854    re->name_table_offset = swap_uint16(re->name_table_offset);
1855    re->name_entry_size = swap_uint16(re->name_entry_size);
1856    re->name_count = swap_uint16(re->name_count);
1857    
1858    if (extra != NULL)
1859      {
1860      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1861      rsd->size = swap_uint32(rsd->size);
1862      rsd->flags = swap_uint32(rsd->flags);
1863      rsd->minlength = swap_uint32(rsd->minlength);
1864      }
1865    
1866    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1867    in the name table, if present, and then in the pattern itself. */
1868    
1869    #ifdef SUPPORT_PCRE16
1870    if (!use_pcre16) return;
1871    
1872    while(TRUE)
1873      {
1874      /* Swap previous characters. */
1875      while (length-- > 0)
1876        {
1877        *ptr = swap_uint16(*ptr);
1878        ptr++;
1879        }
1880    #ifdef SUPPORT_UTF
1881      if (utf16_char)
1882        {
1883        if ((ptr[-1] & 0xfc00) == 0xd800)
1884          {
1885          /* We know that there is only one extra character in UTF-16. */
1886          *ptr = swap_uint16(*ptr);
1887          ptr++;
1888          }
1889        }
1890      utf16_char = FALSE;
1891    #endif /* SUPPORT_UTF */
1892    
1893      /* Get next opcode. */
1894    
1895      length = 0;
1896      op = *ptr;
1897      *ptr++ = swap_uint16(op);
1898    
1899      switch (op)
1900        {
1901        case OP_END:
1902        return;
1903    
1904    #ifdef SUPPORT_UTF
1905        case OP_CHAR:
1906        case OP_CHARI:
1907        case OP_NOT:
1908        case OP_NOTI:
1909        case OP_STAR:
1910        case OP_MINSTAR:
1911        case OP_PLUS:
1912        case OP_MINPLUS:
1913        case OP_QUERY:
1914        case OP_MINQUERY:
1915        case OP_UPTO:
1916        case OP_MINUPTO:
1917        case OP_EXACT:
1918        case OP_POSSTAR:
1919        case OP_POSPLUS:
1920        case OP_POSQUERY:
1921        case OP_POSUPTO:
1922        case OP_STARI:
1923        case OP_MINSTARI:
1924        case OP_PLUSI:
1925        case OP_MINPLUSI:
1926        case OP_QUERYI:
1927        case OP_MINQUERYI:
1928        case OP_UPTOI:
1929        case OP_MINUPTOI:
1930        case OP_EXACTI:
1931        case OP_POSSTARI:
1932        case OP_POSPLUSI:
1933        case OP_POSQUERYI:
1934        case OP_POSUPTOI:
1935        case OP_NOTSTAR:
1936        case OP_NOTMINSTAR:
1937        case OP_NOTPLUS:
1938        case OP_NOTMINPLUS:
1939        case OP_NOTQUERY:
1940        case OP_NOTMINQUERY:
1941        case OP_NOTUPTO:
1942        case OP_NOTMINUPTO:
1943        case OP_NOTEXACT:
1944        case OP_NOTPOSSTAR:
1945        case OP_NOTPOSPLUS:
1946        case OP_NOTPOSQUERY:
1947        case OP_NOTPOSUPTO:
1948        case OP_NOTSTARI:
1949        case OP_NOTMINSTARI:
1950        case OP_NOTPLUSI:
1951        case OP_NOTMINPLUSI:
1952        case OP_NOTQUERYI:
1953        case OP_NOTMINQUERYI:
1954        case OP_NOTUPTOI:
1955        case OP_NOTMINUPTOI:
1956        case OP_NOTEXACTI:
1957        case OP_NOTPOSSTARI:
1958        case OP_NOTPOSPLUSI:
1959        case OP_NOTPOSQUERYI:
1960        case OP_NOTPOSUPTOI:
1961        if (utf) utf16_char = TRUE;
1962    #endif
1963        /* Fall through. */
1964    
1965        default:
1966        length = OP_lengths16[op] - 1;
1967        break;
1968    
1969        case OP_CLASS:
1970        case OP_NCLASS:
1971        /* Skip the character bit map. */
1972        ptr += 32/sizeof(pcre_uint16);
1973        length = 0;
1974        break;
1975    
1976        case OP_XCLASS:
1977        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1978        if (LINK_SIZE > 1)
1979          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1980            - (1 + LINK_SIZE + 1));
1981        else
1982          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1983    
1984        /* Reverse the size of the XCLASS instance. */
1985        *ptr = swap_uint16(*ptr);
1986        ptr++;
1987        if (LINK_SIZE > 1)
1988          {
1989          *ptr = swap_uint16(*ptr);
1990          ptr++;
1991          }
1992    
1993        op = *ptr;
1994        *ptr = swap_uint16(op);
1995        ptr++;
1996        if ((op & XCL_MAP) != 0)
1997          {
1998          /* Skip the character bit map. */
1999          ptr += 32/sizeof(pcre_uint16);
2000          length -= 32/sizeof(pcre_uint16);
2001          }
2002        break;
2003        }
2004      }
2005    /* Control should never reach here in 16 bit mode. */
2006    #endif /* SUPPORT_PCRE16 */
2007    }
2008    
2009    
2010    
2011  /*************************************************  /*************************************************
2012  *        Check match or recursion limit          *  *        Check match or recursion limit          *
2013  *************************************************/  *************************************************/
2014    
2015  static int  static int
2016  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2017    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2018    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2019  {  {
# Line 668  for (;;) Line 2028  for (;;)
2028    {    {
2029    *limit = mid;    *limit = mid;
2030    
2031    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2032      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2033    
2034    if (count == errnumber)    if (count == errnumber)
# Line 713  Returns:    < 0, = 0, or > 0, according Line 2073  Returns:    < 0, = 0, or > 0, according
2073  */  */
2074    
2075  static int  static int
2076  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2077  {  {
2078  while (n--)  while (n--)
2079    {    {
# Line 729  return 0; Line 2089  return 0;
2089  *         Check newline indicator                *  *         Check newline indicator                *
2090  *************************************************/  *************************************************/
2091    
2092  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2093  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
2094    
2095  Arguments:  Arguments:
2096    p           points after the leading '<'    p           points after the leading '<'
# Line 741  Returns:      appropriate PCRE_NEWLINE_x Line 2100  Returns:      appropriate PCRE_NEWLINE_x
2100  */  */
2101    
2102  static int  static int
2103  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2104  {  {
2105  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2106  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2107  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2108  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2109  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2110  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2111  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2112  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2113  return 0;  return 0;
2114  }  }
# Line 765  usage(void) Line 2124  usage(void)
2124  {  {
2125  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2126  printf("Input and output default to stdin and stdout.\n");  printf("Input and output default to stdin and stdout.\n");
2127  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2128  printf("If input is a terminal, readline() is used to read from it.\n");  printf("If input is a terminal, readline() is used to read from it.\n");
2129  #else  #else
2130  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2131  #endif  #endif
2132  printf("\nOptions:\n");  printf("\nOptions:\n");
2133  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2134    printf("  -16      use the 16-bit library\n");
2135    #endif
2136    printf("  -b       show compiled code\n");
2137  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2138    printf("  -C arg   show a specific compile-time option\n");
2139    printf("           and exit with its value. The arg can be:\n");
2140    printf("     linksize     internal link size [2, 3, 4]\n");
2141    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2142    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2143    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2144    printf("     ucp          Unicode Properties supported [0, 1]\n");
2145    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2146    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2147  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2148  #if !defined NODFA  #if !defined NODFA
2149  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
2150  #endif  #endif
2151  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
2152  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
2153           "  -M       find MATCH_LIMIT minimum for each subject\n"
2154         "  -m       output memory used information\n"         "  -m       output memory used information\n"
2155         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
2156  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 786  printf("  -p       use POSIX interface\n Line 2158  printf("  -p       use POSIX interface\n
2158  #endif  #endif
2159  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2160  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2161  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
2162           "  -s+      force each pattern to be studied, using JIT if available\n"
2163           "  -s++     ditto, verifying when JIT was actually used\n"
2164           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2165           "             where 1 <= n <= 7 selects JIT options\n"
2166           "  -s++n    ditto, verifying when JIT was actually used\n"
2167         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2168  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2169  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 806  options, followed by a set of test data, Line 2183  options, followed by a set of test data,
2183  int main(int argc, char **argv)  int main(int argc, char **argv)
2184  {  {
2185  FILE *infile = stdin;  FILE *infile = stdin;
2186    const char *version;
2187  int options = 0;  int options = 0;
2188  int study_options = 0;  int study_options = 0;
2189    int default_find_match_limit = FALSE;
2190  int op = 1;  int op = 1;
2191  int timeit = 0;  int timeit = 0;
2192  int timeitm = 0;  int timeitm = 0;
2193  int showinfo = 0;  int showinfo = 0;
2194  int showstore = 0;  int showstore = 0;
2195    int force_study = -1;
2196    int force_study_options = 0;
2197  int quiet = 0;  int quiet = 0;
2198  int size_offsets = 45;  int size_offsets = 45;
2199  int size_offsets_max;  int size_offsets_max;
# Line 823  int posix = 0; Line 2204  int posix = 0;
2204  int debug = 0;  int debug = 0;
2205  int done = 0;  int done = 0;
2206  int all_use_dfa = 0;  int all_use_dfa = 0;
2207    int verify_jit = 0;
2208  int yield = 0;  int yield = 0;
2209  int stack_size;  int stack_size;
2210    
2211  /* These vectors store, end-to-end, a list of captured substring names. Assume  pcre_jit_stack *jit_stack = NULL;
 that 1024 is plenty long enough for the few names we'll be testing. */  
   
 uschar copynames[1024];  
 uschar getnames[1024];  
2212    
2213  uschar *copynamesptr;  /* These vectors store, end-to-end, a list of zero-terminated captured
2214  uschar *getnamesptr;  substring names, each list itself being terminated by an empty name. Assume
2215    that 1024 is plenty long enough for the few names we'll be testing. It is
2216    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2217    for the actual memory, to ensure alignment. */
2218    
2219    pcre_uint16 copynames[1024];
2220    pcre_uint16 getnames[1024];
2221    
2222    #ifdef SUPPORT_PCRE16
2223    pcre_uint16 *cn16ptr;
2224    pcre_uint16 *gn16ptr;
2225    #endif
2226    
2227  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2228  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2229    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2230    pcre_uint8 *cn8ptr;
2231    pcre_uint8 *gn8ptr;
2232    #endif
2233    
2234  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2235  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2236  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2237    
2238    buffer = (pcre_uint8 *)malloc(buffer_size);
2239    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2240    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2241    
2242  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2243    
# Line 855  it set 0x8000, but then I was advised th Line 2252  it set 0x8000, but then I was advised th
2252  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2253  #endif  #endif
2254    
2255    /* Get the version number: both pcre_version() and pcre16_version() give the
2256    same answer. We just need to ensure that we call one that is available. */
2257    
2258    #ifdef SUPPORT_PCRE8
2259    version = pcre_version();
2260    #else
2261    version = pcre16_version();
2262    #endif
2263    
2264  /* Scan options */  /* Scan options */
2265    
2266  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2267    {    {
2268    unsigned char *endptr;    pcre_uint8 *endptr;
2269      char *arg = argv[op];
2270    
2271    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(arg, "-m") == 0) showstore = 1;
2272      showstore = 1;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2273    else if (strcmp(argv[op], "-q") == 0) quiet = 1;  
2274    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strncmp(arg, "-s+", 3) == 0)
2275    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      {
2276    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      arg += 3;
2277        if (*arg == '+') { arg++; verify_jit = TRUE; }
2278        force_study = 1;
2279        if (*arg == 0)
2280          force_study_options = jit_study_bits[6];
2281        else if (*arg >= '1' && *arg <= '7')
2282          force_study_options = jit_study_bits[*arg - '1'];
2283        else goto BAD_ARG;
2284        }
2285      else if (strcmp(arg, "-16") == 0)
2286        {
2287    #ifdef SUPPORT_PCRE16
2288        use_pcre16 = 1;
2289    #else
2290        printf("** This version of PCRE was built without 16-bit support\n");
2291        exit(1);
2292    #endif
2293        }
2294      else if (strcmp(arg, "-q") == 0) quiet = 1;
2295      else if (strcmp(arg, "-b") == 0) debug = 1;
2296      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2297      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2298      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2299  #if !defined NODFA  #if !defined NODFA
2300    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2301  #endif  #endif
2302    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2303        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2304          *endptr == 0))          *endptr == 0))
2305      {      {
2306      op++;      op++;
2307      argc--;      argc--;
2308      }      }
2309    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2310      {      {
2311      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2312      int temp;      int temp;
2313      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2314                       *endptr == 0))                       *endptr == 0))
2315        {        {
2316        timeitm = temp;        timeitm = temp;
# Line 891  while (argc > 1 && argv[op][0] == '-') Line 2320  while (argc > 1 && argv[op][0] == '-')
2320      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2321      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2322      }      }
2323    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2324        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2325          *endptr == 0))          *endptr == 0))
2326      {      {
2327  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2328      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2329      exit(1);      exit(1);
2330  #else  #else
# Line 914  while (argc > 1 && argv[op][0] == '-') Line 2343  while (argc > 1 && argv[op][0] == '-')
2343  #endif  #endif
2344      }      }
2345  #if !defined NOPOSIX  #if !defined NOPOSIX
2346    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2347  #endif  #endif
2348    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2349      {      {
2350      int rc;      int rc;
2351      printf("PCRE version %s\n", pcre_version());      unsigned long int lrc;
2352    
2353        if (argc > 2)
2354          {
2355          if (strcmp(argv[op + 1], "linksize") == 0)
2356            {
2357            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2358            printf("%d\n", rc);
2359            yield = rc;
2360            goto EXIT;
2361            }
2362          if (strcmp(argv[op + 1], "pcre8") == 0)
2363            {
2364    #ifdef SUPPORT_PCRE8
2365            printf("1\n");
2366            yield = 1;
2367    #else
2368            printf("0\n");
2369            yield = 0;
2370    #endif
2371            goto EXIT;
2372            }
2373          if (strcmp(argv[op + 1], "pcre16") == 0)
2374            {
2375    #ifdef SUPPORT_PCRE16
2376            printf("1\n");
2377            yield = 1;
2378    #else
2379            printf("0\n");
2380            yield = 0;
2381    #endif
2382            goto EXIT;
2383            }
2384          if (strcmp(argv[op + 1], "utf") == 0)
2385            {
2386    #ifdef SUPPORT_PCRE8
2387            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2388            printf("%d\n", rc);
2389            yield = rc;
2390    #else
2391            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2392            printf("%d\n", rc);
2393            yield = rc;
2394    #endif
2395            goto EXIT;
2396            }
2397          if (strcmp(argv[op + 1], "ucp") == 0)
2398            {
2399            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2400            printf("%d\n", rc);
2401            yield = rc;
2402            goto EXIT;
2403            }
2404          if (strcmp(argv[op + 1], "jit") == 0)
2405            {
2406            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2407            printf("%d\n", rc);
2408            yield = rc;
2409            goto EXIT;
2410            }
2411          if (strcmp(argv[op + 1], "newline") == 0)
2412            {
2413            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2414            /* Note that these values are always the ASCII values, even
2415            in EBCDIC environments. CR is 13 and NL is 10. */
2416            printf("%s\n", (rc == 13)? "CR" :
2417              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2418              (rc == -2)? "ANYCRLF" :
2419              (rc == -1)? "ANY" : "???");
2420            goto EXIT;
2421            }
2422          printf("Unknown -C option: %s\n", argv[op + 1]);
2423          goto EXIT;
2424          }
2425    
2426        printf("PCRE version %s\n", version);
2427      printf("Compiled with\n");      printf("Compiled with\n");
2428    
2429    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2430    are set, either both UTFs are supported or both are not supported. */
2431    
2432    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2433        printf("  8-bit and 16-bit support\n");
2434        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2435        if (rc)
2436          printf("  UTF-8 and UTF-16 support\n");
2437        else
2438          printf("  No UTF-8 or UTF-16 support\n");
2439    #elif defined SUPPORT_PCRE8
2440        printf("  8-bit support only\n");
2441      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2442      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2443      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2444        printf("  16-bit support only\n");
2445        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2446        printf("  %sUTF-16 support\n", rc? "" : "No ");
2447    #endif
2448    
2449        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2450      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2451      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2452      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      if (rc)
2453        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        {
2454          const char *arch;
2455          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2456          printf("  Just-in-time compiler support: %s\n", arch);
2457          }
2458        else
2459          printf("  No just-in-time compiler support\n");
2460        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2461        /* Note that these values are always the ASCII values, even
2462        in EBCDIC environments. CR is 13 and NL is 10. */
2463        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2464          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2465        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2466        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2467      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2468      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2469                                       "all Unicode newlines");                                       "all Unicode newlines");
2470      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2471      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2472      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2473      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2474      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2475      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
2476      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2477      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
2478      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2479      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2480        if (showstore)
2481          {
2482          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2483          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2484          }
2485        printf("\n");
2486      goto EXIT;      goto EXIT;
2487      }      }
2488    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
2489             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
2490      {      {
2491      usage();      usage();
2492      goto EXIT;      goto EXIT;
2493      }      }
2494    else    else
2495      {      {
2496      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
2497        printf("** Unknown or malformed option %s\n", arg);
2498      usage();      usage();
2499      yield = 1;      yield = 1;
2500      goto EXIT;      goto EXIT;
# Line 1000  if (argc > 2) Line 2541  if (argc > 2)
2541    
2542  /* Set alternative malloc function */  /* Set alternative malloc function */
2543    
2544    #ifdef SUPPORT_PCRE8
2545  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2546  pcre_free = new_free;  pcre_free = new_free;
2547  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2548  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2549    #endif
2550    
2551    #ifdef SUPPORT_PCRE16
2552    pcre16_malloc = new_malloc;
2553    pcre16_free = new_free;
2554    pcre16_stack_malloc = stack_malloc;
2555    pcre16_stack_free = stack_free;
2556    #endif
2557    
2558  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2559    
2560  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2561    
2562  /* Main loop */  /* Main loop */
2563    
# Line 1022  while (!done) Line 2572  while (!done)
2572  #endif  #endif
2573    
2574    const char *error;    const char *error;
2575    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2576    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
2577    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
2578      const pcre_uint8 *tables = NULL;
2579      unsigned long int get_options;
2580    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2581    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2582      int do_allcaps = 0;
2583      int do_mark = 0;
2584    int do_study = 0;    int do_study = 0;
2585      int no_force_study = 0;
2586    int do_debug = debug;    int do_debug = debug;
2587    int do_G = 0;    int do_G = 0;
2588    int do_g = 0;    int do_g = 0;
2589    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2590    int do_showrest = 0;    int do_showrest = 0;
2591      int do_showcaprest = 0;
2592    int do_flip = 0;    int do_flip = 0;
2593    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2594    
2595    use_utf8 = 0;    use_utf = 0;
2596    debug_lengths = 1;    debug_lengths = 1;
2597    
2598    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1051  while (!done) Line 2607  while (!done)
2607    
2608    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2609      {      {
2610      unsigned long int magic, get_options;      pcre_uint32 magic;
2611      uschar sbuf[8];      pcre_uint8 sbuf[8];
2612      FILE *f;      FILE *f;
2613    
2614      p++;      p++;
2615        if (*p == '!')
2616          {
2617          do_debug = TRUE;
2618          do_showinfo = TRUE;
2619          p++;
2620          }
2621    
2622      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2623      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2624      *pp = 0;      *pp = 0;
# Line 1067  while (!done) Line 2630  while (!done)
2630        continue;        continue;
2631        }        }
2632    
2633        first_gotten_store = 0;
2634      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2635    
2636      true_size =      true_size =
# Line 1074  while (!done) Line 2638  while (!done)
2638      true_study_size =      true_study_size =
2639        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2640    
2641      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2642      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2643    
2644      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2645    
2646      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2647      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2648        {        {
2649        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2650          {          {
2651          do_flip = 1;          do_flip = 1;
2652          }          }
# Line 1094  while (!done) Line 2658  while (!done)
2658          }          }
2659        }        }
2660    
2661      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
2662        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2663          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
     /* Need to know if UTF-8 for printing data strings */  
2664    
2665      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      /* Now see if there is any following study data. */
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
     /* Now see if there is any following study data */  
2666    
2667      if (true_study_size != 0)      if (true_study_size != 0)
2668        {        {
# Line 1118  while (!done) Line 2678  while (!done)
2678          {          {
2679          FAIL_READ:          FAIL_READ:
2680          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2681          if (extra != NULL) new_free(extra);          if (extra != NULL)
2682              {
2683              PCRE_FREE_STUDY(extra);
2684              }
2685          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2686          fclose(f);          fclose(f);
2687          continue;          continue;
# Line 1128  while (!done) Line 2691  while (!done)
2691        }        }
2692      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2693    
2694        /* Flip the necessary bytes. */
2695        if (do_flip)
2696          {
2697          int rc;
2698          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2699          if (rc == PCRE_ERROR_BADMODE)
2700            {
2701            /* Simulate the result of the function call below. */
2702            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2703              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2704            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2705              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2706            continue;
2707            }
2708          }
2709    
2710        /* Need to know if UTF-8 for printing data strings. */
2711    
2712        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2713        use_utf = (get_options & PCRE_UTF8) != 0;
2714    
2715      fclose(f);      fclose(f);
2716      goto SHOW_INFO;      goto SHOW_INFO;
2717      }      }
2718    
2719    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2720    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2721    
2722    delimiter = *p++;    delimiter = *p++;
2723    
# Line 1144  while (!done) Line 2728  while (!done)
2728      }      }
2729    
2730    pp = p;    pp = p;
2731    poffset = p - buffer;    poffset = (int)(p - buffer);
2732    
2733    for(;;)    for(;;)
2734      {      {
# Line 1198  while (!done) Line 2782  while (!done)
2782        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2783        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2784    
2785        case '+': do_showrest = 1; break;        case '+':
2786          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2787          break;
2788    
2789          case '=': do_allcaps = 1; break;
2790        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2791        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2792        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1208  while (!done) Line 2796  while (!done)
2796        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2797        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2798        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
2799          case 'K': do_mark = 1; break;
2800        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2801        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2802    
# Line 1215  while (!done) Line 2804  while (!done)
2804        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2805  #endif  #endif
2806    
2807        case 'S': do_study = 1; break;        case 'S':
2808          if (do_study == 0)
2809            {
2810            do_study = 1;
2811            if (*pp == '+')
2812              {
2813              if (*(++pp) == '+')
2814                {
2815                verify_jit = TRUE;
2816                pp++;
2817                }
2818              if (*pp >= '1' && *pp <= '7')
2819                study_options |= jit_study_bits[*pp++ - '1'];
2820              else
2821                study_options |= jit_study_bits[6];
2822              }
2823            }
2824          else
2825            {
2826            do_study = 0;
2827            no_force_study = 1;
2828            }
2829          break;
2830    
2831        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2832          case 'W': options |= PCRE_UCP; break;
2833        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2834          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2835        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2836        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2837        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2838    
2839          case 'T':
2840          switch (*pp++)
2841            {
2842            case '0': tables = tables0; break;
2843            case '1': tables = tables1; break;
2844    
2845            case '\r':
2846            case '\n':
2847            case ' ':
2848            case 0:
2849            fprintf(outfile, "** Missing table number after /T\n");
2850            goto SKIP_DATA;
2851    
2852            default:
2853            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2854            goto SKIP_DATA;
2855            }
2856          break;
2857    
2858        case 'L':        case 'L':
2859        ppp = pp;        ppp = pp;
2860        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1234  while (!done) Line 2867  while (!done)
2867          goto SKIP_DATA;          goto SKIP_DATA;
2868          }          }
2869        locale_set = 1;        locale_set = 1;
2870        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2871        pp = ppp;        pp = ppp;
2872        break;        break;
2873    
# Line 1247  while (!done) Line 2880  while (!done)
2880    
2881        case '<':        case '<':
2882          {          {
2883          if (strncmp((char *)pp, "JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2884            {            {
2885            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2886            pp += 3;            pp += 3;
2887            }            }
2888          else          else
2889            {            {
2890            int x = check_newline(pp, outfile);            int x = check_newline(pp, outfile);
2891            if (x == 0) goto SKIP_DATA;            if (x == 0) goto SKIP_DATA;
2892            options |= x;            options |= x;
2893            while (*pp++ != '>');            while (*pp++ != '>');
2894            }            }
2895          }          }
2896        break;        break;
2897    
# Line 1275  while (!done) Line 2908  while (!done)
2908    
2909    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2910    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2911    local character tables. */    local character tables. Neither does it have 16-bit support. */
2912    
2913  #if !defined NOPOSIX  #if !defined NOPOSIX
2914    if (posix || do_posix)    if (posix || do_posix)
# Line 1288  while (!done) Line 2921  while (!done)
2921      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2922      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2923      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2924        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2925        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2926    
2927        first_gotten_store = 0;
2928      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2929    
2930      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1308  while (!done) Line 2944  while (!done)
2944  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2945    
2946      {      {
2947        /* In 16-bit mode, convert the input. */
2948    
2949    #ifdef SUPPORT_PCRE16
2950        if (use_pcre16)
2951          {
2952          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2953            {
2954            case -1:
2955            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2956              "converted to UTF-16\n");
2957            goto SKIP_DATA;
2958    
2959            case -2:
2960            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2961              "cannot be converted to UTF-16\n");
2962            goto SKIP_DATA;
2963    
2964            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2965            fprintf(outfile, "**Failed: character value greater than 0xffff "
2966              "cannot be converted to 16-bit in non-UTF mode\n");
2967            goto SKIP_DATA;
2968    
2969            default:
2970            break;
2971            }
2972          p = (pcre_uint8 *)buffer16;
2973          }
2974    #endif
2975    
2976        /* Compile many times when timing */
2977    
2978      if (timeit > 0)      if (timeit > 0)
2979        {        {
2980        register int i;        register int i;
# Line 1315  while (!done) Line 2982  while (!done)
2982        clock_t start_time = clock();        clock_t start_time = clock();
2983        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2984          {          {
2985          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2986          if (re != NULL) free(re);          if (re != NULL) free(re);
2987          }          }
2988        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1324  while (!done) Line 2991  while (!done)
2991            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2992        }        }
2993    
2994      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2995        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2996    
2997      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2998      if non-interactive. */      if non-interactive. */
# Line 1351  while (!done) Line 3019  while (!done)
3019        goto CONTINUE;        goto CONTINUE;
3020        }        }
3021    
3022      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
3023      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
3024      returns only limited data. Check that it agrees with the newer one. */      lines. */
3025    
3026      if (log_store)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3027        fprintf(outfile, "Memory allocation (code space): %d\n",        goto SKIP_DATA;
3028          (int)(gotten_store -      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3029    
3030      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3031      and remember the store that was got. */      and remember the store that was got. */
3032    
3033      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
3034      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3035    
3036      /* If /S was present, study the regexp to generate additional info to      /* Output code size information if requested */
3037      help with the matching. */  
3038        if (log_store)
3039          fprintf(outfile, "Memory allocation (code space): %d\n",
3040            (int)(first_gotten_store -
3041                  sizeof(REAL_PCRE) -
3042                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3043    
3044        /* If -s or /S was present, study the regex to generate additional info to
3045        help with the matching, unless the pattern has the SS option, which
3046        suppresses the effect of /S (used for a few test patterns where studying is
3047        never sensible). */
3048    
3049      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
3050        {        {
3051        if (timeit > 0)        if (timeit > 0)
3052          {          {
# Line 1378  while (!done) Line 3054  while (!done)
3054          clock_t time_taken;          clock_t time_taken;
3055          clock_t start_time = clock();          clock_t start_time = clock();
3056          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3057            extra = pcre_study(re, study_options, &error);            {
3058              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3059              }
3060          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3061          if (extra != NULL) free(extra);          if (extra != NULL)
3062              {
3063              PCRE_FREE_STUDY(extra);
3064              }
3065          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3066            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3067              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3068          }          }
3069        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3070        if (error != NULL)        if (error != NULL)
3071          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3072        else if (extra != NULL)        else if (extra != NULL)
3073            {
3074          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3075            if (log_store)
3076              {
3077              size_t jitsize;
3078              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3079                  jitsize != 0)
3080                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3081              }
3082            }
3083        }        }
3084    
3085      /* If the 'F' option was present, we flip the bytes of all the integer      /* If /K was present, we set up for handling MARK data. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
3086    
3087      if (do_flip)      if (do_mark)
3088        {        {
3089        real_pcre *rre = (real_pcre *)re;        if (extra == NULL)
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
3090          {          {
3091          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3092          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          extra->flags = 0;
         rsd->options = byteflip(rsd->options, sizeof(rsd->options));  
3093          }          }
3094          extra->mark = &markptr;
3095          extra->flags |= PCRE_EXTRA_MARK;
3096        }        }
3097    
3098      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
3099    
3100      SHOW_INFO:      SHOW_INFO:
3101    
3102      if (do_debug)      if (do_debug)
3103        {        {
3104        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3105        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3106        }        }
3107    
3108        /* We already have the options in get_options (see above) */
3109    
3110      if (do_showinfo)      if (do_showinfo)
3111        {        {
3112        unsigned long int get_options, all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3113        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3114          hascrorlf;          hascrorlf, maxlookbehind;
3115        int nameentrysize, namecount;        int nameentrysize, namecount;
3116        const uschar *nametable;        const pcre_uint8 *nametable;
3117    
3118        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3119        new_info(re, NULL, PCRE_INFO_SIZE, &size);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3120        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3121        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3122        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3123        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3124        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3125        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3126        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3127        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3128        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3129        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3130              != 0)
3131  #if !defined NOINFOCHECK          goto SKIP_DATA;
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3132    
3133        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3134          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1495  while (!done) Line 3143  while (!done)
3143          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3144          while (namecount-- > 0)          while (namecount-- > 0)
3145            {            {
3146            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3147              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3148              GET2(nametable, 0));  #else
3149              int imm2_size = IMM2_SIZE;
3150    #endif
3151              int length = (int)STRLEN(nametable + imm2_size);
3152              fprintf(outfile, "  ");
3153              PCHARSV(nametable, imm2_size, length, outfile);
3154              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3155    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3156              fprintf(outfile, "%3d\n", use_pcre16?
3157                 (int)(((PCRE_SPTR16)nametable)[0])
3158                :((int)nametable[0] << 8) | (int)nametable[1]);
3159              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3160    #else
3161              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3162    #ifdef SUPPORT_PCRE8
3163            nametable += nameentrysize;            nametable += nameentrysize;
3164    #else
3165              nametable += nameentrysize * 2;
3166    #endif
3167    #endif
3168            }            }
3169          }          }
3170    
3171        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3172        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3173    
3174        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3175        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3176    
3177        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3178          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3179            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3180            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3181            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1522  while (!done) Line 3188  while (!done)
3188            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3189            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3190            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3191            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3192            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3193              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3194              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3195            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3196    
3197        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1564  while (!done) Line 3232  while (!done)
3232          }          }
3233        else        else
3234          {          {
3235          int ch = first_char & 255;          const char *caseless =
3236          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3237            "" : " (caseless)";            "" : " (caseless)";
3238          if (PRINTHEX(ch))  
3239            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3240              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3241          else          else
3242            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3243              fprintf(outfile, "First char = ");
3244              pchar(first_char, outfile);
3245              fprintf(outfile, "%s\n", caseless);
3246              }
3247          }          }
3248    
3249        if (need_char < 0)        if (need_char < 0)
# Line 1579  while (!done) Line 3252  while (!done)
3252          }          }
3253        else        else
3254          {          {
3255          int ch = need_char & 255;          const char *caseless =
3256          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3257            "" : " (caseless)";            "" : " (caseless)";
3258          if (PRINTHEX(ch))  
3259            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3260              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3261          else          else
3262            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3263              fprintf(outfile, "Need char = ");
3264              pchar(need_char, outfile);
3265              fprintf(outfile, "%s\n", caseless);
3266              }
3267          }          }
3268    
3269          if (maxlookbehind > 0)
3270            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3271    
3272        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3273        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3274        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3275        flipped.) */        flipped.) If study was forced by an external -s, don't show this
3276          information unless -i or -d was also present. This means that, except
3277          when auto-callouts are involved, the output from runs with and without
3278          -s should be identical. */
3279    
3280        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3281          {          {
3282          if (extra == NULL)          if (extra == NULL)
3283            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3284          else          else
3285            {            {
3286            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3287            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
3288    
3289            if (start_bits == NULL)            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3290              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3291            else  
3292              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3293              {              {
3294              int i;              if (start_bits == NULL)
3295              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3296              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3297                {                {
3298                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3299                  int c = 24;
3300                  fprintf(outfile, "Starting byte set: ");
3301                  for (i = 0; i < 256; i++)
3302                  {                  {
3303                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
3304                    {                    {
3305                    fprintf(outfile, "%c ", i);                    if (c > 75)
3306                    c += 2;                      {
3307                    }                      fprintf(outfile, "\n  ");
3308                  else                      c = 2;
3309                    {                      }
3310                    fprintf(outfile, "\\x%02x ", i);                    if (PRINTOK(i) && i != ' ')
3311                    c += 5;                      {
3312                        fprintf(outfile, "%c ", i);
3313                        c += 2;
3314                        }
3315                      else
3316                        {
3317                        fprintf(outfile, "\\x%02x ", i);
3318                        c += 5;
3319                        }
3320                    }                    }
3321                  }                  }
3322                  fprintf(outfile, "\n");
3323                }                }
3324              fprintf(outfile, "\n");              }
3325              }
3326    
3327            /* Show this only if the JIT was set by /S, not by -s. */
3328    
3329            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3330              {
3331              int jit;
3332              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3333                {
3334                if (jit)
3335                  fprintf(outfile, "JIT study was successful\n");
3336                else
3337    #ifdef SUPPORT_JIT
3338                  fprintf(outfile, "JIT study was not successful\n");
3339    #else
3340                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3341    #endif
3342              }              }
3343            }            }
3344          }          }
# Line 1649  while (!done) Line 3357  while (!done)
3357          }          }
3358        else        else
3359          {          {
3360          uschar sbuf[8];          pcre_uint8 sbuf[8];
3361          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3362          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3363          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3364          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3365            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3366          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3367          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3368          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3369          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3370            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3371    
3372          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3373              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1667  while (!done) Line 3376  while (!done)
3376            }            }
3377          else          else
3378            {            {
3379            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3380    
3381              /* If there is study data, write it. */
3382    
3383            if (extra != NULL)            if (extra != NULL)
3384              {              {
3385              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1677  while (!done) Line 3389  while (!done)
3389                  strerror(errno));                  strerror(errno));
3390                }                }
3391              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
3392              }              }
3393            }            }
3394          fclose(f);          fclose(f);
3395          }          }
3396    
3397        new_free(re);        new_free(re);
3398        if (extra != NULL) new_free(extra);        if (extra != NULL)
3399        if (tables != NULL) new_free((void *)tables);          {
3400            PCRE_FREE_STUDY(extra);
3401            }
3402          if (locale_set)
3403            {
3404            new_free((void *)tables);
3405            setlocale(LC_CTYPE, "C");
3406            locale_set = 0;
3407            }
3408        continue;  /* With next regex */        continue;  /* With next regex */
3409        }        }
3410      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1694  while (!done) Line 3413  while (!done)
3413    
3414    for (;;)    for (;;)
3415      {      {
3416      uschar *q;      pcre_uint8 *q;
3417      uschar *bptr;      pcre_uint8 *bptr;
3418      int *use_offsets = offsets;      int *use_offsets = offsets;
3419      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3420      int callout_data = 0;      int callout_data = 0;
3421      int callout_data_set = 0;      int callout_data_set = 0;
3422      int count, c;      int count, c;
3423      int copystrings = 0;      int copystrings = 0;
3424      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
3425      int getstrings = 0;      int getstrings = 0;
3426      int getlist = 0;      int getlist = 0;
3427      int gmatched = 0;      int gmatched = 0;
3428      int start_offset = 0;      int start_offset = 0;
3429        int start_offset_sign = 1;
3430      int g_notempty = 0;      int g_notempty = 0;
3431      int use_dfa = 0;      int use_dfa = 0;
3432    
     options = 0;  
   
3433      *copynames = 0;      *copynames = 0;
3434      *getnames = 0;      *getnames = 0;
3435    
3436      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3437      getnamesptr = getnames;      cn16ptr = copynames;
3438        gn16ptr = getnames;
3439    #endif
3440    #ifdef SUPPORT_PCRE8
3441        cn8ptr = copynames8;
3442        gn8ptr = getnames8;
3443    #endif
3444    
3445      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3446      first_callout = 1;      first_callout = 1;
3447        last_callout_mark = NULL;
3448      callout_extra = 0;      callout_extra = 0;
3449      callout_count = 0;      callout_count = 0;
3450      callout_fail_count = 999999;      callout_fail_count = 999999;
3451      callout_fail_id = -1;      callout_fail_id = -1;
3452      show_malloc = 0;      show_malloc = 0;
3453        options = 0;
3454    
3455      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3456        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 1734  while (!done) Line 3460  while (!done)
3460        {        {
3461        if (extend_inputline(infile, buffer + len, "data> ") == NULL)        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3462          {          {
3463          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
3464              {
3465              fprintf(outfile, "\n");
3466              break;
3467              }
3468          done = 1;          done = 1;
3469          goto CONTINUE;          goto CONTINUE;
3470          }          }
# Line 1756  while (!done) Line 3486  while (!done)
3486        int i = 0;        int i = 0;
3487        int n = 0;        int n = 0;
3488    
3489        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3490          In non-UTF mode, allow the value of the byte to fall through to later,
3491          where values greater than 127 are turned into UTF-8 when running in
3492          16-bit mode. */
3493    
3494          if (c != '\\')
3495            {
3496            if (use_utf)
3497              {
3498              *q++ = c;
3499              continue;
3500              }
3501            }
3502    
3503          /* Handle backslash escapes */
3504    
3505          else switch ((c = *p++))
3506          {          {
3507          case 'a': c =    7; break;          case 'a': c =    7; break;
3508          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 1772  while (!done) Line 3518  while (!done)
3518          c -= '0';          c -= '0';
3519          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3520            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3521          break;          break;
3522    
3523          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3524          if (*p == '{')          if (*p == '{')
3525            {            {
3526            unsigned char *pt = p;            pcre_uint8 *pt = p;
3527            c = 0;            c = 0;
3528            while (isxdigit(*(++pt)))  
3529              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3530              when isxdigit() is a macro that refers to its argument more than
3531              once. This is banned by the C Standard, but apparently happens in at
3532              least one MacOS environment. */
3533    
3534              for (pt++; isxdigit(*pt); pt++)
3535                {
3536                if (++i == 9)
3537                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3538                                   "using only the first eight.\n");
3539                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3540                }
3541            if (*pt == '}')            if (*pt == '}')
3542              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             utn = ord2utf8(c, buff8);  
             for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
             c = buff8[ii];   /* Last byte */  
3543              p = pt + 1;              p = pt + 1;
3544              break;              break;
3545              }              }
3546            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3547            }            }
 #endif  
3548    
3549          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3550            allows UTF-8 characters to be constructed byte by byte, and also allows
3551            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3552            Otherwise, pass it down to later code so that it can be turned into
3553            UTF-8 when running in 16-bit mode. */
3554    
3555          c = 0;          c = 0;
3556          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3557            {            {
3558            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3559            p++;            p++;
3560            }            }
3561            if (use_utf)
3562              {
3563              *q++ = c;
3564              continue;
3565              }
3566          break;          break;
3567    
3568          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 1825  while (!done) Line 3570  while (!done)
3570          continue;          continue;
3571    
3572          case '>':          case '>':
3573            if (*p == '-')
3574              {
3575              start_offset_sign = -1;
3576              p++;
3577              }
3578          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3579            start_offset *= start_offset_sign;
3580          continue;          continue;
3581    
3582          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1844  while (!done) Line 3595  while (!done)
3595            }            }
3596          else if (isalnum(*p))          else if (isalnum(*p))
3597            {            {
3598            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3599            }            }
3600          else if (*p == '+')          else if (*p == '+')
3601            {            {
# Line 1860  while (!done) Line 3604  while (!done)
3604            }            }
3605          else if (*p == '-')          else if (*p == '-')
3606            {            {
3607            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3608            p++;            p++;
3609            }            }
3610          else if (*p == '!')          else if (*p == '!')
# Line 1898  while (!done) Line 3642  while (!done)
3642  #endif  #endif
3643            use_dfa = 1;            use_dfa = 1;
3644          continue;          continue;
3645    #endif
3646    
3647    #if !defined NODFA
3648          case 'F':          case 'F':
3649          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
3650          continue;          continue;
# Line 1912  while (!done) Line 3658  while (!done)
3658            }            }
3659          else if (isalnum(*p))          else if (isalnum(*p))
3660            {            {
3661            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3662            while (isalnum(*p)) *npp++ = *p++;            }
3663            *npp++ = 0;          continue;
3664            *npp = 0;  
3665            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3666            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3667              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3668            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3669                && extra->executable_jit != NULL)
3670              {
3671              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3672              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3673              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3674            }            }
3675          continue;          continue;
3676    
# Line 1932  while (!done) Line 3683  while (!done)
3683          continue;          continue;
3684    
3685          case 'N':          case 'N':
3686          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
3687              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3688            else
3689              options |= PCRE_NOTEMPTY;
3690          continue;          continue;
3691    
3692          case 'O':          case 'O':
# Line 1955  while (!done) Line 3709  while (!done)
3709          continue;          continue;
3710    
3711          case 'P':          case 'P':
3712          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3713              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3714          continue;          continue;
3715    
3716          case 'Q':          case 'Q':
# Line 1990  while (!done) Line 3745  while (!done)
3745          show_malloc = 1;          show_malloc = 1;
3746          continue;          continue;
3747    
3748            case 'Y':
3749            options |= PCRE_NO_START_OPTIMIZE;
3750            continue;
3751    
3752          case 'Z':          case 'Z':
3753          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
3754          continue;          continue;
# Line 2007  while (!done) Line 3766  while (!done)
3766            }            }
3767          continue;          continue;
3768          }          }
3769        *q++ = c;  
3770          /* We now have a character value in c that may be greater than 255. In
3771          16-bit mode, we always convert characters to UTF-8 so that values greater
3772          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3773          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3774          mode must have come from \x{...} or octal constructs because values from
3775          \x.. get this far only in non-UTF mode. */
3776    
3777    #if !defined NOUTF || defined SUPPORT_PCRE16
3778          if (use_pcre16 || use_utf)
3779            {
3780            pcre_uint8 buff8[8];
3781            int ii, utn;
3782            utn = ord2utf8(c, buff8);
3783            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3784            }
3785          else
3786    #endif
3787            {
3788            if (c > 255)
3789              {
3790              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3791                "and UTF-8 mode is not enabled.\n", c);
3792              fprintf(outfile, "** Truncation will probably give the wrong "
3793                "result.\n");
3794              }
3795            *q++ = c;
3796            }
3797        }        }
3798    
3799        /* Reached end of subject string */
3800    
3801      *q = 0;      *q = 0;
3802      len = q - dbuffer;      len = (int)(q - dbuffer);
3803    
3804        /* Move the data to the end of the buffer so that a read over the end of
3805        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3806        we are using the POSIX interface, we must include the terminating zero. */
3807    
3808    #if !defined NOPOSIX
3809        if (posix || do_posix)
3810          {
3811          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3812          bptr += buffer_size - len - 1;
3813          }
3814        else
3815    #endif
3816          {
3817          memmove(bptr + buffer_size - len, bptr, len);
3818          bptr += buffer_size - len;
3819          }
3820    
3821      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
3822        {        {
# Line 2031  while (!done) Line 3837  while (!done)
3837          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3838        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3839        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3840          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3841    
3842        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3843    
# Line 2052  while (!done) Line 3859  while (!done)
3859            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3860              {              {
3861              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3862              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3863                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3864              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3865              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3866                {                {
3867                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3868                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3869                  outfile);                  outfile);
3870                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3871                }                }
# Line 2066  while (!done) Line 3873  while (!done)
3873            }            }
3874          }          }
3875        free(pmatch);        free(pmatch);
3876          goto NEXT_DATA;
3877        }        }
3878    
3879    #endif  /* !defined NOPOSIX */
3880    
3881      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3882    
3883      else  #ifdef SUPPORT_PCRE16
3884  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3885          {
3886          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3887          switch(len)
3888            {
3889            case -1:
3890            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3891              "converted to UTF-16\n");
3892            goto NEXT_DATA;
3893    
3894            case -2:
3895            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3896              "cannot be converted to UTF-16\n");
3897            goto NEXT_DATA;
3898    
3899            case -3:
3900            fprintf(outfile, "**Failed: character value greater than 0xffff "
3901              "cannot be converted to 16-bit in non-UTF mode\n");
3902            goto NEXT_DATA;
3903    
3904            default:
3905            break;
3906            }
3907          bptr = (pcre_uint8 *)buffer16;
3908          }
3909    #endif
3910    
3911        /* Ensure that there is a JIT callback if we want to verify that JIT was
3912        actually used. If jit_stack == NULL, no stack has yet been assigned. */
3913    
3914        if (verify_jit && jit_stack == NULL && extra != NULL)
3915           { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3916    
3917      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3918        {        {
3919          markptr = NULL;
3920          jit_was_used = FALSE;
3921    
3922        if (timeitm > 0)        if (timeitm > 0)
3923          {          {
3924          register int i;          register int i;
# Line 2086  while (!done) Line 3930  while (!done)
3930            {            {
3931            int workspace[1000];            int workspace[1000];
3932            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3933              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              {
3934                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3935                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets, workspace,
3936                  (sizeof(workspace)/sizeof(int)));
3937                }
3938            }            }
3939          else          else
3940  #endif  #endif
3941    
3942          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3943            count = pcre_exec(re, extra, (char *)bptr, len,            {
3944              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3945                (options | g_notempty), use_offsets, use_size_offsets);
3946              }
3947          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3948          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3949            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2105  while (!done) Line 3952  while (!done)
3952    
3953        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3954        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
3955        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
3956          running of pcre_exec(), so disable the JIT optimization. This makes it
3957          possible to run the same set of tests with and without JIT externally
3958          requested. */
3959    
3960        if (find_match_limit)        if (find_match_limit)
3961          {          {
# Line 2114  while (!done) Line 3964  while (!done)
3964            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3965            extra->flags = 0;            extra->flags = 0;
3966            }            }
3967            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3968    
3969          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3970            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2137  while (!done) Line 3988  while (!done)
3988            }            }
3989          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3990          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3991          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3992            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3993          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3994          }          }
# Line 2149  while (!done) Line 4000  while (!done)
4000        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
4001          {          {
4002          int workspace[1000];          int workspace[1000];
4003          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4004            options | g_notempty, use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, workspace,
4005            sizeof(workspace)/sizeof(int));            (sizeof(workspace)/sizeof(int)));
4006          if (count == 0)          if (count == 0)
4007            {            {
4008            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2162  while (!done) Line 4013  while (!done)
4013    
4014        else        else
4015          {          {
4016          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4017            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
4018          if (count == 0)          if (count == 0)
4019            {            {
4020            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
4021            count = use_size_offsets/3;            count = use_size_offsets/3;
4022            }            }
4023          }          }
4024    
4025        /* Matched */        /* Matched */
4026    
4027        if (count >= 0)        if (count >= 0)
4028          {          {
4029          int i, maxcount;          int i, maxcount;
4030            void *cnptr, *gnptr;
4031    
4032  #if !defined NODFA  #if !defined NODFA
4033          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2197  while (!done) Line 4049  while (!done)
4049              }              }
4050            }            }
4051    
4052            /* do_allcaps requests showing of all captures in the pattern, to check
4053            unset ones at the end. */
4054    
4055            if (do_allcaps)
4056              {
4057              if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4058                goto SKIP_DATA;
4059              count++;   /* Allow for full match */
4060              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4061              }
4062    
4063            /* Output the captured substrings */
4064    
4065          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
4066            {            {
4067            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
4068                {
4069                if (use_offsets[i] != -1)
4070                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4071                    use_offsets[i], i);
4072                if (use_offsets[i+1] != -1)
4073                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4074                    use_offsets[i+1], i+1);
4075              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
4076                }
4077            else            else
4078              {              {
4079              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
4080              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr, use_offsets[i],
4081                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
4082                if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4083              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4084              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
4085                {                {
4086                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
4087                  {                PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4088                  fprintf(outfile, " 0+ ");                  outfile);
4089                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
4090                }                }
4091              }              }
4092            }            }
4093    
4094            if (markptr != NULL)
4095              {
4096