/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 336 by ph10, Sat Apr 12 15:59:03 2008 UTC revision 1033 by ph10, Mon Sep 10 11:02:48 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 58  POSSIBILITY OF SUCH DAMAGE.
58  #include <locale.h>  #include <locale.h>
59  #include <errno.h>  #include <errno.h>
60    
61  #ifdef SUPPORT_LIBREADLINE  /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67    #ifdef HAVE_UNISTD_H
68  #include <unistd.h>  #include <unistd.h>
69    #endif
70    #if defined(SUPPORT_LIBREADLINE)
71  #include <readline/readline.h>  #include <readline/readline.h>
72  #include <readline/history.h>  #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80  #endif  #endif
   
81    
82  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
83  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 69  input mode under Windows. */ Line 93  input mode under Windows. */
93  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
94  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
95    
96    #ifndef isatty
97    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
98    #endif                         /* though in some environments they seem to   */
99                                   /* be already defined, hence the #ifndefs.    */
100    #ifndef fileno
101    #define fileno _fileno
102    #endif
103    
104    /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106    #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
112  #else  #else
113  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
114  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119  #define INPUT_MODE   "rb"  #define INPUT_MODE   "rb"
120  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
121  #endif  #endif
122    #endif
123    
124    #define PRIV(name) name
125    
126  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
127  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 87  here before pcre_internal.h so that the Line 133  here before pcre_internal.h so that the
133  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
134    
135  #include "pcre.h"  #include "pcre.h"
136    
137    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
138    /* Configure internal macros to 16 bit mode. */
139    #define COMPILE_PCRE16
140    #endif
141    
142  #include "pcre_internal.h"  #include "pcre_internal.h"
143    
144  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* The pcre_printint() function, which prints the internal form of a compiled
145  two copies, we include the source file here, changing the names of the external  regex, is held in a separate file so that (a) it can be compiled in either
146  symbols to prevent clashes. */  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
147    when that is compiled in debug mode. */
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
148    
149  #include "pcre_tables.c"  #ifdef SUPPORT_PCRE8
150    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151    #endif
152    #ifdef SUPPORT_PCRE16
153    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154    #endif
155    
156    /* We need access to some of the data tables that PCRE uses. So as not to have
157    to keep two copies, we include the source file here, changing the names of the
158    external symbols to prevent clashes. */
159    
160  /* We also need the pcre_printint() function for printing out compiled  #define PCRE_INCLUDED
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled.  
161    
162  The definition of the macro PRINTABLE, which determines whether to print an  #include "pcre_tables.c"
163    
164    /* The definition of the macro PRINTABLE, which determines whether to print an
165  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
166  contained in this file. We use it here also, in cases when the locale has not  the same as in the printint.src file. We use it here in cases when the locale
167  been explicitly changed, so as to get consistent output from systems that  has not been explicitly changed, so as to get consistent output from systems
168  differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
169    
170  #include "pcre_printint.src"  #ifdef EBCDIC
171    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
172    #else
173    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
174    #endif
175    
176  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
177    
178    /* Posix support is disabled in 16 bit only mode. */
179    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
180    #define NOPOSIX
181    #endif
182    
183  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
184  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 128  Makefile. */ Line 188  Makefile. */
188  #include "pcreposix.h"  #include "pcreposix.h"
189  #endif  #endif
190    
191  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
192  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
193  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
194  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
195  UTF8 support if PCRE is built without it. */  
196    #ifndef SUPPORT_UTF
197  #ifndef SUPPORT_UTF8  #ifndef NOUTF
198  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
199  #endif  #endif
200  #endif  #endif
201    
202    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
203    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
204    only from one place and is handled differently). I couldn't dream up any way of
205    using a single macro to do this in a generic way, because of the many different
206    argument requirements. We know that at least one of SUPPORT_PCRE8 and
207    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
208    use these in the definitions of generic macros.
209    
210    **** Special note about the PCHARSxxx macros: the address of the string to be
211    printed is always given as two arguments: a base address followed by an offset.
212    The base address is cast to the correct data size for 8 or 16 bit data; the
213    offset is in units of this size. If the string were given as base+offset in one
214    argument, the casting might be incorrectly applied. */
215    
216    #ifdef SUPPORT_PCRE8
217    
218    #define PCHARS8(lv, p, offset, len, f) \
219      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
220    
221    #define PCHARSV8(p, offset, len, f) \
222      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
223    
224    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
225      p = read_capture_name8(p, cn8, re)
226    
227    #define STRLEN8(p) ((int)strlen((char *)p))
228    
229    #define SET_PCRE_CALLOUT8(callout) \
230      pcre_callout = callout
231    
232    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
233       pcre_assign_jit_stack(extra, callback, userdata)
234    
235    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
236      re = pcre_compile((char *)pat, options, error, erroffset, tables)
237    
238    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
239        namesptr, cbuffer, size) \
240      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
241        (char *)namesptr, cbuffer, size)
242    
243    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
244      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
245    
246    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
247        offsets, size_offsets, workspace, size_workspace) \
248      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
249        offsets, size_offsets, workspace, size_workspace)
250    
251    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
252        offsets, size_offsets) \
253      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
254        offsets, size_offsets)
255    
256    #define PCRE_FREE_STUDY8(extra) \
257      pcre_free_study(extra)
258    
259    #define PCRE_FREE_SUBSTRING8(substring) \
260      pcre_free_substring(substring)
261    
262    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
263      pcre_free_substring_list(listptr)
264    
265    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
266        getnamesptr, subsptr) \
267      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
268        (char *)getnamesptr, subsptr)
269    
270    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
271      n = pcre_get_stringnumber(re, (char *)ptr)
272    
273    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
274      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
275    
276    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
277      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
278    
279    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
280      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
281    
282    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
283      pcre_printint(re, outfile, debug_lengths)
284    
285    #define PCRE_STUDY8(extra, re, options, error) \
286      extra = pcre_study(re, options, error)
287    
288    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
289      pcre_jit_stack_alloc(startsize, maxsize)
290    
291    #define PCRE_JIT_STACK_FREE8(stack) \
292      pcre_jit_stack_free(stack)
293    
294    #endif /* SUPPORT_PCRE8 */
295    
296    /* -----------------------------------------------------------*/
297    
298    #ifdef SUPPORT_PCRE16
299    
300    #define PCHARS16(lv, p, offset, len, f) \
301      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
302    
303    #define PCHARSV16(p, offset, len, f) \
304      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
305    
306    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
307      p = read_capture_name16(p, cn16, re)
308    
309    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
310    
311    #define SET_PCRE_CALLOUT16(callout) \
312      pcre16_callout = (int (*)(pcre16_callout_block *))callout
313    
314    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
315      pcre16_assign_jit_stack((pcre16_extra *)extra, \
316        (pcre16_jit_callback)callback, userdata)
317    
318    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
319      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
320        tables)
321    
322    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
323        namesptr, cbuffer, size) \
324      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
325        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
326    
327    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
328      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
329        (PCRE_UCHAR16 *)cbuffer, size/2)
330    
331    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
332        offsets, size_offsets, workspace, size_workspace) \
333      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
334        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
335        workspace, size_workspace)
336    
337    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338        offsets, size_offsets) \
339      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
340        len, start_offset, options, offsets, size_offsets)
341    
342    #define PCRE_FREE_STUDY16(extra) \
343      pcre16_free_study((pcre16_extra *)extra)
344    
345    #define PCRE_FREE_SUBSTRING16(substring) \
346      pcre16_free_substring((PCRE_SPTR16)substring)
347    
348    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
349      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
350    
351    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
352        getnamesptr, subsptr) \
353      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
354        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
355    
356    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
357      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
358    
359    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
360      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
361        (PCRE_SPTR16 *)(void*)subsptr)
362    
363    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
364      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
365        (PCRE_SPTR16 **)(void*)listptr)
366    
367    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
368      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
369        tables)
370    
371    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
372      pcre16_printint(re, outfile, debug_lengths)
373    
374    #define PCRE_STUDY16(extra, re, options, error) \
375      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
376    
377    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
378      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
379    
380    #define PCRE_JIT_STACK_FREE16(stack) \
381      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
382    
383    #endif /* SUPPORT_PCRE16 */
384    
385    
386    /* ----- Both modes are supported; a runtime test is needed, except for
387    pcre_config(), when it doesn't matter which version is called. The JIT stack
388    macros below also select the library at run time. ----- */
389    
390    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
391    
392    #define CHAR_SIZE (use_pcre16? 2:1)
393    
394    #define PCHARS(lv, p, offset, len, f) \
395      if (use_pcre16) \
396        PCHARS16(lv, p, offset, len, f); \
397      else \
398        PCHARS8(lv, p, offset, len, f)
399    
400    #define PCHARSV(p, offset, len, f) \
401      if (use_pcre16) \
402        PCHARSV16(p, offset, len, f); \
403      else \
404        PCHARSV8(p, offset, len, f)
405    
406    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
407      if (use_pcre16) \
408        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
409      else \
410        READ_CAPTURE_NAME8(p, cn8, cn16, re)
411    
412    #define SET_PCRE_CALLOUT(callout) \
413      if (use_pcre16) \
414        SET_PCRE_CALLOUT16(callout); \
415      else \
416        SET_PCRE_CALLOUT8(callout)
417    
418    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
419    
420    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
421      if (use_pcre16) \
422        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
423      else \
424        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
425    
426    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
427      if (use_pcre16) \
428        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
429      else \
430        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
431    
432    #define PCRE_CONFIG pcre_config
433    
434    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
435        namesptr, cbuffer, size) \
436      if (use_pcre16) \
437        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
438          namesptr, cbuffer, size); \
439      else \
440        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
441          namesptr, cbuffer, size)
442    
443    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
444      if (use_pcre16) \
445        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
446      else \
447        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
448    
449    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
450        offsets, size_offsets, workspace, size_workspace) \
451      if (use_pcre16) \
452        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
453          offsets, size_offsets, workspace, size_workspace); \
454      else \
455        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
456          offsets, size_offsets, workspace, size_workspace)
457    
458    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
459        offsets, size_offsets) \
460      if (use_pcre16) \
461        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
462          offsets, size_offsets); \
463      else \
464        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
465          offsets, size_offsets)
466    
467    #define PCRE_FREE_STUDY(extra) \
468      if (use_pcre16) \
469        PCRE_FREE_STUDY16(extra); \
470      else \
471        PCRE_FREE_STUDY8(extra)
472    
473    #define PCRE_FREE_SUBSTRING(substring) \
474      if (use_pcre16) \
475        PCRE_FREE_SUBSTRING16(substring); \
476      else \
477        PCRE_FREE_SUBSTRING8(substring)
478    
479    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
480      if (use_pcre16) \
481        PCRE_FREE_SUBSTRING_LIST16(listptr); \
482      else \
483        PCRE_FREE_SUBSTRING_LIST8(listptr)
484    
485    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
486        getnamesptr, subsptr) \
487      if (use_pcre16) \
488        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
489          getnamesptr, subsptr); \
490      else \
491        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
492          getnamesptr, subsptr)
493    
494    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
495      if (use_pcre16) \
496        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
497      else \
498        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
499    
500    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
501      if (use_pcre16) \
502        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
503      else \
504        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
505    
506    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
507      if (use_pcre16) \
508        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
509      else \
510        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
511    
512    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
513      (use_pcre16 ? \
514         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
515        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
516    
517    #define PCRE_JIT_STACK_FREE(stack) \
518      if (use_pcre16) \
519        PCRE_JIT_STACK_FREE16(stack); \
520      else \
521        PCRE_JIT_STACK_FREE8(stack)
522    
523    #define PCRE_MAKETABLES \
524      (use_pcre16? pcre16_maketables() : pcre_maketables())
525    
526    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
527      if (use_pcre16) \
528        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
529      else \
530        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
531    
532    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
533      if (use_pcre16) \
534        PCRE_PRINTINT16(re, outfile, debug_lengths); \
535      else \
536        PCRE_PRINTINT8(re, outfile, debug_lengths)
537    
538    #define PCRE_STUDY(extra, re, options, error) \
539      if (use_pcre16) \
540        PCRE_STUDY16(extra, re, options, error); \
541      else \
542        PCRE_STUDY8(extra, re, options, error)
543    
544    /* ----- Only 8-bit mode is supported ----- */
545    
546    #elif defined SUPPORT_PCRE8
547    #define CHAR_SIZE                 1
548    #define PCHARS                    PCHARS8
549    #define PCHARSV                   PCHARSV8
550    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
551    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
552    #define STRLEN                    STRLEN8
553    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
554    #define PCRE_COMPILE              PCRE_COMPILE8
555    #define PCRE_CONFIG               pcre_config
556    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
557    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
558    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
559    #define PCRE_EXEC                 PCRE_EXEC8
560    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
561    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
562    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
563    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
564    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
565    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
566    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
567    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
568    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
569    #define PCRE_MAKETABLES           pcre_maketables()
570    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
571    #define PCRE_PRINTINT             PCRE_PRINTINT8
572    #define PCRE_STUDY                PCRE_STUDY8
573    
574    /* ----- Only 16-bit mode is supported ----- */
575    
576    #else
577    #define CHAR_SIZE                 2
578    #define PCHARS                    PCHARS16
579    #define PCHARSV                   PCHARSV16
580    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
581    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
582    #define STRLEN                    STRLEN16
583    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
584    #define PCRE_COMPILE              PCRE_COMPILE16
585    #define PCRE_CONFIG               pcre16_config
586    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
587    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
588    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
589    #define PCRE_EXEC                 PCRE_EXEC16
590    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
591    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
592    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
593    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
594    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
595    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
596    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
597    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
598    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
599    #define PCRE_MAKETABLES           pcre16_maketables()
600    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
601    #define PCRE_PRINTINT             PCRE_PRINTINT16
602    #define PCRE_STUDY                PCRE_STUDY16
603    #endif
604    
605    /* ----- End of mode-specific function call macros ----- */
606    
607    
608  /* Other parameters */  /* Other parameters */
609    
# Line 151  UTF8 support if PCRE is built without it Line 615  UTF8 support if PCRE is built without it
615  #endif  #endif
616  #endif  #endif
617    
618    #if !defined NODFA
619    #define DFA_WS_DIMENSION 1000
620    #endif
621    
622  /* This is the default loop count for timing. */  /* This is the default loop count for timing. */
623    
624  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
# Line 165  static int callout_fail_count; Line 633  static int callout_fail_count;
633  static int callout_fail_id;  static int callout_fail_id;
634  static int debug_lengths;  static int debug_lengths;
635  static int first_callout;  static int first_callout;
636    static int jit_was_used;
637  static int locale_set = 0;  static int locale_set = 0;
638  static int show_malloc;  static int show_malloc;
639  static int use_utf8;  static int use_utf;
640  static size_t gotten_store;  static size_t gotten_store;
641    static size_t first_gotten_store = 0;
642    static const unsigned char *last_callout_mark = NULL;
643    
644  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
645    
646  static int buffer_size = 50000;  static int buffer_size = 50000;
647  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
648  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
649  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
650    
651    /* Another buffer is needed for translation to 16-bit character strings. It will
652    be obtained and extended as required. */
653    
654    #ifdef SUPPORT_PCRE16
655    static int buffer16_size = 0;
656    static pcre_uint16 *buffer16 = NULL;
657    
658    #ifdef SUPPORT_PCRE8
659    
660    /* We need the table of operator lengths that is used for 16-bit compiling, in
661    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
662    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
663    appropriately for the 16-bit world. Just as a safety check, make sure that
664    COMPILE_PCRE16 is *not* set. */
665    
666    #ifdef COMPILE_PCRE16
667    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
668    #endif
669    
670    #if LINK_SIZE == 2
671    #undef LINK_SIZE
672    #define LINK_SIZE 1
673    #elif LINK_SIZE == 3 || LINK_SIZE == 4
674    #undef LINK_SIZE
675    #define LINK_SIZE 2
676    #else
677    #error LINK_SIZE must be either 2, 3, or 4
678    #endif
679    
680    #undef IMM2_SIZE
681    #define IMM2_SIZE 1
682    
683    #endif /* SUPPORT_PCRE8 */
684    
685    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
686    #endif  /* SUPPORT_PCRE16 */
687    
688    /* If we have 8-bit support, default use_pcre16 to false; if there is also
689    16-bit support, it can be changed by an option. If there is no 8-bit support,
690    there must be 16-bit support, so default it to 1. */
691    
692    #ifdef SUPPORT_PCRE8
693    static int use_pcre16 = 0;
694    #else
695    static int use_pcre16 = 1;
696    #endif
697    
698    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
699    
700    static int jit_study_bits[] =
701      {
702      PCRE_STUDY_JIT_COMPILE,
703      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
704      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
705      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
706      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
707      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
708      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
709        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
710    };
711    
712    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
713      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
714    
715    /* Textual explanations for runtime error codes */
716    
/* Message table for runtime error codes. A NULL slot means no text is kept
here for that code (no error, handled specially, or never returned).
Presumably indexed by the negated return code - confirm against the caller. */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8/16 is handled specially */
  /* 11 */ NULL,  /* BADUTF8/16 offset is handled specially */
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */
  "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8/16 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached",
  /* 28 */ "pattern compiled in wrong mode: 8-bit/16-bit error",
  /* 29 */ "pattern compiled with other endianness",
  /* 30 */ "invalid data in workspace for DFA restart"
};
750    
751    
752    /*************************************************
753    *         Alternate character tables             *
754    *************************************************/
755    
756    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
757    using the default tables of the library. However, the T option can be used to
758    select alternate sets of tables, for different kinds of testing. Note also that
759    the L (locale) option also adjusts the tables. */
760    
761    /* This is the set of tables distributed as default with PCRE. It recognizes
762    only ASCII characters. */
763    
764    static const pcre_uint8 tables0[] = {
765    
766    /* This table is a lower casing table. */
767    
768        0,  1,  2,  3,  4,  5,  6,  7,
769        8,  9, 10, 11, 12, 13, 14, 15,
770       16, 17, 18, 19, 20, 21, 22, 23,
771       24, 25, 26, 27, 28, 29, 30, 31,
772       32, 33, 34, 35, 36, 37, 38, 39,
773       40, 41, 42, 43, 44, 45, 46, 47,
774       48, 49, 50, 51, 52, 53, 54, 55,
775       56, 57, 58, 59, 60, 61, 62, 63,
776       64, 97, 98, 99,100,101,102,103,
777      104,105,106,107,108,109,110,111,
778      112,113,114,115,116,117,118,119,
779      120,121,122, 91, 92, 93, 94, 95,
780       96, 97, 98, 99,100,101,102,103,
781      104,105,106,107,108,109,110,111,
782      112,113,114,115,116,117,118,119,
783      120,121,122,123,124,125,126,127,
784      128,129,130,131,132,133,134,135,
785      136,137,138,139,140,141,142,143,
786      144,145,146,147,148,149,150,151,
787      152,153,154,155,156,157,158,159,
788      160,161,162,163,164,165,166,167,
789      168,169,170,171,172,173,174,175,
790      176,177,178,179,180,181,182,183,
791      184,185,186,187,188,189,190,191,
792      192,193,194,195,196,197,198,199,
793      200,201,202,203,204,205,206,207,
794      208,209,210,211,212,213,214,215,
795      216,217,218,219,220,221,222,223,
796      224,225,226,227,228,229,230,231,
797      232,233,234,235,236,237,238,239,
798      240,241,242,243,244,245,246,247,
799      248,249,250,251,252,253,254,255,
800    
801    /* This table is a case flipping table. */
802    
803        0,  1,  2,  3,  4,  5,  6,  7,
804        8,  9, 10, 11, 12, 13, 14, 15,
805       16, 17, 18, 19, 20, 21, 22, 23,
806       24, 25, 26, 27, 28, 29, 30, 31,
807       32, 33, 34, 35, 36, 37, 38, 39,
808       40, 41, 42, 43, 44, 45, 46, 47,
809       48, 49, 50, 51, 52, 53, 54, 55,
810       56, 57, 58, 59, 60, 61, 62, 63,
811       64, 97, 98, 99,100,101,102,103,
812      104,105,106,107,108,109,110,111,
813      112,113,114,115,116,117,118,119,
814      120,121,122, 91, 92, 93, 94, 95,
815       96, 65, 66, 67, 68, 69, 70, 71,
816       72, 73, 74, 75, 76, 77, 78, 79,
817       80, 81, 82, 83, 84, 85, 86, 87,
818       88, 89, 90,123,124,125,126,127,
819      128,129,130,131,132,133,134,135,
820      136,137,138,139,140,141,142,143,
821      144,145,146,147,148,149,150,151,
822      152,153,154,155,156,157,158,159,
823      160,161,162,163,164,165,166,167,
824      168,169,170,171,172,173,174,175,
825      176,177,178,179,180,181,182,183,
826      184,185,186,187,188,189,190,191,
827      192,193,194,195,196,197,198,199,
828      200,201,202,203,204,205,206,207,
829      208,209,210,211,212,213,214,215,
830      216,217,218,219,220,221,222,223,
831      224,225,226,227,228,229,230,231,
832      232,233,234,235,236,237,238,239,
833      240,241,242,243,244,245,246,247,
834      248,249,250,251,252,253,254,255,
835    
836    /* This table contains bit maps for various character classes. Each map is 32
837    bytes long and the bits run from the least significant end of each byte. The
838    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
839    graph, print, punct, and cntrl. Other classes are built from combinations. */
840    
841      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
842      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
843      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845    
846      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
847      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850    
851      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
852      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
853      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
855    
856      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
858      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
859      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
860    
861      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
862      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
863      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
864      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
865    
866      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
867      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
868      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
869      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
870    
871      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
872      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
873      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
874      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
875    
876      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
877      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
878      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
879      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
880    
881      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
882      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
883      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
884      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
885    
886      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
887      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
888      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
889      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
890    
891    /* This table identifies various classes of character by individual bits:
892      0x01   white space character
893      0x02   letter
894      0x04   decimal digit
895      0x08   hexadecimal digit
896      0x10   alphanumeric or '_'
897      0x80   regular expression metacharacter or binary zero
898    */
899    
900      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
901      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
902      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
903      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
904      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
905      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
906      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
907      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
908      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
909      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
910      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
911      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
912      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
913      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
914      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
915      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
916      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
917      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
918      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
919      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
920      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
921      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
922      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
923      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
924      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
925      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
926      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
927      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
928      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
929      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
930      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
931      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
932    
933    /* This is a set of tables that came originally from a Windows user. It seems to
934    be at least an approximation of ISO 8859. In particular, there are characters
935    greater than 128 that are marked as spaces, letters, etc. */
936    
937    static const pcre_uint8 tables1[] = {
938    0,1,2,3,4,5,6,7,
939    8,9,10,11,12,13,14,15,
940    16,17,18,19,20,21,22,23,
941    24,25,26,27,28,29,30,31,
942    32,33,34,35,36,37,38,39,
943    40,41,42,43,44,45,46,47,
944    48,49,50,51,52,53,54,55,
945    56,57,58,59,60,61,62,63,
946    64,97,98,99,100,101,102,103,
947    104,105,106,107,108,109,110,111,
948    112,113,114,115,116,117,118,119,
949    120,121,122,91,92,93,94,95,
950    96,97,98,99,100,101,102,103,
951    104,105,106,107,108,109,110,111,
952    112,113,114,115,116,117,118,119,
953    120,121,122,123,124,125,126,127,
954    128,129,130,131,132,133,134,135,
955    136,137,138,139,140,141,142,143,
956    144,145,146,147,148,149,150,151,
957    152,153,154,155,156,157,158,159,
958    160,161,162,163,164,165,166,167,
959    168,169,170,171,172,173,174,175,
960    176,177,178,179,180,181,182,183,
961    184,185,186,187,188,189,190,191,
962    224,225,226,227,228,229,230,231,
963    232,233,234,235,236,237,238,239,
964    240,241,242,243,244,245,246,215,
965    248,249,250,251,252,253,254,223,
966    224,225,226,227,228,229,230,231,
967    232,233,234,235,236,237,238,239,
968    240,241,242,243,244,245,246,247,
969    248,249,250,251,252,253,254,255,
970    0,1,2,3,4,5,6,7,
971    8,9,10,11,12,13,14,15,
972    16,17,18,19,20,21,22,23,
973    24,25,26,27,28,29,30,31,
974    32,33,34,35,36,37,38,39,
975    40,41,42,43,44,45,46,47,
976    48,49,50,51,52,53,54,55,
977    56,57,58,59,60,61,62,63,
978    64,97,98,99,100,101,102,103,
979    104,105,106,107,108,109,110,111,
980    112,113,114,115,116,117,118,119,
981    120,121,122,91,92,93,94,95,
982    96,65,66,67,68,69,70,71,
983    72,73,74,75,76,77,78,79,
984    80,81,82,83,84,85,86,87,
985    88,89,90,123,124,125,126,127,
986    128,129,130,131,132,133,134,135,
987    136,137,138,139,140,141,142,143,
988    144,145,146,147,148,149,150,151,
989    152,153,154,155,156,157,158,159,
990    160,161,162,163,164,165,166,167,
991    168,169,170,171,172,173,174,175,
992    176,177,178,179,180,181,182,183,
993    184,185,186,187,188,189,190,191,
994    224,225,226,227,228,229,230,231,
995    232,233,234,235,236,237,238,239,
996    240,241,242,243,244,245,246,215,
997    248,249,250,251,252,253,254,223,
998    192,193,194,195,196,197,198,199,
999    200,201,202,203,204,205,206,207,
1000    208,209,210,211,212,213,214,247,
1001    216,217,218,219,220,221,222,255,
1002    0,62,0,0,1,0,0,0,
1003    0,0,0,0,0,0,0,0,
1004    32,0,0,0,1,0,0,0,
1005    0,0,0,0,0,0,0,0,
1006    0,0,0,0,0,0,255,3,
1007    126,0,0,0,126,0,0,0,
1008    0,0,0,0,0,0,0,0,
1009    0,0,0,0,0,0,0,0,
1010    0,0,0,0,0,0,255,3,
1011    0,0,0,0,0,0,0,0,
1012    0,0,0,0,0,0,12,2,
1013    0,0,0,0,0,0,0,0,
1014    0,0,0,0,0,0,0,0,
1015    254,255,255,7,0,0,0,0,
1016    0,0,0,0,0,0,0,0,
1017    255,255,127,127,0,0,0,0,
1018    0,0,0,0,0,0,0,0,
1019    0,0,0,0,254,255,255,7,
1020    0,0,0,0,0,4,32,4,
1021    0,0,0,128,255,255,127,255,
1022    0,0,0,0,0,0,255,3,
1023    254,255,255,135,254,255,255,7,
1024    0,0,0,0,0,4,44,6,
1025    255,255,127,255,255,255,127,255,
1026    0,0,0,0,254,255,255,255,
1027    255,255,255,255,255,255,255,127,
1028    0,0,0,0,254,255,255,255,
1029    255,255,255,255,255,255,255,255,
1030    0,2,0,0,255,255,255,255,
1031    255,255,255,255,255,255,255,127,
1032    0,0,0,0,255,255,255,255,
1033    255,255,255,255,255,255,255,255,
1034    0,0,0,0,254,255,0,252,
1035    1,0,0,248,1,0,0,120,
1036    0,0,0,0,254,255,255,255,
1037    0,0,128,0,0,0,128,0,
1038    255,255,255,255,0,0,0,0,
1039    0,0,0,0,0,0,0,128,
1040    255,255,255,255,0,0,0,0,
1041    0,0,0,0,0,0,0,0,
1042    128,0,0,0,0,0,0,0,
1043    0,1,1,0,1,1,0,0,
1044    0,0,0,0,0,0,0,0,
1045    0,0,0,0,0,0,0,0,
1046    1,0,0,0,128,0,0,0,
1047    128,128,128,128,0,0,128,0,
1048    28,28,28,28,28,28,28,28,
1049    28,28,0,0,0,0,0,128,
1050    0,26,26,26,26,26,26,18,
1051    18,18,18,18,18,18,18,18,
1052    18,18,18,18,18,18,18,18,
1053    18,18,18,128,128,0,128,16,
1054    0,26,26,26,26,26,26,18,
1055    18,18,18,18,18,18,18,18,
1056    18,18,18,18,18,18,18,18,
1057    18,18,18,128,128,0,0,0,
1058    0,0,0,0,0,1,0,0,
1059    0,0,0,0,0,0,0,0,
1060    0,0,0,0,0,0,0,0,
1061    0,0,0,0,0,0,0,0,
1062    1,0,0,0,0,0,0,0,
1063    0,0,18,0,0,0,0,0,
1064    0,0,20,20,0,18,0,0,
1065    0,20,18,0,0,0,0,0,
1066    18,18,18,18,18,18,18,18,
1067    18,18,18,18,18,18,18,18,
1068    18,18,18,18,18,18,18,0,
1069    18,18,18,18,18,18,18,18,
1070    18,18,18,18,18,18,18,18,
1071    18,18,18,18,18,18,18,18,
1072    18,18,18,18,18,18,18,0,
1073    18,18,18,18,18,18,18,18
1074    };
1075    
1076    
1077    
1078    
1079    #ifndef HAVE_STRERROR
1080    /*************************************************
1081    *     Provide strerror() for non-ANSI libraries  *
1082    *************************************************/
1083    
1084    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1085    in their libraries, but can provide the same facility by this simple
1086    alternative function. */
1087    
1088    extern int   sys_nerr;
1089    extern char *sys_errlist[];
1090    
1091    char *
1092    strerror(int n)
1093    {
1094    if (n < 0 || n >= sys_nerr) return "unknown error number";
1095    return sys_errlist[n];
1096    }
1097    #endif /* HAVE_STRERROR */
1098    
1099    
1100    
1101    /*************************************************
1102    *       Print newline configuration              *
1103    *************************************************/
1104    
1105    /*
1106    Arguments:
1107      rc         the return code from PCRE_CONFIG_NEWLINE
1108      isc        TRUE if called from "-C newline"
1109    Returns:     nothing
1110    */
1111    
1112    static void
1113    print_newline_config(int rc, BOOL isc)
1114    {
1115    const char *s = NULL;
1116    if (!isc) printf("  Newline sequence is ");
1117    switch(rc)
1118      {
1119      case CHAR_CR: s = "CR"; break;
1120      case CHAR_LF: s = "LF"; break;
1121      case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1122      case -1: s = "ANY"; break;
1123      case -2: s = "ANYCRLF"; break;
1124    
1125      default:
1126      printf("a non-standard value: 0x%04x\n", rc);
1127      return;
1128      }
1129    
1130    printf("%s\n", s);
1131    }
1132    
1133    
1134    
1135    /*************************************************
1136    *         JIT memory callback                    *
1137    *************************************************/
1138    
1139    static pcre_jit_stack* jit_callback(void *arg)
1140    {
1141    jit_was_used = TRUE;
1142    return (pcre_jit_stack *)arg;
1143    }
1144    
1145    
1146    #if !defined NOUTF || defined SUPPORT_PCRE16
1147    /*************************************************
1148    *            Convert UTF-8 string to value       *
1149    *************************************************/
1150    
1151    /* This function takes one or more bytes that represents a UTF-8 character,
1152    and returns the value of the character.
1153    
1154    Argument:
1155      utf8bytes   a pointer to the byte vector
1156      vptr        a pointer to an int to receive the value
1157    
1158    Returns:      >  0 => the number of bytes consumed
1159                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1160    */
1161    
1162    static int
1163    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1164    {
1165    int c = *utf8bytes++;
1166    int d = c;
1167    int i, j, s;
1168    
1169    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1170      {
1171      if ((d & 0x80) == 0) break;
1172      d <<= 1;
1173      }
1174    
1175    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1176    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1177    
1178    /* i now has a value in the range 1-5 */
1179    
1180    s = 6*i;
1181    d = (c & utf8_table3[i]) << s;
1182    
1183    for (j = 0; j < i; j++)
1184      {
1185      c = *utf8bytes++;
1186      if ((c & 0xc0) != 0x80) return -(j+1);
1187      s -= 6;
1188      d |= (c & 0x3f) << s;
1189      }
1190    
1191    /* Check that encoding was the correct unique one */
1192    
1193    for (j = 0; j < utf8_table1_size; j++)
1194      if (d <= utf8_table1[j]) break;
1195    if (j != i) return -(i+1);
1196    
1197    /* Valid value */
1198    
1199    *vptr = d;
1200    return i+1;
1201    }
1202    #endif /* NOUTF || SUPPORT_PCRE16 */
1203    
1204    
1205    
1206    #if !defined NOUTF || defined SUPPORT_PCRE16
1207    /*************************************************
1208    *       Convert character value to UTF-8         *
1209    *************************************************/
1210    
1211    /* This function takes an integer value in the range 0 - 0x7fffffff
1212    and encodes it as a UTF-8 character in 1 to 6 bytes.
1213    
1214    Arguments:
1215      cvalue     the character value
1216      utf8bytes  pointer to buffer for result - at least 6 bytes long
1217    
1218    Returns:     number of bytes placed in the buffer
1219    */
1220    
1221    static int
1222    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1223    {
1224    register int i, j;
1225    for (i = 0; i < utf8_table1_size; i++)
1226      if (cvalue <= utf8_table1[i]) break;
1227    utf8bytes += i;
1228    for (j = i; j > 0; j--)
1229     {
1230     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1231     cvalue >>= 6;
1232     }
1233    *utf8bytes = utf8_table2[i] | cvalue;
1234    return i + 1;
1235    }
1236    #endif
1237    
1238    
1239    #ifdef SUPPORT_PCRE16
1240    /*************************************************
1241    *         Convert a string to 16-bit             *
1242    *************************************************/
1243    
1244    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1245    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1246    double, because values up to 0xffff use 1 to 3 bytes in UTF-8 and exactly 2
1247    bytes in UTF-16, while higher values use 4 bytes in UTF-8 and 4 bytes (a
1248    surrogate pair) in UTF-16. The result is always left in buffer16.
1249    
1250    Note that this function does not object to surrogate values. This is
1251    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1252    for the purpose of testing that they are correctly faulted.
1253    
1254    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1255    in UTF-8 so that values greater than 255 can be handled.
1256    
1257    Arguments:
1258      data       TRUE if converting a data line; FALSE for a regex
1259      p          points to a byte string
1260      utf        true if UTF-8 (to be converted to UTF-16)
1261      len        number of bytes in the string (excluding trailing zero)
1262    
1263    Returns:     number of 16-bit data items used (excluding trailing zero)
1264                 OR -1 if a UTF-8 string is malformed
1265                 OR -2 if a value > 0x10ffff is encountered
1266                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1267    */
1268    
1269    static int
1270    to16(int data, pcre_uint8 *p, int utf, int len)
1271    {
1272    pcre_uint16 *pp;
1273    
1274    if (buffer16_size < 2*len + 2)
1275      {
1276      if (buffer16 != NULL) free(buffer16);
1277      buffer16_size = 2*len + 2;
1278      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1279      if (buffer16 == NULL)
1280        {
1281        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1282        exit(1);
1283        }
1284      }
1285    
1286    pp = buffer16;
1287    
1288    if (!utf && !data)
1289      {
1290      while (len-- > 0) *pp++ = *p++;
1291      }
1292    
1293    else
1294      {
1295      int c = 0;
1296      while (len > 0)
1297        {
1298        int chlen = utf82ord(p, &c);
1299        if (chlen <= 0) return -1;
1300        if (c > 0x10ffff) return -2;
1301        p += chlen;
1302        len -= chlen;
1303        if (c < 0x10000) *pp++ = c; else
1304          {
1305          if (!utf) return -3;
1306          c -= 0x10000;
1307          *pp++ = 0xD800 | (c >> 10);
1308          *pp++ = 0xDC00 | (c & 0x3ff);
1309          }
1310        }
1311      }
1312    
1313    *pp = 0;
1314    return pp - buffer16;
1315    }
1316    #endif
1317    
1318    
1319  /*************************************************  /*************************************************
1320  *        Read or extend an input line            *  *        Read or extend an input line            *
1321  *************************************************/  *************************************************/
# Line 202  Returns:       pointer to the start of n Line 1339  Returns:       pointer to the start of n
1339                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1340  */  */
1341    
1342  static uschar *  static pcre_uint8 *
1343  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1344  {  {
1345  uschar *here = start;  pcre_uint8 *here = start;
1346    
1347  for (;;)  for (;;)
1348    {    {
1349    int rlen = buffer_size - (here - buffer);    size_t rlen = (size_t)(buffer_size - (here - buffer));
1350    
1351    if (rlen > 1000)    if (rlen > 1000)
1352      {      {
1353      int dlen;      int dlen;
1354    
1355      /* If libreadline support is required, use readline() to read a line if the      /* If libreadline or libedit support is required, use readline() to read a
1356      input is a terminal. Note that readline() removes the trailing newline, so      line if the input is a terminal. Note that readline() removes the trailing
1357      we must put it back again, to be compatible with fgets(). */      newline, so we must put it back again, to be compatible with fgets(). */
1358    
1359  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1360      if (isatty(fileno(f)))      if (isatty(fileno(f)))
1361        {        {
1362        size_t len;        size_t len;
# Line 239  for (;;) Line 1376  for (;;)
1376      /* Read the next line by normal means, prompting if the file is stdin. */      /* Read the next line by normal means, prompting if the file is stdin. */
1377    
1378        {        {
1379        if (f == stdin) printf(prompt);        if (f == stdin) printf("%s", prompt);
1380        if (fgets((char *)here, rlen,  f) == NULL)        if (fgets((char *)here, rlen,  f) == NULL)
1381          return (here == start)? NULL : start;          return (here == start)? NULL : start;
1382        }        }
# Line 252  for (;;) Line 1389  for (;;)
1389    else    else
1390      {      {
1391      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1392      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1393      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1394      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1395    
1396      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1397        {        {
# Line 285  return NULL;  /* Control never gets here Line 1422  return NULL;  /* Control never gets here
1422    
1423    
1424    
   
   
   
   
1425  /*************************************************  /*************************************************
1426  *          Read number from string               *  *          Read number from string               *
1427  *************************************************/  *************************************************/
# Line 305  Returns:        the unsigned long Line 1438  Returns:        the unsigned long
1438  */  */
1439    
1440  static int  static int
1441  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1442  {  {
1443  int result = 0;  int result = 0;
1444  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 316  return(result); Line 1449  return(result);
1449    
1450    
1451    
   
 /*************************************************  
 *            Convert UTF-8 string to value       *  
 *************************************************/  
   
 /* This function takes one or more bytes that represents a UTF-8 character,  
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
 {  
 int c = *utf8bytes++;  
 int d = c;  
 int i, j, s;  
   
 for (i = -1; i < 6; i++)               /* i is number of additional bytes */  
   {  
   if ((d & 0x80) == 0) break;  
   d <<= 1;  
   }  
   
 if (i == -1) { *vptr = c; return 1; }  /* ascii character */  
 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  
   
 /* i now has a value in the range 1-5 */  
   
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
   
 for (j = 0; j < i; j++)  
   {  
   c = *utf8bytes++;  
   if ((c & 0xc0) != 0x80) return -(j+1);  
   s -= 6;  
   d |= (c & 0x3f) << s;  
   }  
   
 /* Check that encoding was the correct unique one */  
   
 for (j = 0; j < utf8_table1_size; j++)  
   if (d <= utf8_table1[j]) break;  
 if (j != i) return -(i+1);  
   
 /* Valid value */  
   
 *vptr = d;  
 return i+1;  
 }  
   
 #endif  
   
   
   
1452  /*************************************************  /*************************************************
1453  *       Convert character value to UTF-8         *  *             Print one character                *
1454  *************************************************/  *************************************************/
1455    
1456  /* This function takes an integer value in the range 0 - 0x7fffffff  /* Print a single character either literally, or as a hex escape. */
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
1457    
1458  Arguments:  static int pchar(int c, FILE *f)
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 ord2utf8(int cvalue, uschar *utf8bytes)  
1459  {  {
1460  register int i, j;  if (PRINTOK(c))
1461  for (i = 0; i < utf8_table1_size; i++)    {
1462    if (cvalue <= utf8_table1[i]) break;    if (f != NULL) fprintf(f, "%c", c);
1463  utf8bytes += i;    return 1;
1464  for (j = i; j > 0; j--)    }
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
1465    
1466  #endif  if (c < 0x100)
1467      {
1468      if (use_utf)
1469        {
1470        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1471        return 6;
1472        }
1473      else
1474        {
1475        if (f != NULL) fprintf(f, "\\x%02x", c);
1476        return 4;
1477        }
1478      }
1479    
1480    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1481    return (c <= 0x000000ff)? 6 :
1482           (c <= 0x00000fff)? 7 :
1483           (c <= 0x0000ffff)? 8 :
1484           (c <= 0x000fffff)? 9 : 10;
1485    }
1486    
1487    
1488    
1489    #ifdef SUPPORT_PCRE8
1490  /*************************************************  /*************************************************
1491  *             Print character string             *  *         Print 8-bit character string           *
1492  *************************************************/  *************************************************/
1493    
1494  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1495  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1496    
1497  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1498  {  {
1499  int c = 0;  int c = 0;
1500  int yield = 0;  int yield = 0;
1501    
1502    if (length < 0)
1503      length = strlen((char *)p);
1504    
1505  while (length-- > 0)  while (length-- > 0)
1506    {    {
1507  #if !defined NOUTF8  #if !defined NOUTF
1508    if (use_utf8)    if (use_utf)
1509      {      {
1510      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
   
1511      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1512        {        {
1513        length -= rc - 1;        length -= rc - 1;
1514        p += rc;        p += rc;
1515        if (PRINTHEX(c))        yield += pchar(c, f);
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
1516        continue;        continue;
1517        }        }
1518      }      }
1519  #endif  #endif
1520      c = *p++;
1521      yield += pchar(c, f);
1522      }
1523    
1524    return yield;
1525    }
1526    #endif
1527    
    /* Not UTF-8, or malformed UTF-8  */  
1528    
1529    c = *p++;  
1530    if (PRINTHEX(c))  #ifdef SUPPORT_PCRE16
1531      {  /*************************************************
1532      if (f != NULL) fprintf(f, "%c", c);  *    Find length of 0-terminated 16-bit string   *
1533      yield++;  *************************************************/
1534      }  
1535    else  static int strlen16(PCRE_SPTR16 p)
1536    {
1537    int len = 0;
1538    while (*p++ != 0) len++;
1539    return len;
1540    }
1541    #endif  /* SUPPORT_PCRE16 */
1542    
1543    
1544    #ifdef SUPPORT_PCRE16
1545    /*************************************************
1546    *           Print 16-bit character string        *
1547    *************************************************/
1548    
1549    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1550    If handed a NULL file, just counts chars without printing. */
1551    
1552    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1553    {
1554    int yield = 0;
1555    
1556    if (length < 0)
1557      length = strlen16(p);
1558    
1559    while (length-- > 0)
1560      {
1561      int c = *p++ & 0xffff;
1562    #if !defined NOUTF
1563      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1564      {      {
1565      if (f != NULL) fprintf(f, "\\x%02x", c);      int d = *p & 0xffff;
1566      yield += 4;      if (d >= 0xDC00 && d < 0xDFFF)
1567          {
1568          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1569          length--;
1570          p++;
1571          }
1572      }      }
1573    #endif
1574      yield += pchar(c, f);
1575    }    }
1576    
1577  return yield;  return yield;
1578  }  }
1579    #endif  /* SUPPORT_PCRE16 */
1580    
1581    
1582    
1583    #ifdef SUPPORT_PCRE8
1584    /*************************************************
1585    *     Read a capture name (8-bit) and check it   *
1586    *************************************************/
1587    
1588    static pcre_uint8 *
1589    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1590    {
1591    pcre_uint8 *npp = *pp;
1592    while (isalnum(*p)) *npp++ = *p++;
1593    *npp++ = 0;
1594    *npp = 0;
1595    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1596      {
1597      fprintf(outfile, "no parentheses with name \"");
1598      PCHARSV(*pp, 0, -1, outfile);
1599      fprintf(outfile, "\"\n");
1600      }
1601    
1602    *pp = npp;
1603    return p;
1604    }
1605    #endif  /* SUPPORT_PCRE8 */
1606    
1607    
1608    
1609    #ifdef SUPPORT_PCRE16
1610    /*************************************************
1611    *     Read a capture name (16-bit) and check it  *
1612    *************************************************/
1613    
1614    /* Note that the text being read is 8-bit. */
1615    
1616    static pcre_uint8 *
1617    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1618    {
1619    pcre_uint16 *npp = *pp;
1620    while (isalnum(*p)) *npp++ = *p++;
1621    *npp++ = 0;
1622    *npp = 0;
1623    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1624      {
1625      fprintf(outfile, "no parentheses with name \"");
1626      PCHARSV(*pp, 0, -1, outfile);
1627      fprintf(outfile, "\"\n");
1628      }
1629    *pp = npp;
1630    return p;
1631    }
1632    #endif  /* SUPPORT_PCRE16 */
1633    
1634    
1635    
# Line 503  if (callout_extra) Line 1658  if (callout_extra)
1658      else      else
1659        {        {
1660        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1661        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1662          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1663        fprintf(f, "\n");        fprintf(f, "\n");
1664        }        }
# Line 516  printed lengths of the substrings. */ Line 1671  printed lengths of the substrings. */
1671    
1672  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1673    
1674  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1675  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1676    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1677    
1678  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1679    
1680  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1681    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1682    
1683  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 559  fprintf(outfile, "%.*s", (cb->next_item_ Line 1714  fprintf(outfile, "%.*s", (cb->next_item_
1714  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1715  first_callout = 0;  first_callout = 0;
1716    
1717    if (cb->mark != last_callout_mark)
1718      {
1719      if (cb->mark == NULL)
1720        fprintf(outfile, "Latest Mark: <unset>\n");
1721      else
1722        {
1723        fprintf(outfile, "Latest Mark: ");
1724        PCHARSV(cb->mark, 0, -1, outfile);
1725        putc('\n', outfile);
1726        }
1727      last_callout_mark = cb->mark;
1728      }
1729    
1730  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1731    {    {
1732    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 578  return (cb->callout_number != callout_fa Line 1746  return (cb->callout_number != callout_fa
1746  *            Local malloc functions              *  *            Local malloc functions              *
1747  *************************************************/  *************************************************/
1748    
1749  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1750  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1751    show_malloc variable is set only during matching. */
1752    
1753  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1754  {  {
1755  void *block = malloc(size);  void *block = malloc(size);
1756  gotten_store = size;  gotten_store = size;
1757    if (first_gotten_store == 0) first_gotten_store = size;
1758  if (show_malloc)  if (show_malloc)
1759    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1760  return block;  return block;
# Line 597  if (show_malloc) Line 1767  if (show_malloc)
1767  free(block);  free(block);
1768  }  }
1769    
   
1770  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1771    
1772  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 620  free(block); Line 1789  free(block);
1789  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1790  *************************************************/  *************************************************/
1791    
1792  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1793    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1794    value, but the code is defensive.
1795    
1796    Arguments:
1797      re        compiled regex
1798      study     study data
1799      option    PCRE_INFO_xxx option
1800      ptr       where to put the data
1801    
1802    Returns:    0 when OK, < 0 on error
1803    */
1804    
1805  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
1806    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1807  {  {
1808  int rc;  int rc;
1809  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1810    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1811    #ifdef SUPPORT_PCRE16
1812      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1813    #else
1814      rc = PCRE_ERROR_BADMODE;
1815    #endif
1816    else
1817    #ifdef SUPPORT_PCRE8
1818      rc = pcre_fullinfo(re, study, option, ptr);
1819    #else
1820      rc = PCRE_ERROR_BADMODE;
1821    #endif
1822    
1823    if (rc < 0)
1824      {
1825      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1826        use_pcre16? "16" : "", option);
1827      if (rc == PCRE_ERROR_BADMODE)
1828        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1829          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1830      }
1831    
1832    return rc;
1833  }  }
1834    
1835    
1836    
1837  /*************************************************  /*************************************************
1838  *         Byte flipping function                 *  *             Swap byte functions                *
1839  *************************************************/  *************************************************/
1840    
1841  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1842  byteflip(unsigned long int value, int n)  value, respectively.
1843    
1844    Arguments:
1845      value        any number
1846    
1847    Returns:       the byte swapped value
1848    */
1849    
1850    static pcre_uint32
1851    swap_uint32(pcre_uint32 value)
1852  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1853  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1854         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1855         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1856         ((value & 0xff000000) >> 24);         (value >> 24);
1857    }
1858    
1859    static pcre_uint16
1860    swap_uint16(pcre_uint16 value)
1861    {
1862    return (value >> 8) | (value << 8);
1863  }  }
1864    
1865    
1866    
1867    /*************************************************
1868    *        Flip bytes in a compiled pattern        *
1869    *************************************************/
1870    
1871    /* This function is called if the 'F' option was present on a pattern that is
1872    to be written to a file. We flip the bytes of all the integer fields in the
1873    regex data block and the study block. In 16-bit mode this also flips relevant
1874    bytes in the pattern itself. This is to make it possible to test PCRE's
1875    ability to reload byte-flipped patterns, e.g. those compiled on a different
1876    architecture. */
1877    
1878    static void
1879    regexflip(pcre *ere, pcre_extra *extra)
1880    {
1881    REAL_PCRE *re = (REAL_PCRE *)ere;
1882    #ifdef SUPPORT_PCRE16
1883    int op;
1884    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1885    int length = re->name_count * re->name_entry_size;
1886    #ifdef SUPPORT_UTF
1887    BOOL utf = (re->options & PCRE_UTF16) != 0;
1888    BOOL utf16_char = FALSE;
1889    #endif /* SUPPORT_UTF */
1890    #endif /* SUPPORT_PCRE16 */
1891    
1892    /* Always flip the bytes in the main data block and study blocks. */
1893    
1894    re->magic_number = REVERSED_MAGIC_NUMBER;
1895    re->size = swap_uint32(re->size);
1896    re->options = swap_uint32(re->options);
1897    re->flags = swap_uint16(re->flags);
1898    re->top_bracket = swap_uint16(re->top_bracket);
1899    re->top_backref = swap_uint16(re->top_backref);
1900    re->first_char = swap_uint16(re->first_char);
1901    re->req_char = swap_uint16(re->req_char);
1902    re->name_table_offset = swap_uint16(re->name_table_offset);
1903    re->name_entry_size = swap_uint16(re->name_entry_size);
1904    re->name_count = swap_uint16(re->name_count);
1905    
1906    if (extra != NULL)
1907      {
1908      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1909      rsd->size = swap_uint32(rsd->size);
1910      rsd->flags = swap_uint32(rsd->flags);
1911      rsd->minlength = swap_uint32(rsd->minlength);
1912      }
1913    
1914    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1915    in the name table, if present, and then in the pattern itself. */
1916    
1917    #ifdef SUPPORT_PCRE16
1918    if (!use_pcre16) return;
1919    
1920    while(TRUE)
1921      {
1922      /* Swap previous characters. */
1923      while (length-- > 0)
1924        {
1925        *ptr = swap_uint16(*ptr);
1926        ptr++;
1927        }
1928    #ifdef SUPPORT_UTF
1929      if (utf16_char)
1930        {
1931        if ((ptr[-1] & 0xfc00) == 0xd800)
1932          {
1933          /* We know that there is only one extra character in UTF-16. */
1934          *ptr = swap_uint16(*ptr);
1935          ptr++;
1936          }
1937        }
1938      utf16_char = FALSE;
1939    #endif /* SUPPORT_UTF */
1940    
1941      /* Get next opcode. */
1942    
1943      length = 0;
1944      op = *ptr;
1945      *ptr++ = swap_uint16(op);
1946    
1947      switch (op)
1948        {
1949        case OP_END:
1950        return;
1951    
1952    #ifdef SUPPORT_UTF
1953        case OP_CHAR:
1954        case OP_CHARI:
1955        case OP_NOT:
1956        case OP_NOTI:
1957        case OP_STAR:
1958        case OP_MINSTAR:
1959        case OP_PLUS:
1960        case OP_MINPLUS:
1961        case OP_QUERY:
1962        case OP_MINQUERY:
1963        case OP_UPTO:
1964        case OP_MINUPTO:
1965        case OP_EXACT:
1966        case OP_POSSTAR:
1967        case OP_POSPLUS:
1968        case OP_POSQUERY:
1969        case OP_POSUPTO:
1970        case OP_STARI:
1971        case OP_MINSTARI:
1972        case OP_PLUSI:
1973        case OP_MINPLUSI:
1974        case OP_QUERYI:
1975        case OP_MINQUERYI:
1976        case OP_UPTOI:
1977        case OP_MINUPTOI:
1978        case OP_EXACTI:
1979        case OP_POSSTARI:
1980        case OP_POSPLUSI:
1981        case OP_POSQUERYI:
1982        case OP_POSUPTOI:
1983        case OP_NOTSTAR:
1984        case OP_NOTMINSTAR:
1985        case OP_NOTPLUS:
1986        case OP_NOTMINPLUS:
1987        case OP_NOTQUERY:
1988        case OP_NOTMINQUERY:
1989        case OP_NOTUPTO:
1990        case OP_NOTMINUPTO:
1991        case OP_NOTEXACT:
1992        case OP_NOTPOSSTAR:
1993        case OP_NOTPOSPLUS:
1994        case OP_NOTPOSQUERY:
1995        case OP_NOTPOSUPTO:
1996        case OP_NOTSTARI:
1997        case OP_NOTMINSTARI:
1998        case OP_NOTPLUSI:
1999        case OP_NOTMINPLUSI:
2000        case OP_NOTQUERYI:
2001        case OP_NOTMINQUERYI:
2002        case OP_NOTUPTOI:
2003        case OP_NOTMINUPTOI:
2004        case OP_NOTEXACTI:
2005        case OP_NOTPOSSTARI:
2006        case OP_NOTPOSPLUSI:
2007        case OP_NOTPOSQUERYI:
2008        case OP_NOTPOSUPTOI:
2009        if (utf) utf16_char = TRUE;
2010    #endif
2011        /* Fall through. */
2012    
2013        default:
2014        length = OP_lengths16[op] - 1;
2015        break;
2016    
2017        case OP_CLASS:
2018        case OP_NCLASS:
2019        /* Skip the character bit map. */
2020        ptr += 32/sizeof(pcre_uint16);
2021        length = 0;
2022        break;
2023    
2024        case OP_XCLASS:
2025        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2026        if (LINK_SIZE > 1)
2027          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2028            - (1 + LINK_SIZE + 1));
2029        else
2030          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2031    
2032        /* Reverse the size of the XCLASS instance. */
2033        *ptr = swap_uint16(*ptr);
2034        ptr++;
2035        if (LINK_SIZE > 1)
2036          {
2037          *ptr = swap_uint16(*ptr);
2038          ptr++;
2039          }
2040    
2041        op = *ptr;
2042        *ptr = swap_uint16(op);
2043        ptr++;
2044        if ((op & XCL_MAP) != 0)
2045          {
2046          /* Skip the character bit map. */
2047          ptr += 32/sizeof(pcre_uint16);
2048          length -= 32/sizeof(pcre_uint16);
2049          }
2050        break;
2051        }
2052      }
2053    /* Control should never reach here in 16 bit mode. */
2054    #endif /* SUPPORT_PCRE16 */
2055    }
2056    
2057    
2058    
2059  /*************************************************  /*************************************************
2060  *        Check match or recursion limit          *  *        Check match or recursion limit          *
2061  *************************************************/  *************************************************/
2062    
2063  static int  static int
2064  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2065    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2066    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2067  {  {
# Line 668  for (;;) Line 2076  for (;;)
2076    {    {
2077    *limit = mid;    *limit = mid;
2078    
2079    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2080      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2081    
2082    if (count == errnumber)    if (count == errnumber)
# Line 713  Returns:    < 0, = 0, or > 0, according Line 2121  Returns:    < 0, = 0, or > 0, according
2121  */  */
2122    
2123  static int  static int
2124  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2125  {  {
2126  while (n--)  while (n--)
2127    {    {
# Line 729  return 0; Line 2137  return 0;
2137  *         Check newline indicator                *  *         Check newline indicator                *
2138  *************************************************/  *************************************************/
2139    
2140  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2141  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
2142    
2143  Arguments:  Arguments:
2144    p           points after the leading '<'    p           points after the leading '<'
# Line 741  Returns:      appropriate PCRE_NEWLINE_x Line 2148  Returns:      appropriate PCRE_NEWLINE_x
2148  */  */
2149    
2150  static int  static int
2151  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2152  {  {
2153  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2154  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2155  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2156  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2157  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2158  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2159  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2160  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2161  return 0;  return 0;
2162  }  }
# Line 765  usage(void) Line 2172  usage(void)
2172  {  {
2173  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2174  printf("Input and output default to stdin and stdout.\n");  printf("Input and output default to stdin and stdout.\n");
2175  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2176  printf("If input is a terminal, readline() is used to read from it.\n");  printf("If input is a terminal, readline() is used to read from it.\n");
2177  #else  #else
2178  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2179  #endif  #endif
2180  printf("\nOptions:\n");  printf("\nOptions:\n");
2181  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2182    printf("  -16      use the 16-bit library\n");
2183    #endif
2184    printf("  -b       show compiled code\n");
2185  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2186    printf("  -C arg   show a specific compile-time option\n");
2187    printf("           and exit with its value. The arg can be:\n");
2188    printf("     linksize     internal link size [2, 3, 4]\n");
2189    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2190    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2191    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2192    printf("     ucp          Unicode Properties supported [0, 1]\n");
2193    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2194    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2195  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2196  #if !defined NODFA  #if !defined NODFA
2197  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
2198  #endif  #endif
2199  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
2200  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
2201           "  -M       find MATCH_LIMIT minimum for each subject\n"
2202         "  -m       output memory used information\n"         "  -m       output memory used information\n"
2203         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
2204  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 786  printf("  -p       use POSIX interface\n Line 2206  printf("  -p       use POSIX interface\n
2206  #endif  #endif
2207  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2208  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2209  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
2210           "  -s+      force each pattern to be studied, using JIT if available\n"
2211           "  -s++     ditto, verifying when JIT was actually used\n"
2212           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2213           "             where 1 <= n <= 7 selects JIT options\n"
2214           "  -s++n    ditto, verifying when JIT was actually used\n"
2215         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2216  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2217  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 806  options, followed by a set of test data, Line 2231  options, followed by a set of test data,
2231  int main(int argc, char **argv)  int main(int argc, char **argv)
2232  {  {
2233  FILE *infile = stdin;  FILE *infile = stdin;
2234    const char *version;
2235  int options = 0;  int options = 0;
2236  int study_options = 0;  int study_options = 0;
2237    int default_find_match_limit = FALSE;
2238  int op = 1;  int op = 1;
2239  int timeit = 0;  int timeit = 0;
2240  int timeitm = 0;  int timeitm = 0;
2241  int showinfo = 0;  int showinfo = 0;
2242  int showstore = 0;  int showstore = 0;
2243    int force_study = -1;
2244    int force_study_options = 0;
2245  int quiet = 0;  int quiet = 0;
2246  int size_offsets = 45;  int size_offsets = 45;
2247  int size_offsets_max;  int size_offsets_max;
2248  int *offsets = NULL;  int *offsets = NULL;
 #if !defined NOPOSIX  
 int posix = 0;  
 #endif  
2249  int debug = 0;  int debug = 0;
2250  int done = 0;  int done = 0;
2251  int all_use_dfa = 0;  int all_use_dfa = 0;
2252    int verify_jit = 0;
2253  int yield = 0;  int yield = 0;
2254  int stack_size;  int stack_size;
2255    
2256  /* These vectors store, end-to-end, a list of captured substring names. Assume  #if !defined NOPOSIX
2257  that 1024 is plenty long enough for the few names we'll be testing. */  int posix = 0;
2258    #endif
2259    #if !defined NODFA
2260    int *dfa_workspace = NULL;
2261    #endif
2262    
2263  uschar copynames[1024];  pcre_jit_stack *jit_stack = NULL;
 uschar getnames[1024];  
2264    
2265  uschar *copynamesptr;  /* These vectors store, end-to-end, a list of zero-terminated captured
2266  uschar *getnamesptr;  substring names, each list itself being terminated by an empty name. Assume
2267    that 1024 is plenty long enough for the few names we'll be testing. It is
2268    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2269    for the actual memory, to ensure alignment. */
2270    
2271    pcre_uint16 copynames[1024];
2272    pcre_uint16 getnames[1024];
2273    
2274    #ifdef SUPPORT_PCRE16
2275    pcre_uint16 *cn16ptr;
2276    pcre_uint16 *gn16ptr;
2277    #endif
2278    
2279  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2280  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2281    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2282    pcre_uint8 *cn8ptr;
2283    pcre_uint8 *gn8ptr;
2284    #endif
2285    
2286  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2287  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2288  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2289    
2290    buffer = (pcre_uint8 *)malloc(buffer_size);
2291    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2292    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2293    
2294  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2295    
# Line 855  it set 0x8000, but then I was advised th Line 2304  it set 0x8000, but then I was advised th
2304  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2305  #endif  #endif
2306    
2307    /* Get the version number: both pcre_version() and pcre16_version() give the
2308    same answer. We just need to ensure that we call one that is available. */
2309    
2310    #ifdef SUPPORT_PCRE8
2311    version = pcre_version();
2312    #else
2313    version = pcre16_version();
2314    #endif
2315    
2316  /* Scan options */  /* Scan options */
2317    
2318  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2319    {    {
2320    unsigned char *endptr;    pcre_uint8 *endptr;
2321      char *arg = argv[op];
2322    
2323      if (strcmp(arg, "-m") == 0) showstore = 1;
2324      else if (strcmp(arg, "-s") == 0) force_study = 0;
2325    
2326    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    else if (strncmp(arg, "-s+", 3) == 0)
2327      showstore = 1;      {
2328    else if (strcmp(argv[op], "-q") == 0) quiet = 1;      arg += 3;
2329    else if (strcmp(argv[op], "-b") == 0) debug = 1;      if (*arg == '+') { arg++; verify_jit = TRUE; }
2330    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      force_study = 1;
2331    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      if (*arg == 0)
2332          force_study_options = jit_study_bits[6];
2333        else if (*arg >= '1' && *arg <= '7')
2334          force_study_options = jit_study_bits[*arg - '1'];
2335        else goto BAD_ARG;
2336        }
2337      else if (strcmp(arg, "-16") == 0)
2338        {
2339    #ifdef SUPPORT_PCRE16
2340        use_pcre16 = 1;
2341    #else
2342        printf("** This version of PCRE was built without 16-bit support\n");
2343        exit(1);
2344    #endif
2345        }
2346      else if (strcmp(arg, "-q") == 0) quiet = 1;
2347      else if (strcmp(arg, "-b") == 0) debug = 1;
2348      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2349      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2350      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2351  #if !defined NODFA  #if !defined NODFA
2352    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2353  #endif  #endif
2354    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2355        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2356          *endptr == 0))          *endptr == 0))
2357      {      {
2358      op++;      op++;
2359      argc--;      argc--;
2360      }      }
2361    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2362      {      {
2363      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2364      int temp;      int temp;
2365      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2366                       *endptr == 0))                       *endptr == 0))
2367        {        {
2368        timeitm = temp;        timeitm = temp;
# Line 891  while (argc > 1 && argv[op][0] == '-') Line 2372  while (argc > 1 && argv[op][0] == '-')
2372      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2373      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2374      }      }
2375    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2376        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2377          *endptr == 0))          *endptr == 0))
2378      {      {
2379  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2380      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2381      exit(1);      exit(1);
2382  #else  #else
# Line 914  while (argc > 1 && argv[op][0] == '-') Line 2395  while (argc > 1 && argv[op][0] == '-')
2395  #endif  #endif
2396      }      }
2397  #if !defined NOPOSIX  #if !defined NOPOSIX
2398    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2399  #endif  #endif
2400    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2401      {      {
2402      int rc;      int rc;
2403      printf("PCRE version %s\n", pcre_version());      unsigned long int lrc;
2404    
2405        if (argc > 2)
2406          {
2407          if (strcmp(argv[op + 1], "linksize") == 0)
2408            {
2409            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2410            printf("%d\n", rc);
2411            yield = rc;
2412            }
2413          else if (strcmp(argv[op + 1], "pcre8") == 0)
2414            {
2415    #ifdef SUPPORT_PCRE8
2416            printf("1\n");
2417            yield = 1;
2418    #else
2419            printf("0\n");
2420            yield = 0;
2421    #endif
2422            }
2423          else if (strcmp(argv[op + 1], "pcre16") == 0)
2424            {
2425    #ifdef SUPPORT_PCRE16
2426            printf("1\n");
2427            yield = 1;
2428    #else
2429            printf("0\n");
2430            yield = 0;
2431    #endif
2432            }
2433          else if (strcmp(argv[op + 1], "utf") == 0)
2434            {
2435    #ifdef SUPPORT_PCRE8
2436            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2437            printf("%d\n", rc);
2438            yield = rc;
2439    #else
2440            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2441            printf("%d\n", rc);
2442            yield = rc;
2443    #endif
2444            }
2445          else if (strcmp(argv[op + 1], "ucp") == 0)
2446            {
2447            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2448            printf("%d\n", rc);
2449            yield = rc;
2450            }
2451          else if (strcmp(argv[op + 1], "jit") == 0)
2452            {
2453            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2454            printf("%d\n", rc);
2455            yield = rc;
2456            }
2457          else if (strcmp(argv[op + 1], "newline") == 0)
2458            {
2459            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2460            print_newline_config(rc, TRUE);
2461            }
2462          else if (strcmp(argv[op + 1], "ebcdic") == 0)
2463            {
2464    #ifdef EBCDIC
2465            printf("1\n");
2466            yield = 1;
2467    #else
2468            printf("0\n");
2469    #endif
2470            }
2471          else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2472            {
2473    #ifdef EBCDIC
2474            printf("0x%02x\n", CHAR_LF);
2475    #else
2476            printf("0\n");
2477    #endif
2478            }
2479          else
2480            {
2481            printf("Unknown -C option: %s\n", argv[op + 1]);
2482            }
2483          goto EXIT;
2484          }
2485    
2486        /* No argument for -C: output all configuration information. */
2487    
2488        printf("PCRE version %s\n", version);
2489      printf("Compiled with\n");      printf("Compiled with\n");
2490    
2491    #ifdef EBCDIC
2492        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2493    #endif
2494    
2495    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2496    are set, either both UTFs are supported or both are not supported. */
2497    
2498    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2499        printf("  8-bit and 16-bit support\n");
2500        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2501        if (rc)
2502          printf("  UTF-8 and UTF-16 support\n");
2503        else
2504          printf("  No UTF-8 or UTF-16 support\n");
2505    #elif defined SUPPORT_PCRE8
2506        printf("  8-bit support only\n");
2507      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2508      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2509      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2510        printf("  16-bit support only\n");
2511        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2512        printf("  %sUTF-16 support\n", rc? "" : "No ");
2513    #endif
2514    
2515        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2516      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2517      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2518      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      if (rc)
2519        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        {
2520        (rc == -2)? "ANYCRLF" :        const char *arch;
2521        (rc == -1)? "ANY" : "???");        (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2522      (void)pcre_config(PCRE_CONFIG_BSR, &rc);        printf("  Just-in-time compiler support: %s\n", arch);
2523          }
2524        else
2525          printf("  No just-in-time compiler support\n");
2526        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2527        print_newline_config(rc, FALSE);
2528        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2529      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2530                                       "all Unicode newlines");                                       "all Unicode newlines");
2531      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2532      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2533      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2534      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2535      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2536      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
2537      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2538      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
2539      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2540      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2541        if (showstore)
2542          {
2543          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2544          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2545          }
2546        printf("\n");
2547      goto EXIT;      goto EXIT;
2548      }      }
2549    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
2550             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
2551      {      {
2552      usage();      usage();
2553      goto EXIT;      goto EXIT;
2554      }      }
2555    else    else
2556      {      {
2557      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
2558        printf("** Unknown or malformed option %s\n", arg);
2559      usage();      usage();
2560      yield = 1;      yield = 1;
2561      goto EXIT;      goto EXIT;
# Line 1000  if (argc > 2) Line 2602  if (argc > 2)
2602    
2603  /* Set alternative malloc function */  /* Set alternative malloc function */
2604    
2605    #ifdef SUPPORT_PCRE8
2606  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2607  pcre_free = new_free;  pcre_free = new_free;
2608  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2609  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2610    #endif
2611    
2612    #ifdef SUPPORT_PCRE16
2613    pcre16_malloc = new_malloc;
2614    pcre16_free = new_free;
2615    pcre16_stack_malloc = stack_malloc;
2616    pcre16_stack_free = stack_free;
2617    #endif
2618    
2619  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2620    
2621  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2622    
2623  /* Main loop */  /* Main loop */
2624    
# Line 1022  while (!done) Line 2633  while (!done)
2633  #endif  #endif
2634    
2635    const char *error;    const char *error;
2636    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2637    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
2638    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
2639      const pcre_uint8 *tables = NULL;
2640      unsigned long int get_options;
2641    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2642    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2643      int do_allcaps = 0;
2644      int do_mark = 0;
2645    int do_study = 0;    int do_study = 0;
2646      int no_force_study = 0;
2647    int do_debug = debug;    int do_debug = debug;
2648    int do_G = 0;    int do_G = 0;
2649    int do_g = 0;    int do_g = 0;
2650    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2651    int do_showrest = 0;    int do_showrest = 0;
2652      int do_showcaprest = 0;
2653    int do_flip = 0;    int do_flip = 0;
2654    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2655    
2656    use_utf8 = 0;  #if !defined NODFA
2657      int dfa_matched = 0;
2658    #endif
2659    
2660      use_utf = 0;
2661    debug_lengths = 1;    debug_lengths = 1;
2662    
2663    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1051  while (!done) Line 2672  while (!done)
2672    
2673    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2674      {      {
2675      unsigned long int magic, get_options;      pcre_uint32 magic;
2676      uschar sbuf[8];      pcre_uint8 sbuf[8];
2677      FILE *f;      FILE *f;
2678    
2679      p++;      p++;
2680        if (*p == '!')
2681          {
2682          do_debug = TRUE;
2683          do_showinfo = TRUE;
2684          p++;
2685          }
2686    
2687      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2688      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2689      *pp = 0;      *pp = 0;
# Line 1067  while (!done) Line 2695  while (!done)
2695        continue;        continue;
2696        }        }
2697    
2698        first_gotten_store = 0;
2699      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2700    
2701      true_size =      true_size =
# Line 1074  while (!done) Line 2703  while (!done)
2703      true_study_size =      true_study_size =
2704        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2705    
2706      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2707      regex_gotten_store = gotten_store;      if (re == NULL)
2708          {
2709          printf("** Failed to get %d bytes of memory for pcre object\n",
2710            (int)true_size);
2711          yield = 1;
2712          goto EXIT;
2713          }
2714        regex_gotten_store = first_gotten_store;
2715    
2716      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2717    
2718      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2719      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2720        {        {
2721        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2722          {          {
2723          do_flip = 1;          do_flip = 1;
2724          }          }
2725        else        else
2726          {          {
2727          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2728            new_free(re);
2729          fclose(f);          fclose(f);
2730          continue;          continue;
2731          }          }
2732        }        }
2733    
2734      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
2735        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2736          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
     /* Need to know if UTF-8 for printing data strings */  
2737    
2738      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      /* Now see if there is any following study data. */
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
     /* Now see if there is any following study data */  
2739    
2740      if (true_study_size != 0)      if (true_study_size != 0)
2741        {        {
# Line 1118  while (!done) Line 2751  while (!done)
2751          {          {
2752          FAIL_READ:          FAIL_READ:
2753          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2754          if (extra != NULL) new_free(extra);          if (extra != NULL)
2755          if (re != NULL) new_free(re);            {
2756              PCRE_FREE_STUDY(extra);
2757              }
2758            new_free(re);
2759          fclose(f);          fclose(f);
2760          continue;          continue;
2761          }          }
# Line 1128  while (!done) Line 2764  while (!done)
2764        }        }
2765      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2766    
2767        /* Flip the necessary bytes. */
2768        if (do_flip)
2769          {
2770          int rc;
2771          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2772          if (rc == PCRE_ERROR_BADMODE)
2773            {
2774            /* Simulate the result of the function call below. */
2775            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2776              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2777            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2778              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2779            new_free(re);
2780            fclose(f);
2781            continue;
2782            }
2783          }
2784    
2785        /* Need to know if UTF-8 for printing data strings. */
2786    
2787        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2788          {
2789          new_free(re);
2790          fclose(f);
2791          continue;
2792          }
2793        use_utf = (get_options & PCRE_UTF8) != 0;
2794    
2795      fclose(f);      fclose(f);
2796      goto SHOW_INFO;      goto SHOW_INFO;
2797      }      }
2798    
2799    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2800    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2801    
2802    delimiter = *p++;    delimiter = *p++;
2803    
# Line 1144  while (!done) Line 2808  while (!done)
2808      }      }
2809    
2810    pp = p;    pp = p;
2811    poffset = p - buffer;    poffset = (int)(p - buffer);
2812    
2813    for(;;)    for(;;)
2814      {      {
# Line 1184  while (!done) Line 2848  while (!done)
2848    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2849    
2850    options = 0;    options = 0;
2851    study_options = 0;    study_options = force_study_options;
2852    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
2853    
2854    while (*pp != 0)    while (*pp != 0)
# Line 1198  while (!done) Line 2862  while (!done)
2862        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2863        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2864    
2865        case '+': do_showrest = 1; break;        case '+':
2866          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2867          break;
2868    
2869          case '=': do_allcaps = 1; break;
2870        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2871        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2872        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1208  while (!done) Line 2876  while (!done)
2876        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2877        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2878        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
2879          case 'K': do_mark = 1; break;
2880        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2881        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2882    
# Line 1215  while (!done) Line 2884  while (!done)
2884        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2885  #endif  #endif
2886    
2887        case 'S': do_study = 1; break;        case 'S':
2888          do_study = 1;
2889          for (;;)
2890            {
2891            switch (*pp++)
2892              {
2893              case 'S':
2894              do_study = 0;
2895              no_force_study = 1;
2896              break;
2897    
2898              case '!':
2899              study_options |= PCRE_STUDY_EXTRA_NEEDED;
2900              break;
2901    
2902              case '+':
2903              if (*pp == '+')
2904                {
2905                verify_jit = TRUE;
2906                pp++;
2907                }
2908              if (*pp >= '1' && *pp <= '7')
2909                study_options |= jit_study_bits[*pp++ - '1'];
2910              else
2911                study_options |= jit_study_bits[6];
2912              break;
2913    
2914              case '-':
2915              study_options &= ~PCRE_STUDY_ALLJIT;
2916              break;
2917    
2918              default:
2919              pp--;
2920              goto ENDLOOP;
2921              }
2922            }
2923          ENDLOOP:
2924          break;
2925    
2926        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2927          case 'W': options |= PCRE_UCP; break;
2928        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2929          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2930        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2931        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2932        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2933    
2934          case 'T':
2935          switch (*pp++)
2936            {
2937            case '0': tables = tables0; break;
2938            case '1': tables = tables1; break;
2939    
2940            case '\r':
2941            case '\n':
2942            case ' ':
2943            case 0:
2944            fprintf(outfile, "** Missing table number after /T\n");
2945            goto SKIP_DATA;
2946    
2947            default:
2948            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2949            goto SKIP_DATA;
2950            }
2951          break;
2952    
2953        case 'L':        case 'L':
2954        ppp = pp;        ppp = pp;
2955        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1234  while (!done) Line 2962  while (!done)
2962          goto SKIP_DATA;          goto SKIP_DATA;
2963          }          }
2964        locale_set = 1;        locale_set = 1;
2965        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2966        pp = ppp;        pp = ppp;
2967        break;        break;
2968    
# Line 1247  while (!done) Line 2975  while (!done)
2975    
2976        case '<':        case '<':
2977          {          {
2978          if (strncmp((char *)pp, "JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2979            {            {
2980            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2981            pp += 3;            pp += 3;
2982            }            }
2983          else          else
2984            {            {
2985            int x = check_newline(pp, outfile);            int x = check_newline(pp, outfile);
2986            if (x == 0) goto SKIP_DATA;            if (x == 0) goto SKIP_DATA;
2987            options |= x;            options |= x;
2988            while (*pp++ != '>');            while (*pp++ != '>');
2989            }            }
2990          }          }
2991        break;        break;
2992    
# Line 1275  while (!done) Line 3003  while (!done)
3003    
3004    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
3005    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
3006    local character tables. */    local character tables. Neither does it have 16-bit support. */
3007    
3008  #if !defined NOPOSIX  #if !defined NOPOSIX
3009    if (posix || do_posix)    if (posix || do_posix)
# Line 1288  while (!done) Line 3016  while (!done)
3016      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3017      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3018      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3019        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3020        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3021    
3022        first_gotten_store = 0;
3023      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
3024    
3025      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1308  while (!done) Line 3039  while (!done)
3039  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
3040    
3041      {      {
3042        /* In 16-bit mode, convert the input. */
3043    
3044    #ifdef SUPPORT_PCRE16
3045        if (use_pcre16)
3046          {
3047          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3048            {
3049            case -1:
3050            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3051              "converted to UTF-16\n");
3052            goto SKIP_DATA;
3053    
3054            case -2:
3055            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3056              "cannot be converted to UTF-16\n");
3057            goto SKIP_DATA;
3058    
3059            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3060            fprintf(outfile, "**Failed: character value greater than 0xffff "
3061              "cannot be converted to 16-bit in non-UTF mode\n");
3062            goto SKIP_DATA;
3063    
3064            default:
3065            break;
3066            }
3067          p = (pcre_uint8 *)buffer16;
3068          }
3069    #endif
3070    
3071        /* Compile many times when timing */
3072    
3073      if (timeit > 0)      if (timeit > 0)
3074        {        {
3075        register int i;        register int i;
# Line 1315  while (!done) Line 3077  while (!done)
3077        clock_t start_time = clock();        clock_t start_time = clock();
3078        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
3079          {          {
3080          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3081          if (re != NULL) free(re);          if (re != NULL) free(re);
3082          }          }
3083        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1324  while (!done) Line 3086  while (!done)
3086            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
3087        }        }
3088    
3089      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
3090        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3091    
3092      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3093      if non-interactive. */      if non-interactive. */
# Line 1351  while (!done) Line 3114  while (!done)
3114        goto CONTINUE;        goto CONTINUE;
3115        }        }
3116    
3117      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
3118      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
3119      returns only limited data. Check that it agrees with the newer one. */      lines. */
3120    
3121      if (log_store)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3122        fprintf(outfile, "Memory allocation (code space): %d\n",        goto SKIP_DATA;
3123          (int)(gotten_store -      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3124    
3125      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3126      and remember the store that was got. */      and remember the store that was got. */
3127    
3128      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
3129      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3130    
3131        /* Output code size information if requested */
3132    
3133      /* If /S was present, study the regexp to generate additional info to      if (log_store)
3134      help with the matching. */        fprintf(outfile, "Memory allocation (code space): %d\n",
3135            (int)(first_gotten_store -
3136                  sizeof(REAL_PCRE) -
3137                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3138    
3139        /* If -s or /S was present, study the regex to generate additional info to
3140        help with the matching, unless the pattern has the SS option, which
3141        suppresses the effect of /S (used for a few test patterns where studying is
3142        never sensible). */
3143    
3144      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
3145        {        {
3146        if (timeit > 0)        if (timeit > 0)
3147          {          {
# Line 1378  while (!done) Line 3149  while (!done)
3149          clock_t time_taken;          clock_t time_taken;
3150          clock_t start_time = clock();          clock_t start_time = clock();
3151          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3152            extra = pcre_study(re, study_options, &error);            {
3153              PCRE_STUDY(extra, re, study_options, &error);
3154              }
3155          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3156          if (extra != NULL) free(extra);          if (extra != NULL)
3157              {
3158              PCRE_FREE_STUDY(extra);
3159              }
3160          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3161            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3162              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3163          }          }
3164        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options, &error);
3165        if (error != NULL)        if (error != NULL)
3166          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3167        else if (extra != NULL)        else if (extra != NULL)
3168            {
3169          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3170            if (log_store)
3171              {
3172              size_t jitsize;
3173              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3174                  jitsize != 0)
3175                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3176              }
3177            }
3178        }        }
3179    
3180      /* If the 'F' option was present, we flip the bytes of all the integer      /* If /K was present, we set up for handling MARK data. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
3181    
3182      if (do_flip)      if (do_mark)
3183        {        {
3184        real_pcre *rre = (real_pcre *)re;        if (extra == NULL)
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
3185          {          {
3186          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3187          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          extra->flags = 0;
         rsd->options = byteflip(rsd->options, sizeof(rsd->options));  
3188          }          }
3189          extra->mark = &markptr;
3190          extra->flags |= PCRE_EXTRA_MARK;
3191        }        }
3192    
3193      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
3194    
3195      SHOW_INFO:      SHOW_INFO:
3196    
3197      if (do_debug)      if (do_debug)
3198        {        {
3199        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3200        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3201        }        }
3202    
3203        /* We already have the options in get_options (see above) */
3204    
3205      if (do_showinfo)      if (do_showinfo)
3206        {        {
3207        unsigned long int get_options, all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3208        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3209          hascrorlf;          hascrorlf, maxlookbehind;
3210        int nameentrysize, namecount;        int nameentrysize, namecount;
3211        const uschar *nametable;        const pcre_uint8 *nametable;
3212    
3213        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3214        new_info(re, NULL, PCRE_INFO_SIZE, &size);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3215        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3216        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3217        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3218        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3219        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3220        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3221        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3222        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3223        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3224        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3225              != 0)
3226  #if !defined NOINFOCHECK          goto SKIP_DATA;
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3227    
3228        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3229          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1495  while (!done) Line 3238  while (!done)
3238          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3239          while (namecount-- > 0)          while (namecount-- > 0)
3240            {            {
3241            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3242              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3243              GET2(nametable, 0));  #else
3244              int imm2_size = IMM2_SIZE;
3245    #endif
3246              int length = (int)STRLEN(nametable + imm2_size);
3247              fprintf(outfile, "  ");
3248              PCHARSV(nametable, imm2_size, length, outfile);
3249              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3250    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3251              fprintf(outfile, "%3d\n", use_pcre16?
3252                 (int)(((PCRE_SPTR16)nametable)[0])
3253                :((int)nametable[0] << 8) | (int)nametable[1]);
3254              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3255    #else
3256              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3257    #ifdef SUPPORT_PCRE8
3258            nametable += nameentrysize;            nametable += nameentrysize;
3259    #else
3260              nametable += nameentrysize * 2;
3261    #endif
3262    #endif
3263            }            }
3264          }          }
3265    
3266        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3267        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3268    
3269        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3270        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3271    
3272        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3273          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3274            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3275            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3276            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1522  while (!done) Line 3283  while (!done)
3283            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3284            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3285            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3286            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3287            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3288              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3289              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3290            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3291    
3292        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1564  while (!done) Line 3327  while (!done)
3327          }          }
3328        else        else
3329          {          {
3330          int ch = first_char & 255;          const char *caseless =
3331          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3332            "" : " (caseless)";            "" : " (caseless)";
3333          if (PRINTHEX(ch))  
3334            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3335              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3336          else          else
3337            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3338              fprintf(outfile, "First char = ");
3339              pchar(first_char, outfile);
3340              fprintf(outfile, "%s\n", caseless);
3341              }
3342          }          }
3343    
3344        if (need_char < 0)        if (need_char < 0)
# Line 1579  while (!done) Line 3347  while (!done)
3347          }          }
3348        else        else
3349          {          {
3350          int ch = need_char & 255;          const char *caseless =
3351          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3352            "" : " (caseless)";            "" : " (caseless)";
3353          if (PRINTHEX(ch))  
3354            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3355              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3356          else          else
3357            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3358              fprintf(outfile, "Need char = ");
3359              pchar(need_char, outfile);
3360              fprintf(outfile, "%s\n", caseless);
3361              }
3362          }          }
3363    
3364          if (maxlookbehind > 0)
3365            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3366    
3367        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3368        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3369        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3370        flipped.) */        flipped.) If study was forced by an external -s, don't show this
3371          information unless -i or -d was also present. This means that, except
3372          when auto-callouts are involved, the output from runs with and without
3373          -s should be identical. */
3374    
3375        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3376          {          {
3377          if (extra == NULL)          if (extra == NULL)
3378            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3379          else          else
3380            {            {
3381            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3382            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
3383    
3384            if (start_bits == NULL)            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3385              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3386            else  
3387              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3388              {              {
3389              int i;              if (start_bits == NULL)
3390              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3391              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3392                {                {
3393                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3394                  int c = 24;
3395                  fprintf(outfile, "Starting byte set: ");
3396                  for (i = 0; i < 256; i++)
3397                  {                  {
3398                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
3399                    {                    {
3400                    fprintf(outfile, "\n  ");                    if (c > 75)
3401                    c = 2;                      {
3402                    }                      fprintf(outfile, "\n  ");
3403                  if (PRINTHEX(i) && i != ' ')                      c = 2;
3404                    {                      }
3405                    fprintf(outfile, "%c ", i);                    if (PRINTOK(i) && i != ' ')
3406                    c += 2;                      {
3407                    }                      fprintf(outfile, "%c ", i);
3408                  else                      c += 2;
3409                    {                      }
3410                    fprintf(outfile, "\\x%02x ", i);                    else
3411                    c += 5;                      {
3412                        fprintf(outfile, "\\x%02x ", i);
3413                        c += 5;
3414                        }
3415                    }                    }
3416                  }                  }
3417                  fprintf(outfile, "\n");
3418                }                }
3419              fprintf(outfile, "\n");              }
3420              }
3421    
3422            /* Show this only if the JIT was set by /S, not by -s. */
3423    
3424            if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3425                (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3426              {
3427              int jit;
3428              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3429                {
3430                if (jit)
3431                  fprintf(outfile, "JIT study was successful\n");
3432                else
3433    #ifdef SUPPORT_JIT
3434                  fprintf(outfile, "JIT study was not successful\n");
3435    #else
3436                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3437    #endif
3438              }              }
3439            }            }
3440          }          }
# Line 1649  while (!done) Line 3453  while (!done)
3453          }          }
3454        else        else
3455          {          {
3456          uschar sbuf[8];          pcre_uint8 sbuf[8];
3457          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3458          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3459          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3460          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3461            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3462          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3463          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3464          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3465          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3466            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3467    
3468          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3469              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1667  while (!done) Line 3472  while (!done)
3472            }            }
3473          else          else
3474            {            {
3475            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3476    
3477              /* If there is study data, write it. */
3478    
3479            if (extra != NULL)            if (extra != NULL)
3480              {              {
3481              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1677  while (!done) Line 3485  while (!done)
3485                  strerror(errno));                  strerror(errno));
3486                }                }
3487              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
3488              }              }
3489            }            }
3490          fclose(f);          fclose(f);
3491          }          }
3492    
3493        new_free(re);        new_free(re);
3494        if (extra != NULL) new_free(extra);        if (extra != NULL)
3495        if (tables != NULL) new_free((void *)tables);          {
3496            PCRE_FREE_STUDY(extra);
3497            }
3498          if (locale_set)
3499            {
3500            new_free((void *)tables);
3501            setlocale(LC_CTYPE, "C");
3502            locale_set = 0;
3503            }
3504        continue;  /* With next regex */        continue;  /* With next regex */
3505        }        }
3506      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1694  while (!done) Line 3509  while (!done)
3509    
3510    for (;;)    for (;;)
3511      {      {
3512      uschar *q;      pcre_uint8 *q;
3513      uschar *bptr;      pcre_uint8 *bptr;
3514      int *use_offsets = offsets;      int *use_offsets = offsets;
3515      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3516      int callout_data = 0;      int callout_data = 0;
3517      int callout_data_set = 0;      int callout_data_set = 0;
3518      int count, c;      int count, c;
3519      int copystrings = 0;      int copystrings = 0;
3520      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
3521      int getstrings = 0;      int getstrings = 0;
3522      int getlist = 0;      int getlist = 0;
3523      int gmatched = 0;      int gmatched = 0;
3524      int start_offset = 0;      int start_offset = 0;
3525        int start_offset_sign = 1;
3526      int g_notempty = 0;      int g_notempty = 0;
3527      int use_dfa = 0;      int use_dfa = 0;
3528    
     options = 0;  
   
3529      *copynames = 0;      *copynames = 0;
3530      *getnames = 0;      *getnames = 0;
3531    
3532      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3533      getnamesptr = getnames;      cn16ptr = copynames;
3534        gn16ptr = getnames;
3535    #endif
3536    #ifdef SUPPORT_PCRE8
3537        cn8ptr = copynames8;
3538        gn8ptr = getnames8;
3539    #endif
3540    
3541      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3542      first_callout = 1;      first_callout = 1;
3543        last_callout_mark = NULL;
3544      callout_extra = 0;      callout_extra = 0;
3545      callout_count = 0;      callout_count = 0;
3546      callout_fail_count = 999999;      callout_fail_count = 999999;
3547      callout_fail_id = -1;      callout_fail_id = -1;
3548      show_malloc = 0;      show_malloc = 0;
3549        options = 0;
3550    
3551      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3552        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 1734  while (!done) Line 3556  while (!done)
3556        {        {
3557        if (extend_inputline(infile, buffer + len, "data> ") == NULL)        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3558          {          {
3559          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
3560              {
3561              fprintf(outfile, "\n");
3562              break;
3563              }
3564          done = 1;          done = 1;
3565          goto CONTINUE;          goto CONTINUE;
3566          }          }
# Line 1756  while (!done) Line 3582  while (!done)
3582        int i = 0;        int i = 0;
3583        int n = 0;        int n = 0;
3584    
3585        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3586          In non-UTF mode, allow the value of the byte to fall through to later,
3587          where values greater than 127 are turned into UTF-8 when running in
3588          16-bit mode. */
3589    
3590          if (c != '\\')
3591            {
3592            if (use_utf)
3593              {
3594              *q++ = c;
3595              continue;
3596              }
3597            }
3598    
3599          /* Handle backslash escapes */
3600    
3601          else switch ((c = *p++))
3602          {          {
3603          case 'a': c =    7; break;          case 'a': c =    7; break;
3604          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 1772  while (!done) Line 3614  while (!done)
3614          c -= '0';          c -= '0';
3615          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3616            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3617          break;          break;
3618    
3619          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3620          if (*p == '{')          if (*p == '{')
3621            {            {
3622            unsigned char *pt = p;            pcre_uint8 *pt = p;
3623            c = 0;            c = 0;
3624            while (isxdigit(*(++pt)))  
3625              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3626              when isxdigit() is a macro that refers to its argument more than
3627              once. This is banned by the C Standard, but apparently happens in at
3628              least one MacOS environment. */
3629    
3630              for (pt++; isxdigit(*pt); pt++)
3631                {
3632                if (++i == 9)
3633                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3634                                   "using only the first eight.\n");
3635                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3636                }
3637            if (*pt == '}')            if (*pt == '}')
3638              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             utn = ord2utf8(c, buff8);  
             for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
             c = buff8[ii];   /* Last byte */  
3639              p = pt + 1;              p = pt + 1;
3640              break;              break;
3641              }              }
3642            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3643            }            }
 #endif  
3644    
3645          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3646            allows UTF-8 characters to be constructed byte by byte, and also allows
3647            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3648            Otherwise, pass it down to later code so that it can be turned into
3649            UTF-8 when running in 16-bit mode. */
3650    
3651          c = 0;          c = 0;
3652          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3653            {            {
3654            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3655            p++;            p++;
3656            }            }
3657            if (use_utf)
3658              {
3659              *q++ = c;
3660              continue;
3661              }
3662          break;          break;
3663    
3664          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 1825  while (!done) Line 3666  while (!done)
3666          continue;          continue;
3667    
3668          case '>':          case '>':
3669            if (*p == '-')
3670              {
3671              start_offset_sign = -1;
3672              p++;
3673              }
3674          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3675            start_offset *= start_offset_sign;
3676          continue;          continue;
3677    
3678          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1844  while (!done) Line 3691  while (!done)
3691            }            }
3692          else if (isalnum(*p))          else if (isalnum(*p))
3693            {            {
3694            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3695            }            }
3696          else if (*p == '+')          else if (*p == '+')
3697            {            {
# Line 1860  while (!done) Line 3700  while (!done)
3700            }            }
3701          else if (*p == '-')          else if (*p == '-')
3702            {            {
3703            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3704            p++;            p++;
3705            }            }
3706          else if (*p == '!')          else if (*p == '!')
# Line 1898  while (!done) Line 3738  while (!done)
3738  #endif  #endif
3739            use_dfa = 1;            use_dfa = 1;
3740          continue;          continue;
3741    #endif
3742    
3743    #if !defined NODFA
3744          case 'F':          case 'F':
3745          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
3746          continue;          continue;
# Line 1912  while (!done) Line 3754  while (!done)
3754            }            }
3755          else if (isalnum(*p))          else if (isalnum(*p))
3756            {            {
3757            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3758            while (isalnum(*p)) *npp++ = *p++;            }
3759            *npp++ = 0;          continue;
3760            *npp = 0;  
3761            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3762            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3763              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3764            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3765                && extra->executable_jit != NULL)
3766              {
3767              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3768              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3769              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3770            }            }
3771          continue;          continue;
3772    
# Line 1932  while (!done) Line 3779  while (!done)
3779          continue;          continue;
3780    
3781          case 'N':          case 'N':
3782          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
3783              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3784            else
3785              options |= PCRE_NOTEMPTY;
3786          continue;          continue;
3787    
3788          case 'O':          case 'O':
# Line 1952  while (!done) Line 3802  while (!done)
3802            }            }
3803          use_size_offsets = n;          use_size_offsets = n;
3804          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
3805              else use_offsets = offsets + size_offsets_max - n;  /* To catch overruns */
3806          continue;          continue;
3807    
3808          case 'P':          case 'P':
3809          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3810              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3811          continue;          continue;
3812    
3813          case 'Q':          case 'Q':
# Line 1990  while (!done) Line 3842  while (!done)
3842          show_malloc = 1;          show_malloc = 1;
3843          continue;          continue;
3844    
3845            case 'Y':
3846            options |= PCRE_NO_START_OPTIMIZE;
3847            continue;
3848    
3849          case 'Z':          case 'Z':
3850          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
3851          continue;          continue;
# Line 2007  while (!done) Line 3863  while (!done)
3863            }            }
3864          continue;          continue;
3865          }          }
3866        *q++ = c;  
3867          /* We now have a character value in c that may be greater than 255. In
3868          16-bit mode, we always convert characters to UTF-8 so that values greater
3869          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3870          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3871          mode must have come from \x{...} or octal constructs because values from
3872          \x.. get this far only in non-UTF mode. */
3873    
3874    #if !defined NOUTF || defined SUPPORT_PCRE16
3875          if (use_pcre16 || use_utf)
3876            {
3877            pcre_uint8 buff8[8];
3878            int ii, utn;
3879            utn = ord2utf8(c, buff8);
3880            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3881            }
3882          else
3883    #endif
3884            {
3885            if (c > 255)
3886              {
3887              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3888                "and UTF-8 mode is not enabled.\n", c);
3889              fprintf(outfile, "** Truncation will probably give the wrong "
3890                "result.\n");
3891              }
3892            *q++ = c;
3893            }
3894        }        }
3895    
3896        /* Reached end of subject string */
3897    
3898      *q = 0;      *q = 0;
3899      len = q - dbuffer;      len = (int)(q - dbuffer);
3900    
3901        /* Move the data to the end of the buffer so that a read over the end of
3902        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3903        we are using the POSIX interface, we must include the terminating zero. */
3904    
3905    #if !defined NOPOSIX
3906        if (posix || do_posix)
3907          {
3908          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3909          bptr += buffer_size - len - 1;
3910          }
3911        else
3912    #endif
3913          {
3914          memmove(bptr + buffer_size - len, bptr, len);
3915          bptr += buffer_size - len;
3916          }
3917    
3918      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
3919        {        {
# Line 2031  while (!done) Line 3934  while (!done)
3934          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3935        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3936        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3937          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3938    
3939        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3940    
# Line 2052  while (!done) Line 3956  while (!done)
3956            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3957              {              {
3958              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3959              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3960                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3961              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3962              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3963                {                {
3964                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3965                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3966                  outfile);                  outfile);
3967                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3968                }                }
# Line 2066  while (!done) Line 3970  while (!done)
3970            }            }
3971          }          }
3972        free(pmatch);        free(pmatch);
3973          goto NEXT_DATA;
3974        }        }
3975    
3976    #endif  /* !defined NOPOSIX */
3977    
3978      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3979    
3980      else  #ifdef SUPPORT_PCRE16
3981  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3982          {
3983          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3984          switch(len)
3985            {
3986            case -1:
3987            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3988              "converted to UTF-16\n");
3989            goto NEXT_DATA;
3990    
3991            case -2:
3992            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3993              "cannot be converted to UTF-16\n");
3994            goto NEXT_DATA;
3995    
3996            case -3:
3997            fprintf(outfile, "**Failed: character value greater than 0xffff "
3998              "cannot be converted to 16-bit in non-UTF mode\n");
3999            goto NEXT_DATA;
4000    
4001            default:
4002            break;
4003            }
4004          bptr = (pcre_uint8 *)buffer16;
4005          }
4006    #endif
4007    
4008        /* Ensure that there is a JIT callback if we want to verify that JIT was
4009        actually used. If jit_stack == NULL, no stack has yet been assigned. */
4010    
4011        if (verify_jit && jit_stack == NULL && extra != NULL)
4012           { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4013    
4014      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
4015        {        {
4016          markptr = NULL;
4017          jit_was_used = FALSE;
4018    
4019        if (timeitm > 0)        if (timeitm > 0)
4020          {          {
4021          register int i;          register int i;
# Line 2084  while (!done) Line 4025  while (!done)
4025  #if !defined NODFA  #if !defined NODFA
4026          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
4027            {            {
4028            int workspace[1000];            if ((options & PCRE_DFA_RESTART) != 0)
4029                {
4030                fprintf(outfile, "Timing DFA restarts is not supported\n");
4031                break;
4032                }
4033              if (dfa_workspace == NULL)
4034                dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4035            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
4036              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              {
4037                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4038                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets,
4039                  dfa_workspace, DFA_WS_DIMENSION);
4040                }
4041            }            }
4042          else          else
4043  #endif  #endif
4044    
4045          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
4046            count = pcre_exec(re, extra, (char *)bptr, len,         &