/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 289 by ph10, Sun Dec 23 12:17:20 2007 UTC revision 1030 by ph10, Sat Sep 8 15:58:38 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 58  POSSIBILITY OF SUCH DAMAGE.
58  #include <locale.h>  #include <locale.h>
59  #include <errno.h>  #include <errno.h>
60    
61  #ifdef SUPPORT_LIBREADLINE  /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67    #ifdef HAVE_UNISTD_H
68  #include <unistd.h>  #include <unistd.h>
69    #endif
70    #if defined(SUPPORT_LIBREADLINE)
71  #include <readline/readline.h>  #include <readline/readline.h>
72  #include <readline/history.h>  #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80  #endif  #endif
   
81    
82  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
83  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 69  input mode under Windows. */ Line 93  input mode under Windows. */
93  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
94  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
95    
96    #ifndef isatty
97    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
98    #endif                         /* though in some environments they seem to   */
99                                   /* be already defined, hence the #ifndefs.    */
100    #ifndef fileno
101    #define fileno _fileno
102    #endif
103    
104    /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106    #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
112  #else  #else
113  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
114  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119  #define INPUT_MODE   "rb"  #define INPUT_MODE   "rb"
120  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
121  #endif  #endif
122    #endif
123    
124    #define PRIV(name) name
125    
126  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
127  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 87  here before pcre_internal.h so that the Line 133  here before pcre_internal.h so that the
133  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
134    
135  #include "pcre.h"  #include "pcre.h"
136    
137    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
138    /* Configure internal macros to 16 bit mode. */
139    #define COMPILE_PCRE16
140    #endif
141    
142  #include "pcre_internal.h"  #include "pcre_internal.h"
143    
144  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* The pcre_printint() function, which prints the internal form of a compiled
145  two copies, we include the source file here, changing the names of the external  regex, is held in a separate file so that (a) it can be compiled in either
146  symbols to prevent clashes. */  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
147    when that is compiled in debug mode. */
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
148    
149  #include "pcre_tables.c"  #ifdef SUPPORT_PCRE8
150    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151    #endif
152    #ifdef SUPPORT_PCRE16
153    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154    #endif
155    
156  /* We also need the pcre_printint() function for printing out compiled  /* We need access to some of the data tables that PCRE uses. So as not to have
157  patterns. This function is in a separate file so that it can be included in  to keep two copies, we include the source file here, changing the names of the
158  pcre_compile.c when that module is compiled with debugging enabled.  external symbols to prevent clashes. */
159    
160  The definition of the macro PRINTABLE, which determines whether to print an  #define PCRE_INCLUDED
161    
162    #include "pcre_tables.c"
163    
164    /* The definition of the macro PRINTABLE, which determines whether to print an
165  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
166  contained in this file. We use it here also, in cases when the locale has not  the same as in the printint.src file. We use it here in cases when the locale
167  been explicitly changed, so as to get consistent output from systems that  has not been explicitly changed, so as to get consistent output from systems
168  differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
169    
170  #include "pcre_printint.src"  #ifdef EBCDIC
171    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
172    #else
173    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
174    #endif
175    
176  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
177    
178    /* Posix support is disabled in 16 bit only mode. */
179    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
180    #define NOPOSIX
181    #endif
182    
183  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
184  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 128  Makefile. */ Line 188  Makefile. */
188  #include "pcreposix.h"  #include "pcreposix.h"
189  #endif  #endif
190    
191  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
192  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
193  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
194  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
195  UTF8 support if PCRE is built without it. */  
196    #ifndef SUPPORT_UTF
197  #ifndef SUPPORT_UTF8  #ifndef NOUTF
198  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
199  #endif  #endif
200  #endif  #endif
201    
202    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
203    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
204    only from one place and is handled differently). I couldn't dream up any way of
205    using a single macro to do this in a generic way, because of the many different
206    argument requirements. We know that at least one of SUPPORT_PCRE8 and
207    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
208    use these in the definitions of generic macros.
209    
210    **** Special note about the PCHARSxxx macros: the address of the string to be
211    printed is always given as two arguments: a base address followed by an offset.
212    The base address is cast to the correct data size for 8 or 16 bit data; the
213    offset is in units of this size. If the string were given as base+offset in one
214    argument, the casting might be incorrectly applied. */
215    
216    #ifdef SUPPORT_PCRE8
217    
218    #define PCHARS8(lv, p, offset, len, f) \
219      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
220    
221    #define PCHARSV8(p, offset, len, f) \
222      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
223    
224    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
225      p = read_capture_name8(p, cn8, re)
226    
227    #define STRLEN8(p) ((int)strlen((char *)p))
228    
229    #define SET_PCRE_CALLOUT8(callout) \
230      pcre_callout = callout
231    
232    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
233       pcre_assign_jit_stack(extra, callback, userdata)
234    
235    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
236      re = pcre_compile((char *)pat, options, error, erroffset, tables)
237    
238    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
239        namesptr, cbuffer, size) \
240      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
241        (char *)namesptr, cbuffer, size)
242    
243    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
244      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
245    
246    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
247        offsets, size_offsets, workspace, size_workspace) \
248      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
249        offsets, size_offsets, workspace, size_workspace)
250    
251    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
252        offsets, size_offsets) \
253      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
254        offsets, size_offsets)
255    
256    #define PCRE_FREE_STUDY8(extra) \
257      pcre_free_study(extra)
258    
259    #define PCRE_FREE_SUBSTRING8(substring) \
260      pcre_free_substring(substring)
261    
262    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
263      pcre_free_substring_list(listptr)
264    
265    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
266        getnamesptr, subsptr) \
267      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
268        (char *)getnamesptr, subsptr)
269    
270    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
271      n = pcre_get_stringnumber(re, (char *)ptr)
272    
273    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
274      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
275    
276    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
277      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
278    
279    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
280      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
281    
282    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
283      pcre_printint(re, outfile, debug_lengths)
284    
285    #define PCRE_STUDY8(extra, re, options, error) \
286      extra = pcre_study(re, options, error)
287    
288    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
289      pcre_jit_stack_alloc(startsize, maxsize)
290    
291    #define PCRE_JIT_STACK_FREE8(stack) \
292      pcre_jit_stack_free(stack)
293    
294    #endif /* SUPPORT_PCRE8 */
295    
296    /* -----------------------------------------------------------*/
297    
298    #ifdef SUPPORT_PCRE16
299    
300    #define PCHARS16(lv, p, offset, len, f) \
301      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
302    
303    #define PCHARSV16(p, offset, len, f) \
304      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
305    
306    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
307      p = read_capture_name16(p, cn16, re)
308    
309    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
310    
311    #define SET_PCRE_CALLOUT16(callout) \
312      pcre16_callout = (int (*)(pcre16_callout_block *))callout
313    
314    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
315      pcre16_assign_jit_stack((pcre16_extra *)extra, \
316        (pcre16_jit_callback)callback, userdata)
317    
318    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
319      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
320        tables)
321    
322    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
323        namesptr, cbuffer, size) \
324      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
325        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
326    
327    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
328      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
329        (PCRE_UCHAR16 *)cbuffer, size/2)
330    
331    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
332        offsets, size_offsets, workspace, size_workspace) \
333      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
334        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
335        workspace, size_workspace)
336    
337    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338        offsets, size_offsets) \
339      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
340        len, start_offset, options, offsets, size_offsets)
341    
342    #define PCRE_FREE_STUDY16(extra) \
343      pcre16_free_study((pcre16_extra *)extra)
344    
345    #define PCRE_FREE_SUBSTRING16(substring) \
346      pcre16_free_substring((PCRE_SPTR16)substring)
347    
348    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
349      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
350    
351    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
352        getnamesptr, subsptr) \
353      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
354        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
355    
356    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
357      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
358    
359    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
360      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
361        (PCRE_SPTR16 *)(void*)subsptr)
362    
363    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
364      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
365        (PCRE_SPTR16 **)(void*)listptr)
366    
367    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
368      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
369        tables)
370    
371    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
372      pcre16_printint(re, outfile, debug_lengths)
373    
374    #define PCRE_STUDY16(extra, re, options, error) \
375      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
376    
377    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
378      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
379    
380    #define PCRE_JIT_STACK_FREE16(stack) \
381      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
382    
383    #endif /* SUPPORT_PCRE16 */
384    
385    
386    /* ----- Both modes are supported; a runtime test is needed, except for
387    pcre_config(), and the JIT stack functions, when it doesn't matter which
388    version is called. ----- */
389    
390    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
391    
392    #define CHAR_SIZE (use_pcre16? 2:1)
393    
394    #define PCHARS(lv, p, offset, len, f) \
395      if (use_pcre16) \
396        PCHARS16(lv, p, offset, len, f); \
397      else \
398        PCHARS8(lv, p, offset, len, f)
399    
400    #define PCHARSV(p, offset, len, f) \
401      if (use_pcre16) \
402        PCHARSV16(p, offset, len, f); \
403      else \
404        PCHARSV8(p, offset, len, f)
405    
406    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
407      if (use_pcre16) \
408        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
409      else \
410        READ_CAPTURE_NAME8(p, cn8, cn16, re)
411    
412    #define SET_PCRE_CALLOUT(callout) \
413      if (use_pcre16) \
414        SET_PCRE_CALLOUT16(callout); \
415      else \
416        SET_PCRE_CALLOUT8(callout)
417    
418    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
419    
420    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
421      if (use_pcre16) \
422        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
423      else \
424        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
425    
426    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
427      if (use_pcre16) \
428        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
429      else \
430        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
431    
432    #define PCRE_CONFIG pcre_config
433    
434    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
435        namesptr, cbuffer, size) \
436      if (use_pcre16) \
437        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
438          namesptr, cbuffer, size); \
439      else \
440        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
441          namesptr, cbuffer, size)
442    
443    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
444      if (use_pcre16) \
445        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
446      else \
447        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
448    
449    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
450        offsets, size_offsets, workspace, size_workspace) \
451      if (use_pcre16) \
452        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
453          offsets, size_offsets, workspace, size_workspace); \
454      else \
455        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
456          offsets, size_offsets, workspace, size_workspace)
457    
458    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
459        offsets, size_offsets) \
460      if (use_pcre16) \
461        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
462          offsets, size_offsets); \
463      else \
464        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
465          offsets, size_offsets)
466    
467    #define PCRE_FREE_STUDY(extra) \
468      if (use_pcre16) \
469        PCRE_FREE_STUDY16(extra); \
470      else \
471        PCRE_FREE_STUDY8(extra)
472    
473    #define PCRE_FREE_SUBSTRING(substring) \
474      if (use_pcre16) \
475        PCRE_FREE_SUBSTRING16(substring); \
476      else \
477        PCRE_FREE_SUBSTRING8(substring)
478    
479    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
480      if (use_pcre16) \
481        PCRE_FREE_SUBSTRING_LIST16(listptr); \
482      else \
483        PCRE_FREE_SUBSTRING_LIST8(listptr)
484    
485    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
486        getnamesptr, subsptr) \
487      if (use_pcre16) \
488        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
489          getnamesptr, subsptr); \
490      else \
491        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
492          getnamesptr, subsptr)
493    
494    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
495      if (use_pcre16) \
496        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
497      else \
498        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
499    
500    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
501      if (use_pcre16) \
502        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
503      else \
504        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
505    
506    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
507      if (use_pcre16) \
508        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
509      else \
510        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
511    
512    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
513      (use_pcre16 ? \
514         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
515        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
516    
517    #define PCRE_JIT_STACK_FREE(stack) \
518      if (use_pcre16) \
519        PCRE_JIT_STACK_FREE16(stack); \
520      else \
521        PCRE_JIT_STACK_FREE8(stack)
522    
523    #define PCRE_MAKETABLES \
524      (use_pcre16? pcre16_maketables() : pcre_maketables())
525    
526    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
527      if (use_pcre16) \
528        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
529      else \
530        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
531    
532    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
533      if (use_pcre16) \
534        PCRE_PRINTINT16(re, outfile, debug_lengths); \
535      else \
536        PCRE_PRINTINT8(re, outfile, debug_lengths)
537    
538    #define PCRE_STUDY(extra, re, options, error) \
539      if (use_pcre16) \
540        PCRE_STUDY16(extra, re, options, error); \
541      else \
542        PCRE_STUDY8(extra, re, options, error)
543    
544    /* ----- Only 8-bit mode is supported ----- */
545    
546    #elif defined SUPPORT_PCRE8
547    #define CHAR_SIZE                 1
548    #define PCHARS                    PCHARS8
549    #define PCHARSV                   PCHARSV8
550    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
551    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
552    #define STRLEN                    STRLEN8
553    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
554    #define PCRE_COMPILE              PCRE_COMPILE8
555    #define PCRE_CONFIG               pcre_config
556    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
557    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
558    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
559    #define PCRE_EXEC                 PCRE_EXEC8
560    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
561    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
562    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
563    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
564    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
565    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
566    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
567    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
568    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
569    #define PCRE_MAKETABLES           pcre_maketables()
570    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
571    #define PCRE_PRINTINT             PCRE_PRINTINT8
572    #define PCRE_STUDY                PCRE_STUDY8
573    
574    /* ----- Only 16-bit mode is supported ----- */
575    
576    #else
577    #define CHAR_SIZE                 2
578    #define PCHARS                    PCHARS16
579    #define PCHARSV                   PCHARSV16
580    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
581    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
582    #define STRLEN                    STRLEN16
583    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
584    #define PCRE_COMPILE              PCRE_COMPILE16
585    #define PCRE_CONFIG               pcre16_config
586    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
587    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
588    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
589    #define PCRE_EXEC                 PCRE_EXEC16
590    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
591    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
592    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
593    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
594    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
595    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
596    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
597    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
598    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
599    #define PCRE_MAKETABLES           pcre16_maketables()
600    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
601    #define PCRE_PRINTINT             PCRE_PRINTINT16
602    #define PCRE_STUDY                PCRE_STUDY16
603    #endif
604    
605    /* ----- End of mode-specific function call macros ----- */
606    
607    
608  /* Other parameters */  /* Other parameters */
609    
# Line 151  UTF8 support if PCRE is built without it Line 615  UTF8 support if PCRE is built without it
615  #endif  #endif
616  #endif  #endif
617    
618    #if !defined NODFA
619    #define DFA_WS_DIMENSION 1000
620    #endif
621    
622  /* This is the default loop count for timing. */  /* This is the default loop count for timing. */
623    
624  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
# Line 165  static int callout_fail_count; Line 633  static int callout_fail_count;
633  static int callout_fail_id;  static int callout_fail_id;
634  static int debug_lengths;  static int debug_lengths;
635  static int first_callout;  static int first_callout;
636    static int jit_was_used;
637  static int locale_set = 0;  static int locale_set = 0;
638  static int show_malloc;  static int show_malloc;
639  static int use_utf8;  static int use_utf;
640  static size_t gotten_store;  static size_t gotten_store;
641    static size_t first_gotten_store = 0;
642    static const unsigned char *last_callout_mark = NULL;
643    
644  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
645    
646  static int buffer_size = 50000;  static int buffer_size = 50000;
647  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
648  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
649  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
650    
651    /* Another buffer is needed for translation to 16-bit character strings. It
652    will be obtained and extended as required. */
653    
654    #ifdef SUPPORT_PCRE16
655    static int buffer16_size = 0;
656    static pcre_uint16 *buffer16 = NULL;
657    
658    #ifdef SUPPORT_PCRE8
659    
660    /* We need the table of operator lengths that is used for 16-bit compiling, in
661    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
662    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
663    appropriately for the 16-bit world. Just as a safety check, make sure that
664    COMPILE_PCRE16 is *not* set. */
665    
666    #ifdef COMPILE_PCRE16
667    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
668    #endif
669    
670    #if LINK_SIZE == 2
671    #undef LINK_SIZE
672    #define LINK_SIZE 1
673    #elif LINK_SIZE == 3 || LINK_SIZE == 4
674    #undef LINK_SIZE
675    #define LINK_SIZE 2
676    #else
677    #error LINK_SIZE must be either 2, 3, or 4
678    #endif
679    
680    #undef IMM2_SIZE
681    #define IMM2_SIZE 1
682    
683    #endif /* SUPPORT_PCRE8 */
684    
685    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
686    #endif  /* SUPPORT_PCRE16 */
687    
688    /* If we have 8-bit support, default use_pcre16 to false; if there is also
689    16-bit support, it can be changed by an option. If there is no 8-bit support,
690    there must be 16-bit support, so default it to 1. */
691    
692    #ifdef SUPPORT_PCRE8
693    static int use_pcre16 = 0;
694    #else
695    static int use_pcre16 = 1;
696    #endif
697    
698    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
699    
700    static int jit_study_bits[] =
701      {
702      PCRE_STUDY_JIT_COMPILE,
703      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
704      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
705      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
706      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
707      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
708      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
709        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
710    };
711    
712    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
713      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
714    
/* Textual explanations for runtime error codes. The order of the entries is
significant: each position corresponds to one error number, and a NULL entry
marks a code that has no fixed text here (see the per-entry comments).
NOTE(review): presumably indexed by the negated PCRE error code - confirm
against the code that reads this table. The table is never meant to be
modified, so both the strings and the pointer array are const. */

static const char * const errtexts[] = {
  NULL,  /* 0 is no error */
  NULL,  /* NOMATCH is handled specially */
  "NULL argument passed",
  "bad option value",
  "magic number missing",
  "unknown opcode - pattern overwritten?",
  "no more memory",
  NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",
  "callout error code",
  NULL,  /* BADUTF8/16 is handled specially */
  NULL,  /* BADUTF8/16 offset is handled specially */
  NULL,  /* PARTIAL is handled specially */
  "not used - internal error",
  "internal error - pattern overwritten?",
  "bad count value",
  "item unsupported for DFA matching",
  "backreference condition or recursion test not supported for DFA matching",
  "match limit not supported for DFA matching",
  "workspace size exceeded in DFA matching",
  "too much recursion for DFA matching",
  "recursion limit exceeded",
  "not used - internal error",
  "invalid combination of newline options",
  "bad offset value",
  NULL,  /* SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",
  "JIT stack limit reached",
  "pattern compiled in wrong mode: 8-bit/16-bit error",
  "pattern compiled with other endianness",
  "invalid data in workspace for DFA restart"
};
750    
751    
752    /*************************************************
753    *         Alternate character tables             *
754    *************************************************/
755    
756    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
757    using the default tables of the library. However, the T option can be used to
758    select alternate sets of tables, for different kinds of testing. Note also that
759    the L (locale) option also adjusts the tables. */
760    
761    /* This is the set of tables distributed as default with PCRE. It recognizes
762    only ASCII characters. */
763    
764    static const pcre_uint8 tables0[] = {
765    
766    /* This table is a lower casing table. */
767    
768        0,  1,  2,  3,  4,  5,  6,  7,
769        8,  9, 10, 11, 12, 13, 14, 15,
770       16, 17, 18, 19, 20, 21, 22, 23,
771       24, 25, 26, 27, 28, 29, 30, 31,
772       32, 33, 34, 35, 36, 37, 38, 39,
773       40, 41, 42, 43, 44, 45, 46, 47,
774       48, 49, 50, 51, 52, 53, 54, 55,
775       56, 57, 58, 59, 60, 61, 62, 63,
776       64, 97, 98, 99,100,101,102,103,
777      104,105,106,107,108,109,110,111,
778      112,113,114,115,116,117,118,119,
779      120,121,122, 91, 92, 93, 94, 95,
780       96, 97, 98, 99,100,101,102,103,
781      104,105,106,107,108,109,110,111,
782      112,113,114,115,116,117,118,119,
783      120,121,122,123,124,125,126,127,
784      128,129,130,131,132,133,134,135,
785      136,137,138,139,140,141,142,143,
786      144,145,146,147,148,149,150,151,
787      152,153,154,155,156,157,158,159,
788      160,161,162,163,164,165,166,167,
789      168,169,170,171,172,173,174,175,
790      176,177,178,179,180,181,182,183,
791      184,185,186,187,188,189,190,191,
792      192,193,194,195,196,197,198,199,
793      200,201,202,203,204,205,206,207,
794      208,209,210,211,212,213,214,215,
795      216,217,218,219,220,221,222,223,
796      224,225,226,227,228,229,230,231,
797      232,233,234,235,236,237,238,239,
798      240,241,242,243,244,245,246,247,
799      248,249,250,251,252,253,254,255,
800    
801    /* This table is a case flipping table. */
802    
803        0,  1,  2,  3,  4,  5,  6,  7,
804        8,  9, 10, 11, 12, 13, 14, 15,
805       16, 17, 18, 19, 20, 21, 22, 23,
806       24, 25, 26, 27, 28, 29, 30, 31,
807       32, 33, 34, 35, 36, 37, 38, 39,
808       40, 41, 42, 43, 44, 45, 46, 47,
809       48, 49, 50, 51, 52, 53, 54, 55,
810       56, 57, 58, 59, 60, 61, 62, 63,
811       64, 97, 98, 99,100,101,102,103,
812      104,105,106,107,108,109,110,111,
813      112,113,114,115,116,117,118,119,
814      120,121,122, 91, 92, 93, 94, 95,
815       96, 65, 66, 67, 68, 69, 70, 71,
816       72, 73, 74, 75, 76, 77, 78, 79,
817       80, 81, 82, 83, 84, 85, 86, 87,
818       88, 89, 90,123,124,125,126,127,
819      128,129,130,131,132,133,134,135,
820      136,137,138,139,140,141,142,143,
821      144,145,146,147,148,149,150,151,
822      152,153,154,155,156,157,158,159,
823      160,161,162,163,164,165,166,167,
824      168,169,170,171,172,173,174,175,
825      176,177,178,179,180,181,182,183,
826      184,185,186,187,188,189,190,191,
827      192,193,194,195,196,197,198,199,
828      200,201,202,203,204,205,206,207,
829      208,209,210,211,212,213,214,215,
830      216,217,218,219,220,221,222,223,
831      224,225,226,227,228,229,230,231,
832      232,233,234,235,236,237,238,239,
833      240,241,242,243,244,245,246,247,
834      248,249,250,251,252,253,254,255,
835    
836    /* This table contains bit maps for various character classes. Each map is 32
837    bytes long and the bits run from the least significant end of each byte. The
838    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
839    graph, print, punct, and cntrl. Other classes are built from combinations. */
840    
841      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
842      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
843      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845    
846      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
847      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850    
851      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
852      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
853      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
855    
856      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
858      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
859      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
860    
861      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
862      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
863      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
864      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
865    
866      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
867      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
868      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
869      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
870    
871      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
872      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
873      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
874      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
875    
876      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
877      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
878      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
879      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
880    
881      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
882      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
883      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
884      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
885    
886      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
887      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
888      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
889      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
890    
891    /* This table identifies various classes of character by individual bits:
892      0x01   white space character
893      0x02   letter
894      0x04   decimal digit
895      0x08   hexadecimal digit
896      0x10   alphanumeric or '_'
897      0x80   regular expression metacharacter or binary zero
898    */
899    
900      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
901      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
902      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
903      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
904      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
905      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
906      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
907      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
908      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
909      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
910      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
911      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
912      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
913      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
914      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
915      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
916      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
917      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
918      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
919      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
920      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
921      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
922      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
923      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
924      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
925      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
926      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
927      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
928      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
929      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
930      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
931      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
932    
933    /* This is a set of tables that came originally from a Windows user. It seems to
934    be at least an approximation of ISO 8859. In particular, there are characters
935    greater than 128 that are marked as spaces, letters, etc. */
936    
937    static const pcre_uint8 tables1[] = {
938    0,1,2,3,4,5,6,7,
939    8,9,10,11,12,13,14,15,
940    16,17,18,19,20,21,22,23,
941    24,25,26,27,28,29,30,31,
942    32,33,34,35,36,37,38,39,
943    40,41,42,43,44,45,46,47,
944    48,49,50,51,52,53,54,55,
945    56,57,58,59,60,61,62,63,
946    64,97,98,99,100,101,102,103,
947    104,105,106,107,108,109,110,111,
948    112,113,114,115,116,117,118,119,
949    120,121,122,91,92,93,94,95,
950    96,97,98,99,100,101,102,103,
951    104,105,106,107,108,109,110,111,
952    112,113,114,115,116,117,118,119,
953    120,121,122,123,124,125,126,127,
954    128,129,130,131,132,133,134,135,
955    136,137,138,139,140,141,142,143,
956    144,145,146,147,148,149,150,151,
957    152,153,154,155,156,157,158,159,
958    160,161,162,163,164,165,166,167,
959    168,169,170,171,172,173,174,175,
960    176,177,178,179,180,181,182,183,
961    184,185,186,187,188,189,190,191,
962    224,225,226,227,228,229,230,231,
963    232,233,234,235,236,237,238,239,
964    240,241,242,243,244,245,246,215,
965    248,249,250,251,252,253,254,223,
966    224,225,226,227,228,229,230,231,
967    232,233,234,235,236,237,238,239,
968    240,241,242,243,244,245,246,247,
969    248,249,250,251,252,253,254,255,
970    0,1,2,3,4,5,6,7,
971    8,9,10,11,12,13,14,15,
972    16,17,18,19,20,21,22,23,
973    24,25,26,27,28,29,30,31,
974    32,33,34,35,36,37,38,39,
975    40,41,42,43,44,45,46,47,
976    48,49,50,51,52,53,54,55,
977    56,57,58,59,60,61,62,63,
978    64,97,98,99,100,101,102,103,
979    104,105,106,107,108,109,110,111,
980    112,113,114,115,116,117,118,119,
981    120,121,122,91,92,93,94,95,
982    96,65,66,67,68,69,70,71,
983    72,73,74,75,76,77,78,79,
984    80,81,82,83,84,85,86,87,
985    88,89,90,123,124,125,126,127,
986    128,129,130,131,132,133,134,135,
987    136,137,138,139,140,141,142,143,
988    144,145,146,147,148,149,150,151,
989    152,153,154,155,156,157,158,159,
990    160,161,162,163,164,165,166,167,
991    168,169,170,171,172,173,174,175,
992    176,177,178,179,180,181,182,183,
993    184,185,186,187,188,189,190,191,
994    224,225,226,227,228,229,230,231,
995    232,233,234,235,236,237,238,239,
996    240,241,242,243,244,245,246,215,
997    248,249,250,251,252,253,254,223,
998    192,193,194,195,196,197,198,199,
999    200,201,202,203,204,205,206,207,
1000    208,209,210,211,212,213,214,247,
1001    216,217,218,219,220,221,222,255,
1002    0,62,0,0,1,0,0,0,
1003    0,0,0,0,0,0,0,0,
1004    32,0,0,0,1,0,0,0,
1005    0,0,0,0,0,0,0,0,
1006    0,0,0,0,0,0,255,3,
1007    126,0,0,0,126,0,0,0,
1008    0,0,0,0,0,0,0,0,
1009    0,0,0,0,0,0,0,0,
1010    0,0,0,0,0,0,255,3,
1011    0,0,0,0,0,0,0,0,
1012    0,0,0,0,0,0,12,2,
1013    0,0,0,0,0,0,0,0,
1014    0,0,0,0,0,0,0,0,
1015    254,255,255,7,0,0,0,0,
1016    0,0,0,0,0,0,0,0,
1017    255,255,127,127,0,0,0,0,
1018    0,0,0,0,0,0,0,0,
1019    0,0,0,0,254,255,255,7,
1020    0,0,0,0,0,4,32,4,
1021    0,0,0,128,255,255,127,255,
1022    0,0,0,0,0,0,255,3,
1023    254,255,255,135,254,255,255,7,
1024    0,0,0,0,0,4,44,6,
1025    255,255,127,255,255,255,127,255,
1026    0,0,0,0,254,255,255,255,
1027    255,255,255,255,255,255,255,127,
1028    0,0,0,0,254,255,255,255,
1029    255,255,255,255,255,255,255,255,
1030    0,2,0,0,255,255,255,255,
1031    255,255,255,255,255,255,255,127,
1032    0,0,0,0,255,255,255,255,
1033    255,255,255,255,255,255,255,255,
1034    0,0,0,0,254,255,0,252,
1035    1,0,0,248,1,0,0,120,
1036    0,0,0,0,254,255,255,255,
1037    0,0,128,0,0,0,128,0,
1038    255,255,255,255,0,0,0,0,
1039    0,0,0,0,0,0,0,128,
1040    255,255,255,255,0,0,0,0,
1041    0,0,0,0,0,0,0,0,
1042    128,0,0,0,0,0,0,0,
1043    0,1,1,0,1,1,0,0,
1044    0,0,0,0,0,0,0,0,
1045    0,0,0,0,0,0,0,0,
1046    1,0,0,0,128,0,0,0,
1047    128,128,128,128,0,0,128,0,
1048    28,28,28,28,28,28,28,28,
1049    28,28,0,0,0,0,0,128,
1050    0,26,26,26,26,26,26,18,
1051    18,18,18,18,18,18,18,18,
1052    18,18,18,18,18,18,18,18,
1053    18,18,18,128,128,0,128,16,
1054    0,26,26,26,26,26,26,18,
1055    18,18,18,18,18,18,18,18,
1056    18,18,18,18,18,18,18,18,
1057    18,18,18,128,128,0,0,0,
1058    0,0,0,0,0,1,0,0,
1059    0,0,0,0,0,0,0,0,
1060    0,0,0,0,0,0,0,0,
1061    0,0,0,0,0,0,0,0,
1062    1,0,0,0,0,0,0,0,
1063    0,0,18,0,0,0,0,0,
1064    0,0,20,20,0,18,0,0,
1065    0,20,18,0,0,0,0,0,
1066    18,18,18,18,18,18,18,18,
1067    18,18,18,18,18,18,18,18,
1068    18,18,18,18,18,18,18,0,
1069    18,18,18,18,18,18,18,18,
1070    18,18,18,18,18,18,18,18,
1071    18,18,18,18,18,18,18,18,
1072    18,18,18,18,18,18,18,0,
1073    18,18,18,18,18,18,18,18
1074    };
1075    
1076    
1077    
1078    
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for old systems (SunOS4 is one example) whose C library has no
strerror() but does export the sys_nerr/sys_errlist pair. The error number is
used as an index into sys_errlist; anything outside 0 .. sys_nerr-1 yields a
fixed "unknown" message instead. */

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1098    
1099    
1100    
1101    /*************************************************
1102    *       Print newline configuration              *
1103    *************************************************/
1104    
1105    /*
1106    Argument: the return code from PCRE_CONFIG_NEWLINE
1107    Returns:  nothing
1108    */
1109    
1110    static void
1111    print_newline_config(int rc)
1112    {
1113    const char *s = NULL;
1114    printf("  Newline sequence is ");
1115    switch(rc)
1116      {
1117      case CHAR_CR: s = "CR"; break;
1118      case CHAR_LF: s = "LF"; break;
1119      case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1120      case -1: s = "ANY"; break;
1121      case -2: s = "ANYCRLF"; break;
1122    
1123      default:
1124      printf("a non-standard value: 0x%04x\n", rc);
1125      return;
1126      }
1127    
1128    printf("%s\n", s);
1129    }
1130    
1131    
1132    
1133    /*************************************************
1134    *         JIT memory callback                    *
1135    *************************************************/
1136    
1137    static pcre_jit_stack* jit_callback(void *arg)
1138    {
1139    jit_was_used = TRUE;
1140    return (pcre_jit_stack *)arg;
1141    }
1142    
1143    
1144    #if !defined NOUTF || defined SUPPORT_PCRE16
1145    /*************************************************
1146    *            Convert UTF-8 string to value       *
1147    *************************************************/
1148    
1149    /* This function takes one or more bytes that represents a UTF-8 character,
1150    and returns the value of the character.
1151    
1152    Argument:
1153      utf8bytes   a pointer to the byte vector
1154      vptr        a pointer to an int to receive the value
1155    
1156    Returns:      >  0 => the number of bytes consumed
1157                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1158    */
1159    
1160    static int
1161    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1162    {
1163    int c = *utf8bytes++;
1164    int d = c;
1165    int i, j, s;
1166    
1167    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1168      {
1169      if ((d & 0x80) == 0) break;
1170      d <<= 1;
1171      }
1172    
1173    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1174    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1175    
1176    /* i now has a value in the range 1-5 */
1177    
1178    s = 6*i;
1179    d = (c & utf8_table3[i]) << s;
1180    
1181    for (j = 0; j < i; j++)
1182      {
1183      c = *utf8bytes++;
1184      if ((c & 0xc0) != 0x80) return -(j+1);
1185      s -= 6;
1186      d |= (c & 0x3f) << s;
1187      }
1188    
1189    /* Check that encoding was the correct unique one */
1190    
1191    for (j = 0; j < utf8_table1_size; j++)
1192      if (d <= utf8_table1[j]) break;
1193    if (j != i) return -(i+1);
1194    
1195    /* Valid value */
1196    
1197    *vptr = d;
1198    return i+1;
1199    }
1200    #endif /* NOUTF || SUPPORT_PCRE16 */
1201    
1202    
1203    
1204    #if !defined NOUTF || defined SUPPORT_PCRE16
1205    /*************************************************
1206    *       Convert character value to UTF-8         *
1207    *************************************************/
1208    
1209    /* This function takes an integer value in the range 0 - 0x7fffffff
1210    and encodes it as a UTF-8 character in 0 to 6 bytes.
1211    
1212    Arguments:
1213      cvalue     the character value
1214      utf8bytes  pointer to buffer for result - at least 6 bytes long
1215    
1216    Returns:     number of characters placed in the buffer
1217    */
1218    
1219    static int
1220    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1221    {
1222    register int i, j;
1223    for (i = 0; i < utf8_table1_size; i++)
1224      if (cvalue <= utf8_table1[i]) break;
1225    utf8bytes += i;
1226    for (j = i; j > 0; j--)
1227     {
1228     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1229     cvalue >>= 6;
1230     }
1231    *utf8bytes = utf8_table2[i] | cvalue;
1232    return i + 1;
1233    }
1234    #endif
1235    
1236    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is (re)allocated here whenever
it is smaller than 2*len + 2 bytes. That size is always sufficient: in non-UTF
mode each input byte becomes one 16-bit unit; when decoding UTF-8, a value up
to 0xffff occupies at least one input byte and produces one 16-bit unit, and a
higher value occupies 4 input bytes and produces two 16-bit units. The extra 2
bytes hold the terminating zero.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled. Consequently the only
case that is copied byte-for-byte is a non-UTF pattern; everything else is
decoded as UTF-8.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed (this includes a character
                   whose encoding would extend beyond len bytes)
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode

If memory for buffer16 cannot be obtained, a message is written to stderr and
the program exits.
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *pp;

if (buffer16_size < 2*len + 2)
  {
  free(buffer16);                      /* free(NULL) is a no-op */
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

pp = buffer16;

if (!utf && !data)
  {
  while (len-- > 0) *pp++ = *p++;
  }

else
  {
  int c = 0;
  while (len > 0)
    {
    int chlen = utf82ord(p, &c);
    if (chlen <= 0) return -1;
    if (chlen > len) return -1;        /* Mustn't run over the end */
    if (c > 0x10ffff) return -2;
    p += chlen;
    len -= chlen;
    if (c < 0x10000) *pp++ = c; else
      {
      if (!utf) return -3;

      /* Split into a high/low surrogate pair. */

      c -= 0x10000;
      *pp++ = 0xD800 | (c >> 10);
      *pp++ = 0xDC00 | (c & 0x3ff);
      }
    }
  }

*pp = 0;
return pp - buffer16;
}
#endif
1315    
1316    
1317  /*************************************************  /*************************************************
1318  *        Read or extend an input line            *  *        Read or extend an input line            *
# Line 202  Returns:       pointer to the start of n Line 1337  Returns:       pointer to the start of n
1337                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1338  */  */
1339    
1340  static uschar *  static pcre_uint8 *
1341  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1342  {  {
1343  uschar *here = start;  pcre_uint8 *here = start;
1344    
1345  for (;;)  for (;;)
1346    {    {
1347    int rlen = buffer_size - (here - buffer);    size_t rlen = (size_t)(buffer_size - (here - buffer));
1348    
1349    if (rlen > 1000)    if (rlen > 1000)
1350      {      {
1351      int dlen;      int dlen;
1352    
1353      /* If libreadline support is required, use readline() to read a line if the      /* If libreadline or libedit support is required, use readline() to read a
1354      input is a terminal. Note that readline() removes the trailing newline, so      line if the input is a terminal. Note that readline() removes the trailing
1355      we must put it back again, to be compatible with fgets(). */      newline, so we must put it back again, to be compatible with fgets(). */
1356    
1357  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1358      if (isatty(fileno(f)))      if (isatty(fileno(f)))
1359        {        {
1360        size_t len;        size_t len;
# Line 239  for (;;) Line 1374  for (;;)
1374      /* Read the next line by normal means, prompting if the file is stdin. */      /* Read the next line by normal means, prompting if the file is stdin. */
1375    
1376        {        {
1377        if (f == stdin) printf(prompt);        if (f == stdin) printf("%s", prompt);
1378        if (fgets((char *)here, rlen,  f) == NULL)        if (fgets((char *)here, rlen,  f) == NULL)
1379          return (here == start)? NULL : start;          return (here == start)? NULL : start;
1380        }        }
# Line 252  for (;;) Line 1387  for (;;)
1387    else    else
1388      {      {
1389      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1390      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1391      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1392      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1393    
1394      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1395        {        {
# Line 285  return NULL;  /* Control never gets here Line 1420  return NULL;  /* Control never gets here
1420    
1421    
1422    
   
   
   
   
1423  /*************************************************  /*************************************************
1424  *          Read number from string               *  *          Read number from string               *
1425  *************************************************/  *************************************************/
# Line 305  Returns:        the unsigned long Line 1436  Returns:        the unsigned long
1436  */  */
1437    
1438  static int  static int
1439  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1440  {  {
1441  int result = 0;  int result = 0;
1442  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 316  return(result); Line 1447  return(result);
1447    
1448    
1449    
   
1450  /*************************************************  /*************************************************
1451  *            Convert UTF-8 string to value       *  *             Print one character                *
1452  *************************************************/  *************************************************/
1453    
1454  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
1455    
1456  #if !defined NOUTF8  static int pchar(int c, FILE *f)
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1457  {  {
1458  int c = *utf8bytes++;  if (PRINTOK(c))
 int d = c;  
 int i, j, s;  
   
 for (i = -1; i < 6; i++)               /* i is number of additional bytes */  
1459    {    {
1460    if ((d & 0x80) == 0) break;    if (f != NULL) fprintf(f, "%c", c);
1461    d <<= 1;    return 1;
1462    }    }
1463    
1464  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (c < 0x100)
 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  
   
 /* i now has a value in the range 1-5 */  
   
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
   
 for (j = 0; j < i; j++)  
1465    {    {
1466    c = *utf8bytes++;    if (use_utf)
1467    if ((c & 0xc0) != 0x80) return -(j+1);      {
1468    s -= 6;      if (f != NULL) fprintf(f, "\\x{%02x}", c);
1469    d |= (c & 0x3f) << s;      return 6;
1470    }      }
1471      else
1472  /* Check that encoding was the correct unique one */      {
1473        if (f != NULL) fprintf(f, "\\x%02x", c);
1474  for (j = 0; j < utf8_table1_size; j++)      return 4;
1475    if (d <= utf8_table1[j]) break;      }
1476  if (j != i) return -(i+1);    }
   
 /* Valid value */  
   
 *vptr = d;  
 return i+1;  
 }  
   
 #endif  
   
   
   
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
 */  
   
 #if !defined NOUTF8  
1477    
1478  static int  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1479  ord2utf8(int cvalue, uschar *utf8bytes)  return (c <= 0x000000ff)? 6 :
1480  {         (c <= 0x00000fff)? 7 :
1481  register int i, j;         (c <= 0x0000ffff)? 8 :
1482  for (i = 0; i < utf8_table1_size; i++)         (c <= 0x000fffff)? 9 : 10;
   if (cvalue <= utf8_table1[i]) break;  
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1483  }  }
1484    
 #endif  
   
1485    
1486    
1487    #ifdef SUPPORT_PCRE8
1488  /*************************************************  /*************************************************
1489  *             Print character string             *  *         Print 8-bit character string           *
1490  *************************************************/  *************************************************/
1491    
1492  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1493  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1494    
1495  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1496  {  {
1497  int c = 0;  int c = 0;
1498  int yield = 0;  int yield = 0;
1499    
1500    if (length < 0)
1501      length = strlen((char *)p);
1502    
1503  while (length-- > 0)  while (length-- > 0)
1504    {    {
1505  #if !defined NOUTF8  #if !defined NOUTF
1506    if (use_utf8)    if (use_utf)
1507      {      {
1508      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
   
1509      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1510        {        {
1511        length -= rc - 1;        length -= rc - 1;
1512        p += rc;        p += rc;
1513        if (PRINTHEX(c))        yield += pchar(c, f);
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
1514        continue;        continue;
1515        }        }
1516      }      }
1517  #endif  #endif
1518      c = *p++;
1519      yield += pchar(c, f);
1520      }
1521    
1522     /* Not UTF-8, or malformed UTF-8  */  return yield;
1523    }
1524    #endif
1525    
1526    c = *p++;  
1527    if (PRINTHEX(c))  
1528      {  #ifdef SUPPORT_PCRE16
1529      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1530      yield++;  *    Find length of 0-terminated 16-bit string   *
1531      }  *************************************************/
1532    else  
1533    static int strlen16(PCRE_SPTR16 p)
1534    {
1535    int len = 0;
1536    while (*p++ != 0) len++;
1537    return len;
1538    }
1539    #endif  /* SUPPORT_PCRE16 */
1540    
1541    
1542    #ifdef SUPPORT_PCRE16
1543    /*************************************************
1544    *           Print 16-bit character string        *
1545    *************************************************/
1546    
1547    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1548    If handed a NULL file, just counts chars without printing. */
1549    
1550    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1551    {
1552    int yield = 0;
1553    
1554    if (length < 0)
1555      length = strlen16(p);
1556    
1557    while (length-- > 0)
1558      {
1559      int c = *p++ & 0xffff;
1560    #if !defined NOUTF
1561      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1562      {      {
1563      if (f != NULL) fprintf(f, "\\x%02x", c);      int d = *p & 0xffff;
1564      yield += 4;      if (d >= 0xDC00 && d < 0xDFFF)
1565          {
1566          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1567          length--;
1568          p++;
1569          }
1570      }      }
1571    #endif
1572      yield += pchar(c, f);
1573    }    }
1574    
1575  return yield;  return yield;
1576  }  }
1577    #endif  /* SUPPORT_PCRE16 */
1578    
1579    
1580    
1581    #ifdef SUPPORT_PCRE8
1582    /*************************************************
1583    *     Read a capture name (8-bit) and check it   *
1584    *************************************************/
1585    
1586    static pcre_uint8 *
1587    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1588    {
1589    pcre_uint8 *npp = *pp;
1590    while (isalnum(*p)) *npp++ = *p++;
1591    *npp++ = 0;
1592    *npp = 0;
1593    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1594      {
1595      fprintf(outfile, "no parentheses with name \"");
1596      PCHARSV(*pp, 0, -1, outfile);
1597      fprintf(outfile, "\"\n");
1598      }
1599    
1600    *pp = npp;
1601    return p;
1602    }
1603    #endif  /* SUPPORT_PCRE8 */
1604    
1605    
1606    
1607    #ifdef SUPPORT_PCRE16
1608    /*************************************************
1609    *     Read a capture name (16-bit) and check it  *
1610    *************************************************/
1611    
1612    /* Note that the text being read is 8-bit. */
1613    
1614    static pcre_uint8 *
1615    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1616    {
1617    pcre_uint16 *npp = *pp;
1618    while (isalnum(*p)) *npp++ = *p++;
1619    *npp++ = 0;
1620    *npp = 0;
1621    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1622      {
1623      fprintf(outfile, "no parentheses with name \"");
1624      PCHARSV(*pp, 0, -1, outfile);
1625      fprintf(outfile, "\"\n");
1626      }
1627    *pp = npp;
1628    return p;
1629    }
1630    #endif  /* SUPPORT_PCRE16 */
1631    
1632    
1633    
# Line 503  if (callout_extra) Line 1656  if (callout_extra)
1656      else      else
1657        {        {
1658        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1659        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1660          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1661        fprintf(f, "\n");        fprintf(f, "\n");
1662        }        }
# Line 516  printed lengths of the substrings. */ Line 1669  printed lengths of the substrings. */
1669    
1670  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1671    
1672  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1673  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1674    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1675    
1676  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1677    
1678  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1679    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1680    
1681  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 559  fprintf(outfile, "%.*s", (cb->next_item_ Line 1712  fprintf(outfile, "%.*s", (cb->next_item_
1712  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1713  first_callout = 0;  first_callout = 0;
1714    
1715    if (cb->mark != last_callout_mark)
1716      {
1717      if (cb->mark == NULL)
1718        fprintf(outfile, "Latest Mark: <unset>\n");
1719      else
1720        {
1721        fprintf(outfile, "Latest Mark: ");
1722        PCHARSV(cb->mark, 0, -1, outfile);
1723        putc('\n', outfile);
1724        }
1725      last_callout_mark = cb->mark;
1726      }
1727    
1728  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1729    {    {
1730    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 578  return (cb->callout_number != callout_fa Line 1744  return (cb->callout_number != callout_fa
1744  *            Local malloc functions              *  *            Local malloc functions              *
1745  *************************************************/  *************************************************/
1746    
1747  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1748  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1749    show_malloc variable is set only during matching. */
1750    
1751  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1752  {  {
1753  void *block = malloc(size);  void *block = malloc(size);
1754  gotten_store = size;  gotten_store = size;
1755    if (first_gotten_store == 0) first_gotten_store = size;
1756  if (show_malloc)  if (show_malloc)
1757    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1758  return block;  return block;
# Line 597  if (show_malloc) Line 1765  if (show_malloc)
1765  free(block);  free(block);
1766  }  }
1767    
   
1768  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1769    
1770  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 620  free(block); Line 1787  free(block);
1787  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1788  *************************************************/  *************************************************/
1789    
1790  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1791    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1792    value, but the code is defensive.
1793    
1794    Arguments:
1795      re        compiled regex
1796      study     study data
1797      option    PCRE_INFO_xxx option
1798      ptr       where to put the data
1799    
1800    Returns:    0 when OK, < 0 on error
1801    */
1802    
1803  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
1804    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1805  {  {
1806  int rc;  int rc;
1807  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1808    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1809    #ifdef SUPPORT_PCRE16
1810      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1811    #else
1812      rc = PCRE_ERROR_BADMODE;
1813    #endif
1814    else
1815    #ifdef SUPPORT_PCRE8
1816      rc = pcre_fullinfo(re, study, option, ptr);
1817    #else
1818      rc = PCRE_ERROR_BADMODE;
1819    #endif
1820    
1821    if (rc < 0)
1822      {
1823      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1824        use_pcre16? "16" : "", option);
1825      if (rc == PCRE_ERROR_BADMODE)
1826        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1827          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1828      }
1829    
1830    return rc;
1831  }  }
1832    
1833    
1834    
1835  /*************************************************  /*************************************************
1836  *         Byte flipping function                 *  *             Swap byte functions                *
1837  *************************************************/  *************************************************/
1838    
1839  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1840  byteflip(unsigned long int value, int n)  value, respectively.
1841    
1842    Arguments:
1843      value        any number
1844    
1845    Returns:       the byte swapped value
1846    */
1847    
1848    static pcre_uint32
1849    swap_uint32(pcre_uint32 value)
1850  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1851  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1852         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1853         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1854         ((value & 0xff000000) >> 24);         (value >> 24);
1855  }  }
1856    
1857    static pcre_uint16
1858    swap_uint16(pcre_uint16 value)
1859    {
1860    return (value >> 8) | (value << 8);
1861    }
1862    
1863    
1864    
1865    /*************************************************
1866    *        Flip bytes in a compiled pattern        *
1867    *************************************************/
1868    
1869    /* This function is called if the 'F' option was present on a pattern that is
1870    to be written to a file. We flip the bytes of all the integer fields in the
1871    regex data block and the study block. In 16-bit mode this also flips relevant
1872    bytes in the pattern itself. This is to make it possible to test PCRE's
1873    ability to reload byte-flipped patterns, e.g. those compiled on a different
1874    architecture. */
1875    
1876    static void
1877    regexflip(pcre *ere, pcre_extra *extra)
1878    {
1879    REAL_PCRE *re = (REAL_PCRE *)ere;
1880    #ifdef SUPPORT_PCRE16
1881    int op;
1882    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1883    int length = re->name_count * re->name_entry_size;
1884    #ifdef SUPPORT_UTF
1885    BOOL utf = (re->options & PCRE_UTF16) != 0;
1886    BOOL utf16_char = FALSE;
1887    #endif /* SUPPORT_UTF */
1888    #endif /* SUPPORT_PCRE16 */
1889    
1890    /* Always flip the bytes in the main data block and study blocks. */
1891    
1892    re->magic_number = REVERSED_MAGIC_NUMBER;
1893    re->size = swap_uint32(re->size);
1894    re->options = swap_uint32(re->options);
1895    re->flags = swap_uint16(re->flags);
1896    re->top_bracket = swap_uint16(re->top_bracket);
1897    re->top_backref = swap_uint16(re->top_backref);
1898    re->first_char = swap_uint16(re->first_char);
1899    re->req_char = swap_uint16(re->req_char);
1900    re->name_table_offset = swap_uint16(re->name_table_offset);
1901    re->name_entry_size = swap_uint16(re->name_entry_size);
1902    re->name_count = swap_uint16(re->name_count);
1903    
1904    if (extra != NULL)
1905      {
1906      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1907      rsd->size = swap_uint32(rsd->size);
1908      rsd->flags = swap_uint32(rsd->flags);
1909      rsd->minlength = swap_uint32(rsd->minlength);
1910      }
1911    
1912    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1913    in the name table, if present, and then in the pattern itself. */
1914    
1915    #ifdef SUPPORT_PCRE16
1916    if (!use_pcre16) return;
1917    
1918    while(TRUE)
1919      {
1920      /* Swap previous characters. */
1921      while (length-- > 0)
1922        {
1923        *ptr = swap_uint16(*ptr);
1924        ptr++;
1925        }
1926    #ifdef SUPPORT_UTF
1927      if (utf16_char)
1928        {
1929        if ((ptr[-1] & 0xfc00) == 0xd800)
1930          {
1931          /* We know that there is only one extra character in UTF-16. */
1932          *ptr = swap_uint16(*ptr);
1933          ptr++;
1934          }
1935        }
1936      utf16_char = FALSE;
1937    #endif /* SUPPORT_UTF */
1938    
1939      /* Get next opcode. */
1940    
1941      length = 0;
1942      op = *ptr;
1943      *ptr++ = swap_uint16(op);
1944    
1945      switch (op)
1946        {
1947        case OP_END:
1948        return;
1949    
1950    #ifdef SUPPORT_UTF
1951        case OP_CHAR:
1952        case OP_CHARI:
1953        case OP_NOT:
1954        case OP_NOTI:
1955        case OP_STAR:
1956        case OP_MINSTAR:
1957        case OP_PLUS:
1958        case OP_MINPLUS:
1959        case OP_QUERY:
1960        case OP_MINQUERY:
1961        case OP_UPTO:
1962        case OP_MINUPTO:
1963        case OP_EXACT:
1964        case OP_POSSTAR:
1965        case OP_POSPLUS:
1966        case OP_POSQUERY:
1967        case OP_POSUPTO:
1968        case OP_STARI:
1969        case OP_MINSTARI:
1970        case OP_PLUSI:
1971        case OP_MINPLUSI:
1972        case OP_QUERYI:
1973        case OP_MINQUERYI:
1974        case OP_UPTOI:
1975        case OP_MINUPTOI:
1976        case OP_EXACTI:
1977        case OP_POSSTARI:
1978        case OP_POSPLUSI:
1979        case OP_POSQUERYI:
1980        case OP_POSUPTOI:
1981        case OP_NOTSTAR:
1982        case OP_NOTMINSTAR:
1983        case OP_NOTPLUS:
1984        case OP_NOTMINPLUS:
1985        case OP_NOTQUERY:
1986        case OP_NOTMINQUERY:
1987        case OP_NOTUPTO:
1988        case OP_NOTMINUPTO:
1989        case OP_NOTEXACT:
1990        case OP_NOTPOSSTAR:
1991        case OP_NOTPOSPLUS:
1992        case OP_NOTPOSQUERY:
1993        case OP_NOTPOSUPTO:
1994        case OP_NOTSTARI:
1995        case OP_NOTMINSTARI:
1996        case OP_NOTPLUSI:
1997        case OP_NOTMINPLUSI:
1998        case OP_NOTQUERYI:
1999        case OP_NOTMINQUERYI:
2000        case OP_NOTUPTOI:
2001        case OP_NOTMINUPTOI:
2002        case OP_NOTEXACTI:
2003        case OP_NOTPOSSTARI:
2004        case OP_NOTPOSPLUSI:
2005        case OP_NOTPOSQUERYI:
2006        case OP_NOTPOSUPTOI:
2007        if (utf) utf16_char = TRUE;
2008    #endif
2009        /* Fall through. */
2010    
2011        default:
2012        length = OP_lengths16[op] - 1;
2013        break;
2014    
2015        case OP_CLASS:
2016        case OP_NCLASS:
2017        /* Skip the character bit map. */
2018        ptr += 32/sizeof(pcre_uint16);
2019        length = 0;
2020        break;
2021    
2022        case OP_XCLASS:
2023        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2024        if (LINK_SIZE > 1)
2025          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2026            - (1 + LINK_SIZE + 1));
2027        else
2028          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2029    
2030        /* Reverse the size of the XCLASS instance. */
2031        *ptr = swap_uint16(*ptr);
2032        ptr++;
2033        if (LINK_SIZE > 1)
2034          {
2035          *ptr = swap_uint16(*ptr);
2036          ptr++;
2037          }
2038    
2039        op = *ptr;
2040        *ptr = swap_uint16(op);
2041        ptr++;
2042        if ((op & XCL_MAP) != 0)
2043          {
2044          /* Skip the character bit map. */
2045          ptr += 32/sizeof(pcre_uint16);
2046          length -= 32/sizeof(pcre_uint16);
2047          }
2048        break;
2049        }
2050      }
2051    /* Control should never reach here in 16 bit mode. */
2052    #endif /* SUPPORT_PCRE16 */
2053    }
2054    
2055    
2056    
# Line 653  return ((value & 0x000000ff) << 24) | Line 2059  return ((value & 0x000000ff) << 24) |
2059  *************************************************/  *************************************************/
2060    
2061  static int  static int
2062  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2063    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2064    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2065  {  {
# Line 668  for (;;) Line 2074  for (;;)
2074    {    {
2075    *limit = mid;    *limit = mid;
2076    
2077    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2078      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2079    
2080    if (count == errnumber)    if (count == errnumber)
# Line 713  Returns:    < 0, = 0, or > 0, according Line 2119  Returns:    < 0, = 0, or > 0, according
2119  */  */
2120    
2121  static int  static int
2122  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2123  {  {
2124  while (n--)  while (n--)
2125    {    {
# Line 729  return 0; Line 2135  return 0;
2135  *         Check newline indicator                *  *         Check newline indicator                *
2136  *************************************************/  *************************************************/
2137    
2138  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2139  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
2140    
2141  Arguments:  Arguments:
2142    p           points after the leading '<'    p           points after the leading '<'
# Line 741  Returns:      appropriate PCRE_NEWLINE_x Line 2146  Returns:      appropriate PCRE_NEWLINE_x
2146  */  */
2147    
2148  static int  static int
2149  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2150  {  {
2151  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2152  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2153  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2154  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2155  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2156  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2157  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2158  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2159  return 0;  return 0;
2160  }  }
# Line 765  usage(void) Line 2170  usage(void)
2170  {  {
2171  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2172  printf("Input and output default to stdin and stdout.\n");  printf("Input and output default to stdin and stdout.\n");
2173  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2174  printf("If input is a terminal, readline() is used to read from it.\n");  printf("If input is a terminal, readline() is used to read from it.\n");
2175  #else  #else
2176  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2177  #endif  #endif
2178  printf("\nOptions:\n");  printf("\nOptions:\n");
2179  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2180    printf("  -16      use the 16-bit library\n");
2181    #endif
2182    printf("  -b       show compiled code\n");
2183  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2184    printf("  -C arg   show a specific compile-time option\n");
2185    printf("           and exit with its value. The arg can be:\n");
2186    printf("     linksize     internal link size [2, 3, 4]\n");
2187    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2188    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2189    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2190    printf("     ucp          Unicode Properties supported [0, 1]\n");
2191    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2192    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2193  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2194  #if !defined NODFA  #if !defined NODFA
2195  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
2196  #endif  #endif
2197  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
2198  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
2199           "  -M       find MATCH_LIMIT minimum for each subject\n"
2200         "  -m       output memory used information\n"         "  -m       output memory used information\n"
2201         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
2202  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 786  printf("  -p       use POSIX interface\n Line 2204  printf("  -p       use POSIX interface\n
2204  #endif  #endif
2205  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2206  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2207  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
2208           "  -s+      force each pattern to be studied, using JIT if available\n"
2209           "  -s++     ditto, verifying when JIT was actually used\n"
2210           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2211           "             where 1 <= n <= 7 selects JIT options\n"
2212           "  -s++n    ditto, verifying when JIT was actually used\n"
2213         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2214  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2215  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 806  options, followed by a set of test data, Line 2229  options, followed by a set of test data,
2229  int main(int argc, char **argv)  int main(int argc, char **argv)
2230  {  {
2231  FILE *infile = stdin;  FILE *infile = stdin;
2232    const char *version;
2233  int options = 0;  int options = 0;
2234  int study_options = 0;  int study_options = 0;
2235    int default_find_match_limit = FALSE;
2236  int op = 1;  int op = 1;
2237  int timeit = 0;  int timeit = 0;
2238  int timeitm = 0;  int timeitm = 0;
2239  int showinfo = 0;  int showinfo = 0;
2240  int showstore = 0;  int showstore = 0;
2241    int force_study = -1;
2242    int force_study_options = 0;
2243  int quiet = 0;  int quiet = 0;
2244  int size_offsets = 45;  int size_offsets = 45;
2245  int size_offsets_max;  int size_offsets_max;
2246  int *offsets = NULL;  int *offsets = NULL;
 #if !defined NOPOSIX  
 int posix = 0;  
 #endif  
2247  int debug = 0;  int debug = 0;
2248  int done = 0;  int done = 0;
2249  int all_use_dfa = 0;  int all_use_dfa = 0;
2250    int verify_jit = 0;
2251  int yield = 0;  int yield = 0;
2252  int stack_size;  int stack_size;
2253    
2254  /* These vectors store, end-to-end, a list of captured substring names. Assume  #if !defined NOPOSIX
2255  that 1024 is plenty long enough for the few names we'll be testing. */  int posix = 0;
2256    #endif
2257    #if !defined NODFA
2258    int *dfa_workspace = NULL;
2259    #endif
2260    
2261  uschar copynames[1024];  pcre_jit_stack *jit_stack = NULL;
 uschar getnames[1024];  
2262    
2263  uschar *copynamesptr;  /* These vectors store, end-to-end, a list of zero-terminated captured
2264  uschar *getnamesptr;  substring names, each list itself being terminated by an empty name. Assume
2265    that 1024 is plenty long enough for the few names we'll be testing. It is
2266    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2267    for the actual memory, to ensure alignment. */
2268    
2269    pcre_uint16 copynames[1024];
2270    pcre_uint16 getnames[1024];
2271    
2272    #ifdef SUPPORT_PCRE16
2273    pcre_uint16 *cn16ptr;
2274    pcre_uint16 *gn16ptr;
2275    #endif
2276    
2277  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2278  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2279    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2280    pcre_uint8 *cn8ptr;
2281    pcre_uint8 *gn8ptr;
2282    #endif
2283    
2284  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2285  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2286  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2287    
2288    buffer = (pcre_uint8 *)malloc(buffer_size);
2289    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2290    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2291    
2292  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2293    
# Line 855  it set 0x8000, but then I was advised th Line 2302  it set 0x8000, but then I was advised th
2302  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2303  #endif  #endif
2304    
2305    /* Get the version number: both pcre_version() and pcre16_version() give the
2306    same answer. We just need to ensure that we call one that is available. */
2307    
2308    #ifdef SUPPORT_PCRE8
2309    version = pcre_version();
2310    #else
2311    version = pcre16_version();
2312    #endif
2313    
2314  /* Scan options */  /* Scan options */
2315    
2316  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2317    {    {
2318    unsigned char *endptr;    pcre_uint8 *endptr;
2319      char *arg = argv[op];
2320    
2321    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(arg, "-m") == 0) showstore = 1;
2322      showstore = 1;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2323    else if (strcmp(argv[op], "-q") == 0) quiet = 1;  
2324    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strncmp(arg, "-s+", 3) == 0)
2325    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      {
2326    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      arg += 3;
2327        if (*arg == '+') { arg++; verify_jit = TRUE; }
2328        force_study = 1;
2329        if (*arg == 0)
2330          force_study_options = jit_study_bits[6];
2331        else if (*arg >= '1' && *arg <= '7')
2332          force_study_options = jit_study_bits[*arg - '1'];
2333        else goto BAD_ARG;
2334        }
2335      else if (strcmp(arg, "-16") == 0)
2336        {
2337    #ifdef SUPPORT_PCRE16
2338        use_pcre16 = 1;
2339    #else
2340        printf("** This version of PCRE was built without 16-bit support\n");
2341        exit(1);
2342    #endif
2343        }
2344      else if (strcmp(arg, "-q") == 0) quiet = 1;
2345      else if (strcmp(arg, "-b") == 0) debug = 1;
2346      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2347      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2348      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2349  #if !defined NODFA  #if !defined NODFA
2350    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2351  #endif  #endif
2352    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2353        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2354          *endptr == 0))          *endptr == 0))
2355      {      {
2356      op++;      op++;
2357      argc--;      argc--;
2358      }      }
2359    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2360      {      {
2361      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2362      int temp;      int temp;
2363      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2364                       *endptr == 0))                       *endptr == 0))
2365        {        {
2366        timeitm = temp;        timeitm = temp;
# Line 891  while (argc > 1 && argv[op][0] == '-') Line 2370  while (argc > 1 && argv[op][0] == '-')
2370      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2371      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2372      }      }
2373    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2374        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2375          *endptr == 0))          *endptr == 0))
2376      {      {
2377  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2378      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2379      exit(1);      exit(1);
2380  #else  #else
# Line 914  while (argc > 1 && argv[op][0] == '-') Line 2393  while (argc > 1 && argv[op][0] == '-')
2393  #endif  #endif
2394      }      }
2395  #if !defined NOPOSIX  #if !defined NOPOSIX
2396    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2397  #endif  #endif
2398    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2399      {      {
2400      int rc;      int rc;
2401      printf("PCRE version %s\n", pcre_version());      unsigned long int lrc;
2402    
2403        if (argc > 2)
2404          {
2405          if (strcmp(argv[op + 1], "linksize") == 0)
2406            {
2407            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2408            printf("%d\n", rc);
2409            yield = rc;
2410            goto EXIT;
2411            }
2412          if (strcmp(argv[op + 1], "pcre8") == 0)
2413            {
2414    #ifdef SUPPORT_PCRE8
2415            printf("1\n");
2416            yield = 1;
2417    #else
2418            printf("0\n");
2419            yield = 0;
2420    #endif
2421            goto EXIT;
2422            }
2423          if (strcmp(argv[op + 1], "pcre16") == 0)
2424            {
2425    #ifdef SUPPORT_PCRE16
2426            printf("1\n");
2427            yield = 1;
2428    #else
2429            printf("0\n");
2430            yield = 0;
2431    #endif
2432            goto EXIT;
2433            }
2434          if (strcmp(argv[op + 1], "utf") == 0)
2435            {
2436    #ifdef SUPPORT_PCRE8
2437            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2438            printf("%d\n", rc);
2439            yield = rc;
2440    #else
2441            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2442            printf("%d\n", rc);
2443            yield = rc;
2444    #endif
2445            goto EXIT;
2446            }
2447          if (strcmp(argv[op + 1], "ucp") == 0)
2448            {
2449            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2450            printf("%d\n", rc);
2451            yield = rc;
2452            goto EXIT;
2453            }
2454          if (strcmp(argv[op + 1], "jit") == 0)
2455            {
2456            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2457            printf("%d\n", rc);
2458            yield = rc;
2459            goto EXIT;
2460            }
2461          if (strcmp(argv[op + 1], "newline") == 0)
2462            {
2463            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2464            print_newline_config(rc);
2465            goto EXIT;
2466            }
2467          printf("Unknown -C option: %s\n", argv[op + 1]);
2468          goto EXIT;
2469          }
2470    
2471        printf("PCRE version %s\n", version);
2472      printf("Compiled with\n");      printf("Compiled with\n");
2473    
2474    #ifdef EBCDIC
2475        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2476    #endif
2477    
2478    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2479    are set, either both UTFs are supported or both are not supported. */
2480    
2481    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2482        printf("  8-bit and 16-bit support\n");
2483        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2484        if (rc)
2485          printf("  UTF-8 and UTF-16 support\n");
2486        else
2487          printf("  No UTF-8 or UTF-16 support\n");
2488    #elif defined SUPPORT_PCRE8
2489        printf("  8-bit support only\n");
2490      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2491      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2492      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2493        printf("  16-bit support only\n");
2494        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2495        printf("  %sUTF-16 support\n", rc? "" : "No ");
2496    #endif
2497    
2498        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2499      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2500      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2501      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      if (rc)
2502        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        {
2503        (rc == -2)? "ANYCRLF" :        const char *arch;
2504        (rc == -1)? "ANY" : "???");        (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2505      (void)pcre_config(PCRE_CONFIG_BSR, &rc);        printf("  Just-in-time compiler support: %s\n", arch);
2506          }
2507        else
2508          printf("  No just-in-time compiler support\n");
2509        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2510        print_newline_config(rc);
2511        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2512      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2513                                       "all Unicode newlines");                                       "all Unicode newlines");
2514      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2515      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2516      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2517      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2518      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2519      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
2520      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2521      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
2522      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2523      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2524        if (showstore)
2525          {
2526          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2527          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2528          }
2529        printf("\n");
2530      goto EXIT;      goto EXIT;
2531      }      }
2532    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
2533             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
2534      {      {
2535      usage();      usage();
2536      goto EXIT;      goto EXIT;
2537      }      }
2538    else    else
2539      {      {
2540      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
2541        printf("** Unknown or malformed option %s\n", arg);
2542      usage();      usage();
2543      yield = 1;      yield = 1;
2544      goto EXIT;      goto EXIT;
# Line 1000  if (argc > 2) Line 2585  if (argc > 2)
2585    
2586  /* Set alternative malloc function */  /* Set alternative malloc function */
2587    
2588    #ifdef SUPPORT_PCRE8
2589  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2590  pcre_free = new_free;  pcre_free = new_free;
2591  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2592  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2593    #endif
2594    
2595    #ifdef SUPPORT_PCRE16
2596    pcre16_malloc = new_malloc;
2597    pcre16_free = new_free;
2598    pcre16_stack_malloc = stack_malloc;
2599    pcre16_stack_free = stack_free;
2600    #endif
2601    
2602  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2603    
2604  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2605    
2606  /* Main loop */  /* Main loop */
2607    
# Line 1022  while (!done) Line 2616  while (!done)
2616  #endif  #endif
2617    
2618    const char *error;    const char *error;
2619    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2620    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
2621    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
2622      const pcre_uint8 *tables = NULL;
2623      unsigned long int get_options;
2624    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2625    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2626      int do_allcaps = 0;
2627      int do_mark = 0;
2628    int do_study = 0;    int do_study = 0;
2629      int no_force_study = 0;
2630    int do_debug = debug;    int do_debug = debug;
2631    int do_G = 0;    int do_G = 0;
2632    int do_g = 0;    int do_g = 0;
2633    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2634    int do_showrest = 0;    int do_showrest = 0;
2635      int do_showcaprest = 0;
2636    int do_flip = 0;    int do_flip = 0;
2637    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2638    
2639    use_utf8 = 0;  #if !defined NODFA
2640      int dfa_matched = 0;
2641    #endif
2642    
2643      use_utf = 0;
2644    debug_lengths = 1;    debug_lengths = 1;
2645    
2646    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1051  while (!done) Line 2655  while (!done)
2655    
2656    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2657      {      {
2658      unsigned long int magic, get_options;      pcre_uint32 magic;
2659      uschar sbuf[8];      pcre_uint8 sbuf[8];
2660      FILE *f;      FILE *f;
2661    
2662      p++;      p++;
2663        if (*p == '!')
2664          {
2665          do_debug = TRUE;
2666          do_showinfo = TRUE;
2667          p++;
2668          }
2669    
2670      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2671      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2672      *pp = 0;      *pp = 0;
# Line 1067  while (!done) Line 2678  while (!done)
2678        continue;        continue;
2679        }        }
2680    
2681        first_gotten_store = 0;
2682      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2683    
2684      true_size =      true_size =
# Line 1074  while (!done) Line 2686  while (!done)
2686      true_study_size =      true_study_size =
2687        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2688    
2689      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2690      regex_gotten_store = gotten_store;      if (re == NULL)
2691          {
2692          printf("** Failed to get %d bytes of memory for pcre object\n",
2693            (int)true_size);
2694          yield = 1;
2695          goto EXIT;
2696          }
2697        regex_gotten_store = first_gotten_store;
2698    
2699      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2700    
2701      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2702      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2703        {        {
2704        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2705          {          {
2706          do_flip = 1;          do_flip = 1;
2707          }          }
2708        else        else
2709          {          {
2710          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2711            new_free(re);
2712          fclose(f);          fclose(f);
2713          continue;          continue;
2714          }          }
2715        }        }
2716    
2717      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
2718        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2719          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
     /* Need to know if UTF-8 for printing data strings */  
2720    
2721      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      /* Now see if there is any following study data. */
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
     /* Now see if there is any following study data */  
2722    
2723      if (true_study_size != 0)      if (true_study_size != 0)
2724        {        {
# Line 1118  while (!done) Line 2734  while (!done)
2734          {          {
2735          FAIL_READ:          FAIL_READ:
2736          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2737          if (extra != NULL) new_free(extra);          if (extra != NULL)
2738          if (re != NULL) new_free(re);            {
2739              PCRE_FREE_STUDY(extra);
2740              }
2741            new_free(re);
2742          fclose(f);          fclose(f);
2743          continue;          continue;
2744          }          }
# Line 1128  while (!done) Line 2747  while (!done)
2747        }        }
2748      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2749    
2750        /* Flip the necessary bytes. */
2751        if (do_flip)
2752          {
2753          int rc;
2754          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2755          if (rc == PCRE_ERROR_BADMODE)
2756            {
2757            /* Simulate the result of the function call below. */
2758            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2759              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2760            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2761              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2762            new_free(re);
2763            fclose(f);
2764            continue;
2765            }
2766          }
2767    
2768        /* Need to know if UTF-8 for printing data strings. */
2769    
2770        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2771          {
2772          new_free(re);
2773          fclose(f);
2774          continue;
2775          }
2776        use_utf = (get_options & PCRE_UTF8) != 0;
2777    
2778      fclose(f);      fclose(f);
2779      goto SHOW_INFO;      goto SHOW_INFO;
2780      }      }
2781    
2782    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2783    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2784    
2785    delimiter = *p++;    delimiter = *p++;
2786    
# Line 1144  while (!done) Line 2791  while (!done)
2791      }      }
2792    
2793    pp = p;    pp = p;
2794    poffset = p - buffer;    poffset = (int)(p - buffer);
2795    
2796    for(;;)    for(;;)
2797      {      {
# Line 1184  while (!done) Line 2831  while (!done)
2831    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2832    
2833    options = 0;    options = 0;
2834    study_options = 0;    study_options = force_study_options;
2835    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
2836    
2837    while (*pp != 0)    while (*pp != 0)
# Line 1198  while (!done) Line 2845  while (!done)
2845        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2846        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2847    
2848        case '+': do_showrest = 1; break;        case '+':
2849          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2850          break;
2851    
2852          case '=': do_allcaps = 1; break;
2853        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2854        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2855        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1208  while (!done) Line 2859  while (!done)
2859        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2860        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2861        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
2862          case 'K': do_mark = 1; break;
2863        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2864        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2865    
# Line 1215  while (!done) Line 2867  while (!done)
2867        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2868  #endif  #endif
2869    
2870        case 'S': do_study = 1; break;        case 'S':
2871          do_study = 1;
2872          for (;;)
2873            {
2874            switch (*pp++)
2875              {
2876              case 'S':
2877              do_study = 0;
2878              no_force_study = 1;
2879              break;
2880    
2881              case '!':
2882              study_options |= PCRE_STUDY_EXTRA_NEEDED;
2883              break;
2884    
2885              case '+':
2886              if (*pp == '+')
2887                {
2888                verify_jit = TRUE;
2889                pp++;
2890                }
2891              if (*pp >= '1' && *pp <= '7')
2892                study_options |= jit_study_bits[*pp++ - '1'];
2893              else
2894                study_options |= jit_study_bits[6];
2895              break;
2896    
2897              case '-':
2898              study_options &= ~PCRE_STUDY_ALLJIT;
2899              break;
2900    
2901              default:
2902              pp--;
2903              goto ENDLOOP;
2904              }
2905            }
2906          ENDLOOP:
2907          break;
2908    
2909        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2910          case 'W': options |= PCRE_UCP; break;
2911        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2912          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2913        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2914        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2915        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2916    
2917          case 'T':
2918          switch (*pp++)
2919            {
2920            case '0': tables = tables0; break;
2921            case '1': tables = tables1; break;
2922    
2923            case '\r':
2924            case '\n':
2925            case ' ':
2926            case 0:
2927            fprintf(outfile, "** Missing table number after /T\n");
2928            goto SKIP_DATA;
2929    
2930            default:
2931            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2932            goto SKIP_DATA;
2933            }
2934          break;
2935    
2936        case 'L':        case 'L':
2937        ppp = pp;        ppp = pp;
2938        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1234  while (!done) Line 2945  while (!done)
2945          goto SKIP_DATA;          goto SKIP_DATA;
2946          }          }
2947        locale_set = 1;        locale_set = 1;
2948        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2949        pp = ppp;        pp = ppp;
2950        break;        break;
2951    
# Line 1247  while (!done) Line 2958  while (!done)
2958    
2959        case '<':        case '<':
2960          {          {
2961          int x = check_newline(pp, outfile);          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2962          if (x == 0) goto SKIP_DATA;            {
2963          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
2964          while (*pp++ != '>');            pp += 3;
2965              }
2966            else
2967              {
2968              int x = check_newline(pp, outfile);
2969              if (x == 0) goto SKIP_DATA;
2970              options |= x;
2971              while (*pp++ != '>');
2972              }
2973          }          }
2974        break;        break;
2975    
# Line 1267  while (!done) Line 2986  while (!done)
2986    
2987    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2988    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2989    local character tables. */    local character tables. Neither does it have 16-bit support. */
2990    
2991  #if !defined NOPOSIX  #if !defined NOPOSIX
2992    if (posix || do_posix)    if (posix || do_posix)
# Line 1280  while (!done) Line 2999  while (!done)
2999      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3000      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3001      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3002        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3003        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3004    
3005        first_gotten_store = 0;
3006      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
3007    
3008      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1300  while (!done) Line 3022  while (!done)
3022  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
3023    
3024      {      {
3025        /* In 16-bit mode, convert the input. */
3026    
3027    #ifdef SUPPORT_PCRE16
3028        if (use_pcre16)
3029          {
3030          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3031            {
3032            case -1:
3033            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3034              "converted to UTF-16\n");
3035            goto SKIP_DATA;
3036    
3037            case -2:
3038            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3039              "cannot be converted to UTF-16\n");
3040            goto SKIP_DATA;
3041    
3042            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3043            fprintf(outfile, "**Failed: character value greater than 0xffff "
3044              "cannot be converted to 16-bit in non-UTF mode\n");
3045            goto SKIP_DATA;
3046    
3047            default:
3048            break;
3049            }
3050          p = (pcre_uint8 *)buffer16;
3051          }
3052    #endif
3053    
3054        /* Compile many times when timing */
3055    
3056      if (timeit > 0)      if (timeit > 0)
3057        {        {
3058        register int i;        register int i;
# Line 1307  while (!done) Line 3060  while (!done)
3060        clock_t start_time = clock();        clock_t start_time = clock();
3061        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
3062          {          {
3063          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3064          if (re != NULL) free(re);          if (re != NULL) free(re);
3065          }          }
3066        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1316  while (!done) Line 3069  while (!done)
3069            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
3070        }        }
3071    
3072      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
3073        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3074    
3075      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3076      if non-interactive. */      if non-interactive. */
# Line 1343  while (!done) Line 3097  while (!done)
3097        goto CONTINUE;        goto CONTINUE;
3098        }        }
3099    
3100      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
3101      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
3102      returns only limited data. Check that it agrees with the newer one. */      lines. */
3103    
3104      if (log_store)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3105        fprintf(outfile, "Memory allocation (code space): %d\n",        goto SKIP_DATA;
3106          (int)(gotten_store -      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3107    
3108      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3109      and remember the store that was got. */      and remember the store that was got. */
3110    
3111      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
3112      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3113    
3114        /* Output code size information if requested */
3115    
3116      /* If /S was present, study the regexp to generate additional info to      if (log_store)
3117      help with the matching. */        fprintf(outfile, "Memory allocation (code space): %d\n",
3118            (int)(first_gotten_store -
3119                  sizeof(REAL_PCRE) -
3120                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3121    
3122        /* If -s or /S was present, study the regex to generate additional info to
3123        help with the matching, unless the pattern has the SS option, which
3124        suppresses the effect of /S (used for a few test patterns where studying is
3125        never sensible). */
3126    
3127      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
3128        {        {
3129        if (timeit > 0)        if (timeit > 0)
3130          {          {
# Line 1370  while (!done) Line 3132  while (!done)
3132          clock_t time_taken;          clock_t time_taken;
3133          clock_t start_time = clock();          clock_t start_time = clock();
3134          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3135            extra = pcre_study(re, study_options, &error);            {
3136              PCRE_STUDY(extra, re, study_options, &error);
3137              }
3138          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3139          if (extra != NULL) free(extra);          if (extra != NULL)
3140              {
3141              PCRE_FREE_STUDY(extra);
3142              }
3143          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3144            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3145              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3146          }          }
3147        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options, &error);
3148        if (error != NULL)        if (error != NULL)
3149          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3150        else if (extra != NULL)        else if (extra != NULL)
3151            {
3152          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3153            if (log_store)
3154              {
3155              size_t jitsize;
3156              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3157                  jitsize != 0)
3158                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3159              }
3160            }
3161        }        }
3162    
3163      /* If the 'F' option was present, we flip the bytes of all the integer      /* If /K was present, we set up for handling MARK data. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
3164    
3165      if (do_flip)      if (do_mark)
3166        {        {
3167        real_pcre *rre = (real_pcre *)re;        if (extra == NULL)
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
3168          {          {
3169          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3170          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          extra->flags = 0;
         rsd->options = byteflip(rsd->options, sizeof(rsd->options));  
3171          }          }
3172          extra->mark = &markptr;
3173          extra->flags |= PCRE_EXTRA_MARK;
3174        }        }
3175    
3176      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
3177    
3178      SHOW_INFO:      SHOW_INFO:
3179    
3180      if (do_debug)      if (do_debug)
3181        {        {
3182        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3183        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3184        }        }
3185    
3186        /* We already have the options in get_options (see above) */
3187    
3188      if (do_showinfo)      if (do_showinfo)
3189        {        {
3190        unsigned long int get_options, all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3191        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3192          hascrorlf;          hascrorlf, maxlookbehind;
3193        int nameentrysize, namecount;        int nameentrysize, namecount;
3194        const uschar *nametable;        const pcre_uint8 *nametable;
3195    
3196        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3197        new_info(re, NULL, PCRE_INFO_SIZE, &size);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3198        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3199        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3200        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3201        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3202        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3203        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3204        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3205        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3206        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3207        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3208              != 0)
3209  #if !defined NOINFOCHECK          goto SKIP_DATA;
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3210    
3211        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3212          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1487  while (!done) Line 3221  while (!done)
3221          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3222          while (namecount-- > 0)          while (namecount-- > 0)
3223            {            {
3224            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3225              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3226              GET2(nametable, 0));  #else
3227              int imm2_size = IMM2_SIZE;
3228    #endif
3229              int length = (int)STRLEN(nametable + imm2_size);
3230              fprintf(outfile, "  ");
3231              PCHARSV(nametable, imm2_size, length, outfile);
3232              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3233    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3234              fprintf(outfile, "%3d\n", use_pcre16?
3235                 (int)(((PCRE_SPTR16)nametable)[0])
3236                :((int)nametable[0] << 8) | (int)nametable[1]);
3237              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3238    #else
3239              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3240    #ifdef SUPPORT_PCRE8
3241            nametable += nameentrysize;            nametable += nameentrysize;
3242    #else
3243              nametable += nameentrysize * 2;
3244    #endif
3245    #endif
3246            }            }
3247          }          }
3248    
3249        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3250        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3251    
3252        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3253        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3254    
3255        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3256          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3257            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3258            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3259            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1514  while (!done) Line 3266  while (!done)
3266            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3267            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3268            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3269            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3270            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3271              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3272              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3273            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3274    
3275        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1556  while (!done) Line 3310  while (!done)
3310          }          }
3311        else        else
3312          {          {
3313          int ch = first_char & 255;          const char *caseless =
3314          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3315            "" : " (caseless)";            "" : " (caseless)";
3316          if (PRINTHEX(ch))  
3317            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3318              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3319          else          else
3320            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3321              fprintf(outfile, "First char = ");
3322              pchar(first_char, outfile);
3323              fprintf(outfile, "%s\n", caseless);
3324              }
3325          }          }
3326    
3327        if (need_char < 0)        if (need_char < 0)
# Line 1571  while (!done) Line 3330  while (!done)
3330          }          }
3331        else        else
3332          {          {
3333          int ch = need_char & 255;          const char *caseless =
3334          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3335            "" : " (caseless)";            "" : " (caseless)";
3336          if (PRINTHEX(ch))  
3337            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3338              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3339          else          else
3340            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3341              fprintf(outfile, "Need char = ");
3342              pchar(need_char, outfile);
3343              fprintf(outfile, "%s\n", caseless);
3344              }
3345          }          }
3346    
3347          if (maxlookbehind > 0)
3348            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3349    
3350        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3351        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3352        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3353        flipped.) */        flipped.) If study was forced by an external -s, don't show this
3354          information unless -i or -d was also present. This means that, except
3355          when auto-callouts are involved, the output from runs with and without
3356          -s should be identical. */
3357    
3358        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3359          {          {
3360          if (extra == NULL)          if (extra == NULL)
3361            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3362          else          else
3363            {            {
3364            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3365            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
3366    
3367            if (start_bits == NULL)            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3368              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3369            else  
3370              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3371              {              {
3372              int i;              if (start_bits == NULL)
3373              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3374              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3375                {                {
3376                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3377                  int c = 24;
3378                  fprintf(outfile, "Starting byte set: ");
3379                  for (i = 0; i < 256; i++)
3380                  {                  {
3381                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
3382                    {                    {
3383                    fprintf(outfile, "\n  ");                    if (c > 75)
3384                    c = 2;                      {
3385                    }                      fprintf(outfile, "\n  ");
3386                  if (PRINTHEX(i) && i != ' ')                      c = 2;
3387                    {                      }
3388                    fprintf(outfile, "%c ", i);                    if (PRINTOK(i) && i != ' ')
3389                    c += 2;                      {
3390                    }                      fprintf(outfile, "%c ", i);
3391                  else                      c += 2;
3392                    {                      }
3393                    fprintf(outfile, "\\x%02x ", i);                    else
3394                    c += 5;                      {
3395                        fprintf(outfile, "\\x%02x ", i);
3396                        c += 5;
3397                        }
3398                    }                    }
3399                  }                  }
3400                  fprintf(outfile, "\n");
3401                }                }
3402              fprintf(outfile, "\n");              }
3403              }
3404    
3405            /* Show this only if the JIT was set by /S, not by -s. */
3406    
3407            if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3408                (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3409              {
3410              int jit;
3411              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3412                {
3413                if (jit)
3414                  fprintf(outfile, "JIT study was successful\n");
3415                else
3416    #ifdef SUPPORT_JIT
3417                  fprintf(outfile, "JIT study was not successful\n");
3418    #else
3419                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3420    #endif
3421              }              }
3422            }            }
3423          }          }
# Line 1641  while (!done) Line 3436  while (!done)
3436          }          }
3437        else        else
3438          {          {
3439          uschar sbuf[8];          pcre_uint8 sbuf[8];
3440          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3441          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3442          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3443          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3444            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3445          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3446          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3447          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3448          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3449            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3450    
3451          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3452              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1659  while (!done) Line 3455  while (!done)
3455            }            }
3456          else          else
3457            {            {
3458            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3459    
3460              /* If there is study data, write it. */
3461    
3462            if (extra != NULL)            if (extra != NULL)
3463              {              {
3464              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1669  while (!done) Line 3468  while (!done)
3468                  strerror(errno));                  strerror(errno));
3469                }                }
3470              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
3471              }              }
3472            }            }
3473          fclose(f);          fclose(f);
3474          }          }
3475    
3476        new_free(re);        new_free(re);
3477        if (extra != NULL) new_free(extra);        if (extra != NULL)
3478        if (tables != NULL) new_free((void *)tables);          {
3479            PCRE_FREE_STUDY(extra);
3480            }
3481          if (locale_set)
3482            {
3483            new_free((void *)tables);
3484            setlocale(LC_CTYPE, "C");
3485            locale_set = 0;
3486            }
3487        continue;  /* With next regex */        continue;  /* With next regex */
3488        }        }
3489      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1686  while (!done) Line 3492  while (!done)
3492    
3493    for (;;)    for (;;)
3494      {      {
3495      uschar *q;      pcre_uint8 *q;
3496      uschar *bptr;      pcre_uint8 *bptr;
3497      int *use_offsets = offsets;      int *use_offsets = offsets;
3498      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3499      int callout_data = 0;      int callout_data = 0;
3500      int callout_data_set = 0;      int callout_data_set = 0;
3501      int count, c;      int count, c;
3502      int copystrings = 0;      int copystrings = 0;
3503      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
3504      int getstrings = 0;      int getstrings = 0;
3505      int getlist = 0;      int getlist = 0;
3506      int gmatched = 0;      int gmatched = 0;
3507      int start_offset = 0;      int start_offset = 0;
3508        int start_offset_sign = 1;
3509      int g_notempty = 0;      int g_notempty = 0;
3510      int use_dfa = 0;      int use_dfa = 0;
3511    
     options = 0;  
   
3512      *copynames = 0;      *copynames = 0;
3513      *getnames = 0;      *getnames = 0;
3514    
3515      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3516      getnamesptr = getnames;      cn16ptr = copynames;
3517        gn16ptr = getnames;
3518    #endif
3519    #ifdef SUPPORT_PCRE8
3520        cn8ptr = copynames8;
3521        gn8ptr = getnames8;
3522    #endif
3523    
3524      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3525      first_callout = 1;      first_callout = 1;
3526        last_callout_mark = NULL;
3527      callout_extra = 0;      callout_extra = 0;
3528      callout_count = 0;      callout_count = 0;
3529      callout_fail_count = 999999;      callout_fail_count = 999999;
3530      callout_fail_id = -1;      callout_fail_id = -1;
3531      show_malloc = 0;      show_malloc = 0;
3532        options = 0;
3533    
3534      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3535        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 1726  while (!done) Line 3539  while (!done)
3539        {        {
3540        if (extend_inputline(infile, buffer + len, "data> ") == NULL)        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3541          {          {
3542          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
3543              {
3544              fprintf(outfile, "\n");
3545              break;
3546              }
3547          done = 1;          done = 1;
3548          goto CONTINUE;          goto CONTINUE;
3549          }          }
# Line 1748  while (!done) Line 3565  while (!done)
3565        int i = 0;        int i = 0;
3566        int n = 0;        int n = 0;
3567    
3568        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3569          In non-UTF mode, allow the value of the byte to fall through to later,
3570          where values greater than 127 are turned into UTF-8 when running in
3571          16-bit mode. */
3572    
3573          if (c != '\\')
3574            {
3575            if (use_utf)
3576              {
3577              *q++ = c;
3578              continue;
3579              }
3580            }
3581    
3582          /* Handle backslash escapes */
3583    
3584          else switch ((c = *p++))
3585          {          {
3586          case 'a': c =    7; break;          case 'a': c =    7; break;
3587          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 1764  while (!done) Line 3597  while (!done)
3597          c -= '0';          c -= '0';
3598          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3599            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3600          break;          break;
3601    
3602          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3603          if (*p == '{')          if (*p == '{')
3604            {            {
3605            unsigned char *pt = p;            pcre_uint8 *pt = p;
3606            c = 0;            c = 0;
3607            while (isxdigit(*(++pt)))  
3608              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3609              when isxdigit() is a macro that refers to its argument more than
3610              once. This is banned by the C Standard, but apparently happens in at
3611              least one MacOS environment. */
3612    
3613              for (pt++; isxdigit(*pt); pt++)
3614                {
3615                if (++i == 9)
3616                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3617                                   "using only the first eight.\n");
3618                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3619                }
3620            if (*pt == '}')            if (*pt == '}')
3621              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             utn = ord2utf8(c, buff8);  
             for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
             c = buff8[ii];   /* Last byte */  
3622              p = pt + 1;              p = pt + 1;
3623              break;              break;
3624              }              }
3625            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3626            }            }
 #endif  
3627    
3628          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3629            allows UTF-8 characters to be constructed byte by byte, and also allows
3630            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3631            Otherwise, pass it down to later code so that it can be turned into
3632            UTF-8 when running in 16-bit mode. */
3633    
3634          c = 0;          c = 0;
3635          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3636            {            {
3637            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3638            p++;            p++;
3639            }            }
3640            if (use_utf)
3641              {
3642              *q++ = c;
3643              continue;
3644              }
3645          break;          break;
3646    
3647          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 1817  while (!done) Line 3649  while (!done)
3649          continue;          continue;
3650    
3651          case '>':          case '>':
3652            if (*p == '-')
3653              {
3654              start_offset_sign = -1;
3655              p++;
3656              }
3657          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3658            start_offset *= start_offset_sign;
3659          continue;          continue;
3660    
3661          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1836  while (!done) Line 3674  while (!done)
3674            }            }
3675          else if (isalnum(*p))          else if (isalnum(*p))
3676            {            {
3677            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3678            }            }
3679          else if (*p == '+')          else if (*p == '+')
3680            {            {
# Line 1852  while (!done) Line 3683  while (!done)
3683            }            }
3684          else if (*p == '-')          else if (*p == '-')
3685            {            {
3686            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3687            p++;            p++;
3688            }            }
3689          else if (*p == '!')          else if (*p == '!')
# Line 1890  while (!done) Line 3721  while (!done)
3721  #endif  #endif
3722            use_dfa = 1;            use_dfa = 1;
3723          continue;          continue;
3724    #endif
3725    
3726    #if !defined NODFA
3727          case 'F':          case 'F':
3728          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
3729          continue;          continue;
# Line 1904  while (!done) Line 3737  while (!done)
3737            }            }
3738          else if (isalnum(*p))          else if (isalnum(*p))
3739            {            {
3740            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3741            while (isalnum(*p)) *npp++ = *p++;            }
3742            *npp++ = 0;          continue;
3743            *npp = 0;  
3744            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3745            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3746              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3747            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3748                && extra->executable_jit != NULL)
3749              {
3750              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3751              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3752              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3753            }            }
3754          continue;          continue;
3755    
# Line 1924  while (!done) Line 3762  while (!done)
3762          continue;          continue;
3763    
3764          case 'N':          case 'N':
3765          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
3766              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3767            else
3768              options |= PCRE_NOTEMPTY;
3769          continue;          continue;
3770    
3771          case 'O':          case 'O':
# Line 1944  while (!done) Line 3785  while (!done)
3785            }            }
3786          use_size_offsets = n;          use_size_offsets = n;
3787          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
3788              else use_offsets = offsets + size_offsets_max - n;  /* To catch overruns */
3789          continue;          continue;
3790    
3791          case 'P':          case 'P':
3792          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3793              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3794          continue;          continue;
3795    
3796          case 'Q':          case 'Q':
# Line 1982  while (!done) Line 3825  while (!done)
3825          show_malloc = 1;          show_malloc = 1;
3826          continue;          continue;
3827    
3828            case 'Y':
3829            options |= PCRE_NO_START_OPTIMIZE;
3830            continue;
3831    
3832          case 'Z':          case 'Z':
3833          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
3834          continue;          continue;
# Line 1999  while (!done) Line 3846  while (!done)
3846            }            }
3847          continue;          continue;
3848          }          }
3849        *q++ = c;  
3850          /* We now have a character value in c that may be greater than 255. In
3851          16-bit mode, we always convert characters to UTF-8 so that values greater
3852          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3853          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3854          mode must have come from \x{...} or octal constructs because values from
3855          \x.. get this far only in non-UTF mode. */
3856    
3857    #if !defined NOUTF || defined SUPPORT_PCRE16
3858          if (use_pcre16 || use_utf)
3859            {
3860            pcre_uint8 buff8[8];
3861            int ii, utn;
3862            utn = ord2utf8(c, buff8);
3863            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3864            }
3865          else
3866    #endif
3867            {
3868            if (c > 255)
3869              {
3870              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3871                "and UTF-8 mode is not enabled.\n", c);
3872              fprintf(outfile, "** Truncation will probably give the wrong "
3873                "result.\n");
3874              }
3875            *q++ = c;
3876            }
3877        }        }
3878    
3879        /* Reached end of subject string */
3880    
3881      *q = 0;      *q = 0;
3882      len = q - dbuffer;      len = (int)(q - dbuffer);
3883    
3884        /* Move the data to the end of the buffer so that a read over the end of
3885        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3886        we are using the POSIX interface, we must include the terminating zero. */
3887    
3888    #if !defined NOPOSIX
3889        if (posix || do_posix)
3890          {
3891          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3892          bptr += buffer_size - len - 1;
3893          }
3894        else
3895    #endif
3896          {
3897          memmove(bptr + buffer_size - len, bptr, len);
3898          bptr += buffer_size - len;
3899          }
3900    
3901      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
3902        {        {
# Line 2023  while (!done) Line 3917  while (!done)
3917          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3918        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3919        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3920          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3921    
3922        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3923    
# Line 2044  while (!done) Line 3939  while (!done)
3939            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3940              {              {
3941              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3942              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3943                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3944              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3945              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3946                {                {
3947                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3948                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3949                  outfile);                  outfile);
3950                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3951                }                }
# Line 2058  while (!done) Line 3953  while (!done)
3953            }            }
3954          }          }
3955        free(pmatch);        free(pmatch);
3956          goto NEXT_DATA;
3957        }        }
3958    
3959    #endif  /* !defined NOPOSIX */
3960    
3961      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3962    
3963      else  #ifdef SUPPORT_PCRE16
3964  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3965          {
3966          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3967          switch(len)
3968            {
3969            case -1:
3970            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3971              "converted to UTF-16\n");
3972            goto NEXT_DATA;
3973    
3974            case -2:
3975            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3976              "cannot be converted to UTF-16\n");
3977            goto NEXT_DATA;
3978    
3979            case -3:
3980            fprintf(outfile, "**Failed: character value greater than 0xffff "
3981              "cannot be converted to 16-bit in non-UTF mode\n");
3982            goto NEXT_DATA;
3983    
3984            default:
3985            break;
3986            }
3987          bptr = (pcre_uint8 *)buffer16;
3988          }
3989    #endif
3990    
3991        /* Ensure that there is a JIT callback if we want to verify that JIT was
3992        actually used. If jit_stack == NULL, no stack has yet been assigned. */
3993    
3994        if (verify_jit && jit_stack == NULL && extra != NULL)
3995           { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3996    
3997      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3998        {        {
3999          markptr = NULL;
4000          jit_was_used = FALSE;
4001    
4002        if (timeitm > 0)        if (timeitm > 0)
4003          {          {
4004          register int i;          register int i;
# Line 2076  while (!done) Line 4008  while (!done)
4008  #if !defined NODFA  #if !defined NODFA
4009          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
4010            {            {
4011            int workspace[1000];            if ((options & PCRE_DFA_RESTART) != 0)
4012                {
4013                fprintf(outfile, "Timing DFA restarts is not supported\n");
4014                break;
4015                }
4016              if (dfa_workspace == NULL)
4017                dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4018            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
4019              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              {
4020                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4021                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets,
4022                  dfa_workspace, DFA_WS_DIMENSION);
4023                }
4024            }            }
4025          else          else
4026  #endif  #endif
4027    
4028          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
4029            count = pcre_exec(re, extra, (char *)bptr, len,            {
4030              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4031                (options | g_notempty), use_offsets, use_size_offsets);
4032              }
4033          time_taken = clock() - start_time;          time_taken = clock() - start_time;
4034          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
4035            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2097  while (!done) Line 4038  while (!done)
4038    
4039        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
4040        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
4041        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
4042          running of pcre_exec(), so disable the JIT optimization. This makes it
4043