/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 255 by ph10, Wed Sep 19 08:50:04 2007 UTC revision 1027 by ph10, Mon Sep 3 14:01:38 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 58  POSSIBILITY OF SUCH DAMAGE.
58  #include <locale.h>  #include <locale.h>
59  #include <errno.h>  #include <errno.h>
60    
61    /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67    #ifdef HAVE_UNISTD_H
68    #include <unistd.h>
69    #endif
70    #if defined(SUPPORT_LIBREADLINE)
71    #include <readline/readline.h>
72    #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80    #endif
81    
82  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
83  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 63  input mode under Windows. */ Line 93  input mode under Windows. */
93  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
94  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
95    
96    #ifndef isatty
97    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
98    #endif                         /* though in some environments they seem to   */
99                                   /* be already defined, hence the #ifndefs.    */
100    #ifndef fileno
101    #define fileno _fileno
102    #endif
103    
104    /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106    #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
112  #else  #else
113  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
114  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119  #define INPUT_MODE   "rb"  #define INPUT_MODE   "rb"
120  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
121  #endif  #endif
122    #endif
123    
124    #define PRIV(name) name
125    
126  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
127  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 81  here before pcre_internal.h so that the Line 133  here before pcre_internal.h so that the
133  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
134    
135  #include "pcre.h"  #include "pcre.h"
136    
137    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
138    /* Configure internal macros to 16 bit mode. */
139    #define COMPILE_PCRE16
140    #endif
141    
142  #include "pcre_internal.h"  #include "pcre_internal.h"
143    
144  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* The pcre_printint() function, which prints the internal form of a compiled
145  two copies, we include the source file here, changing the names of the external  regex, is held in a separate file so that (a) it can be compiled in either
146  symbols to prevent clashes. */  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
147    when that is compiled in debug mode. */
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
148    
149  #include "pcre_tables.c"  #ifdef SUPPORT_PCRE8
150    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151    #endif
152    #ifdef SUPPORT_PCRE16
153    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154    #endif
155    
156    /* We need access to some of the data tables that PCRE uses. So as not to have
157    to keep two copies, we include the source file here, changing the names of the
158    external symbols to prevent clashes. */
159    
160  /* We also need the pcre_printint() function for printing out compiled  #define PCRE_INCLUDED
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled.  
161    
162  The definition of the macro PRINTABLE, which determines whether to print an  #include "pcre_tables.c"
163    
164    /* The definition of the macro PRINTABLE, which determines whether to print an
165  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
166  contained in this file. We use it here also, in cases when the locale has not  the same as in the printint.src file. We use it here in cases when the locale
167  been explicitly changed, so as to get consistent output from systems that  has not been explicitly changed, so as to get consistent output from systems
168  differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
169    
170  #include "pcre_printint.src"  #ifdef EBCDIC
171    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
172    #else
173    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
174    #endif
175    
176  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
177    
178    /* Posix support is disabled in 16 bit only mode. */
179    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
180    #define NOPOSIX
181    #endif
182    
183  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
184  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 122  Makefile. */ Line 188  Makefile. */
188  #include "pcreposix.h"  #include "pcreposix.h"
189  #endif  #endif
190    
191  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
192  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
193  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
194  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
195  UTF8 support if PCRE is built without it. */  
196    #ifndef SUPPORT_UTF
197  #ifndef SUPPORT_UTF8  #ifndef NOUTF
198  #ifndef NOUTF8  #define NOUTF
199  #define NOUTF8  #endif
200  #endif  #endif
201    
202    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
203    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
204    only from one place and is handled differently). I couldn't dream up any way of
205    using a single macro to do this in a generic way, because of the many different
206    argument requirements. We know that at least one of SUPPORT_PCRE8 and
207    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
208    use these in the definitions of generic macros.
209    
210    **** Special note about the PCHARSxxx macros: the address of the string to be
211    printed is always given as two arguments: a base address followed by an offset.
212    The base address is cast to the correct data size for 8 or 16 bit data; the
213    offset is in units of this size. If the string were given as base+offset in one
214    argument, the casting might be incorrectly applied. */
215    
216    #ifdef SUPPORT_PCRE8
217    
218    #define PCHARS8(lv, p, offset, len, f) \
219      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
220    
221    #define PCHARSV8(p, offset, len, f) \
222      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
223    
224    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
225      p = read_capture_name8(p, cn8, re)
226    
227    #define STRLEN8(p) ((int)strlen((char *)p))
228    
229    #define SET_PCRE_CALLOUT8(callout) \
230      pcre_callout = callout
231    
232    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
233       pcre_assign_jit_stack(extra, callback, userdata)
234    
235    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
236      re = pcre_compile((char *)pat, options, error, erroffset, tables)
237    
238    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
239        namesptr, cbuffer, size) \
240      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
241        (char *)namesptr, cbuffer, size)
242    
243    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
244      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
245    
246    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
247        offsets, size_offsets, workspace, size_workspace) \
248      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
249        offsets, size_offsets, workspace, size_workspace)
250    
251    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
252        offsets, size_offsets) \
253      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
254        offsets, size_offsets)
255    
256    #define PCRE_FREE_STUDY8(extra) \
257      pcre_free_study(extra)
258    
259    #define PCRE_FREE_SUBSTRING8(substring) \
260      pcre_free_substring(substring)
261    
262    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
263      pcre_free_substring_list(listptr)
264    
265    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
266        getnamesptr, subsptr) \
267      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
268        (char *)getnamesptr, subsptr)
269    
270    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
271      n = pcre_get_stringnumber(re, (char *)ptr)
272    
273    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
274      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
275    
276    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
277      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
278    
279    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
280      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
281    
282    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
283      pcre_printint(re, outfile, debug_lengths)
284    
285    #define PCRE_STUDY8(extra, re, options, error) \
286      extra = pcre_study(re, options, error)
287    
288    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
289      pcre_jit_stack_alloc(startsize, maxsize)
290    
291    #define PCRE_JIT_STACK_FREE8(stack) \
292      pcre_jit_stack_free(stack)
293    
294    #endif /* SUPPORT_PCRE8 */
295    
296    /* -----------------------------------------------------------*/
297    
298    #ifdef SUPPORT_PCRE16
299    
300    #define PCHARS16(lv, p, offset, len, f) \
301      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
302    
303    #define PCHARSV16(p, offset, len, f) \
304      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
305    
306    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
307      p = read_capture_name16(p, cn16, re)
308    
309    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
310    
311    #define SET_PCRE_CALLOUT16(callout) \
312      pcre16_callout = (int (*)(pcre16_callout_block *))callout
313    
314    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
315      pcre16_assign_jit_stack((pcre16_extra *)extra, \
316        (pcre16_jit_callback)callback, userdata)
317    
318    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
319      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
320        tables)
321    
322    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
323        namesptr, cbuffer, size) \
324      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
325        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
326    
327    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
328      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
329        (PCRE_UCHAR16 *)cbuffer, size/2)
330    
331    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
332        offsets, size_offsets, workspace, size_workspace) \
333      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
334        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
335        workspace, size_workspace)
336    
337    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338        offsets, size_offsets) \
339      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
340        len, start_offset, options, offsets, size_offsets)
341    
342    #define PCRE_FREE_STUDY16(extra) \
343      pcre16_free_study((pcre16_extra *)extra)
344    
345    #define PCRE_FREE_SUBSTRING16(substring) \
346      pcre16_free_substring((PCRE_SPTR16)substring)
347    
348    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
349      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
350    
351    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
352        getnamesptr, subsptr) \
353      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
354        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
355    
356    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
357      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
358    
359    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
360      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
361        (PCRE_SPTR16 *)(void*)subsptr)
362    
363    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
364      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
365        (PCRE_SPTR16 **)(void*)listptr)
366    
367    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
368      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
369        tables)
370    
371    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
372      pcre16_printint(re, outfile, debug_lengths)
373    
374    #define PCRE_STUDY16(extra, re, options, error) \
375      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
376    
377    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
378      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
379    
380    #define PCRE_JIT_STACK_FREE16(stack) \
381      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
382    
383    #endif /* SUPPORT_PCRE16 */
384    
385    
386    /* ----- Both modes are supported; a runtime test is needed, except for
387    pcre_config(), and the JIT stack functions, when it doesn't matter which
388    version is called. ----- */
389    
390    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
391    
392    #define CHAR_SIZE (use_pcre16? 2:1)
393    
394    #define PCHARS(lv, p, offset, len, f) \
395      if (use_pcre16) \
396        PCHARS16(lv, p, offset, len, f); \
397      else \
398        PCHARS8(lv, p, offset, len, f)
399    
400    #define PCHARSV(p, offset, len, f) \
401      if (use_pcre16) \
402        PCHARSV16(p, offset, len, f); \
403      else \
404        PCHARSV8(p, offset, len, f)
405    
406    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
407      if (use_pcre16) \
408        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
409      else \
410        READ_CAPTURE_NAME8(p, cn8, cn16, re)
411    
412    #define SET_PCRE_CALLOUT(callout) \
413      if (use_pcre16) \
414        SET_PCRE_CALLOUT16(callout); \
415      else \
416        SET_PCRE_CALLOUT8(callout)
417    
418    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
419    
420    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
421      if (use_pcre16) \
422        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
423      else \
424        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
425    
426    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
427      if (use_pcre16) \
428        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
429      else \
430        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
431    
432    #define PCRE_CONFIG pcre_config
433    
434    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
435        namesptr, cbuffer, size) \
436      if (use_pcre16) \
437        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
438          namesptr, cbuffer, size); \
439      else \
440        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
441          namesptr, cbuffer, size)
442    
443    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
444      if (use_pcre16) \
445        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
446      else \
447        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
448    
449    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
450        offsets, size_offsets, workspace, size_workspace) \
451      if (use_pcre16) \
452        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
453          offsets, size_offsets, workspace, size_workspace); \
454      else \
455        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
456          offsets, size_offsets, workspace, size_workspace)
457    
458    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
459        offsets, size_offsets) \
460      if (use_pcre16) \
461        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
462          offsets, size_offsets); \
463      else \
464        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
465          offsets, size_offsets)
466    
467    #define PCRE_FREE_STUDY(extra) \
468      if (use_pcre16) \
469        PCRE_FREE_STUDY16(extra); \
470      else \
471        PCRE_FREE_STUDY8(extra)
472    
473    #define PCRE_FREE_SUBSTRING(substring) \
474      if (use_pcre16) \
475        PCRE_FREE_SUBSTRING16(substring); \
476      else \
477        PCRE_FREE_SUBSTRING8(substring)
478    
479    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
480      if (use_pcre16) \
481        PCRE_FREE_SUBSTRING_LIST16(listptr); \
482      else \
483        PCRE_FREE_SUBSTRING_LIST8(listptr)
484    
485    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
486        getnamesptr, subsptr) \
487      if (use_pcre16) \
488        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
489          getnamesptr, subsptr); \
490      else \
491        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
492          getnamesptr, subsptr)
493    
494    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
495      if (use_pcre16) \
496        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
497      else \
498        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
499    
500    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
501      if (use_pcre16) \
502        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
503      else \
504        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
505    
506    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
507      if (use_pcre16) \
508        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
509      else \
510        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
511    
512    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
513      (use_pcre16 ? \
514         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
515        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
516    
517    #define PCRE_JIT_STACK_FREE(stack) \
518      if (use_pcre16) \
519        PCRE_JIT_STACK_FREE16(stack); \
520      else \
521        PCRE_JIT_STACK_FREE8(stack)
522    
523    #define PCRE_MAKETABLES \
524      (use_pcre16? pcre16_maketables() : pcre_maketables())
525    
526    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
527      if (use_pcre16) \
528        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
529      else \
530        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
531    
532    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
533      if (use_pcre16) \
534        PCRE_PRINTINT16(re, outfile, debug_lengths); \
535      else \
536        PCRE_PRINTINT8(re, outfile, debug_lengths)
537    
538    #define PCRE_STUDY(extra, re, options, error) \
539      if (use_pcre16) \
540        PCRE_STUDY16(extra, re, options, error); \
541      else \
542        PCRE_STUDY8(extra, re, options, error)
543    
544    /* ----- Only 8-bit mode is supported ----- */
545    
546    #elif defined SUPPORT_PCRE8
547    #define CHAR_SIZE                 1
548    #define PCHARS                    PCHARS8
549    #define PCHARSV                   PCHARSV8
550    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
551    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
552    #define STRLEN                    STRLEN8
553    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
554    #define PCRE_COMPILE              PCRE_COMPILE8
555    #define PCRE_CONFIG               pcre_config
556    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
557    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
558    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
559    #define PCRE_EXEC                 PCRE_EXEC8
560    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
561    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
562    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
563    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
564    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
565    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
566    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
567    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
568    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
569    #define PCRE_MAKETABLES           pcre_maketables()
570    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
571    #define PCRE_PRINTINT             PCRE_PRINTINT8
572    #define PCRE_STUDY                PCRE_STUDY8
573    
574    /* ----- Only 16-bit mode is supported ----- */
575    
576    #else
577    #define CHAR_SIZE                 2
578    #define PCHARS                    PCHARS16
579    #define PCHARSV                   PCHARSV16
580    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
581    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
582    #define STRLEN                    STRLEN16
583    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
584    #define PCRE_COMPILE              PCRE_COMPILE16
585    #define PCRE_CONFIG               pcre16_config
586    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
587    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
588    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
589    #define PCRE_EXEC                 PCRE_EXEC16
590    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
591    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
592    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
593    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
594    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
595    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
596    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
597    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
598    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
599    #define PCRE_MAKETABLES           pcre16_maketables()
600    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
601    #define PCRE_PRINTINT             PCRE_PRINTINT16
602    #define PCRE_STUDY                PCRE_STUDY16
603  #endif  #endif
604    
605    /* ----- End of mode-specific function call macros ----- */
606    
607    
608  /* Other parameters */  /* Other parameters */
609    
# Line 145  UTF8 support if PCRE is built without it Line 615  UTF8 support if PCRE is built without it
615  #endif  #endif
616  #endif  #endif
617    
618    #if !defined NODFA
619    #define DFA_WS_DIMENSION 1000
620    #endif
621    
622  /* This is the default loop count for timing. */  /* This is the default loop count for timing. */
623    
624  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
# Line 159  static int callout_fail_count; Line 633  static int callout_fail_count;
633  static int callout_fail_id;  static int callout_fail_id;
634  static int debug_lengths;  static int debug_lengths;
635  static int first_callout;  static int first_callout;
636    static int jit_was_used;
637  static int locale_set = 0;  static int locale_set = 0;
638  static int show_malloc;  static int show_malloc;
639  static int use_utf8;  static int use_utf;
640  static size_t gotten_store;  static size_t gotten_store;
641    static size_t first_gotten_store = 0;
642    static const unsigned char *last_callout_mark = NULL;
643    
644  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
645    
646  static int buffer_size = 50000;  static int buffer_size = 50000;
647  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
648  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
649  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
650    
651    /* Another buffer is needed for translation to 16-bit character strings. It
652    will be obtained and extended as required. */
653    
654    #ifdef SUPPORT_PCRE16
655    static int buffer16_size = 0;
656    static pcre_uint16 *buffer16 = NULL;
657    
658    #ifdef SUPPORT_PCRE8
659    
660    /* We need the table of operator lengths that is used for 16-bit compiling, in
661    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
662    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
663    appropriately for the 16-bit world. Just as a safety check, make sure that
664    COMPILE_PCRE16 is *not* set. */
665    
666    #ifdef COMPILE_PCRE16
667    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
668    #endif
669    
670  /*************************************************  #if LINK_SIZE == 2
671  *        Read or extend an input line            *  #undef LINK_SIZE
672  *************************************************/  #define LINK_SIZE 1
673    #elif LINK_SIZE == 3 || LINK_SIZE == 4
674  /* Input lines are read into buffer, but both patterns and data lines can be  #undef LINK_SIZE
675  continued over multiple input lines. In addition, if the buffer fills up, we  #define LINK_SIZE 2
676  want to automatically expand it so as to be able to handle extremely large  #else
677  lines that are needed for certain stress tests. When the input buffer is  #error LINK_SIZE must be either 2, 3, or 4
678  expanded, the other two buffers must also be expanded likewise, and the  #endif
 contents of pbuffer, which are a copy of the input for callouts, must be  
 preserved (for when expansion happens for a data line). This is not the most  
 optimal way of handling this, but hey, this is just a test program!  
679    
680  Arguments:  #undef IMM2_SIZE
681    f            the file to read  #define IMM2_SIZE 1
   start        where in buffer to start (this *must* be within buffer)  
682    
683  Returns:       pointer to the start of new data  #endif /* SUPPORT_PCRE8 */
                could be a copy of start, or could be moved  
                NULL if no data read and EOF reached  
 */  
684    
685  static uschar *  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
686  extend_inputline(FILE *f, uschar *start)  #endif  /* SUPPORT_PCRE16 */
 {  
 uschar *here = start;  
687    
688  for (;;)  /* If we have 8-bit support, default use_pcre16 to false; if there is also
689    {  16-bit support, it can be changed by an option. If there is no 8-bit support,
690    int rlen = buffer_size - (here - buffer);  there must be 16-bit support, so default it to 1. */
691    
692    if (rlen > 1000)  #ifdef SUPPORT_PCRE8
693      {  static int use_pcre16 = 0;
694      int dlen;  #else
695      if (fgets((char *)here, rlen,  f) == NULL)  static int use_pcre16 = 1;
696        return (here == start)? NULL : start;  #endif
     dlen = (int)strlen((char *)here);  
     if (dlen > 0 && here[dlen - 1] == '\n') return start;  
     here += dlen;  
     }  
697    
698    else  /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
     {  
     int new_buffer_size = 2*buffer_size;  
     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);  
     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);  
     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);  
699    
700      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)  static int jit_study_bits[] =
701        {    {
702        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);    PCRE_STUDY_JIT_COMPILE,
703        exit(1);    PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
704        }    PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
705      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
706      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
707      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
708      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
709        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
710    };
711    
712    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
713      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
714    
715    /* Textual explanations for runtime error codes */
716    
717    static const char *errtexts[] = {
718      NULL,  /* 0 is no error */
719      NULL,  /* NOMATCH is handled specially */
720      "NULL argument passed",
721      "bad option value",
722      "magic number missing",
723      "unknown opcode - pattern overwritten?",
724      "no more memory",
725      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
726      "match limit exceeded",
727      "callout error code",
728      NULL,  /* BADUTF8/16 is handled specially */
729      NULL,  /* BADUTF8/16 offset is handled specially */
730      NULL,  /* PARTIAL is handled specially */
731      "not used - internal error",
732      "internal error - pattern overwritten?",
733      "bad count value",
734      "item unsupported for DFA matching",
735      "backreference condition or recursion test not supported for DFA matching",
736      "match limit not supported for DFA matching",
737      "workspace size exceeded in DFA matching",
738      "too much recursion for DFA matching",
739      "recursion limit exceeded",
740      "not used - internal error",
741      "invalid combination of newline options",
742      "bad offset value",
743      NULL,  /* SHORTUTF8/16 is handled specially */
744      "nested recursion at the same subject position",
745      "JIT stack limit reached",
746      "pattern compiled in wrong mode: 8-bit/16-bit error",
747      "pattern compiled with other endianness",
748      "invalid data in workspace for DFA restart"
749    };
750    
     memcpy(new_buffer, buffer, buffer_size);  
     memcpy(new_pbuffer, pbuffer, buffer_size);  
751    
752      buffer_size = new_buffer_size;  /*************************************************
753    *         Alternate character tables             *
754    *************************************************/
755    
756      start = new_buffer + (start - buffer);  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
757      here = new_buffer + (here - buffer);  using the default tables of the library. However, the T option can be used to
758    select alternate sets of tables, for different kinds of testing. Note also that
759    the L (locale) option also adjusts the tables. */
760    
761    /* This is the set of tables distributed as default with PCRE. It recognizes
762    only ASCII characters. */
763    
764    static const pcre_uint8 tables0[] = {
765    
766    /* This table is a lower casing table. */
767    
768        0,  1,  2,  3,  4,  5,  6,  7,
769        8,  9, 10, 11, 12, 13, 14, 15,
770       16, 17, 18, 19, 20, 21, 22, 23,
771       24, 25, 26, 27, 28, 29, 30, 31,
772       32, 33, 34, 35, 36, 37, 38, 39,
773       40, 41, 42, 43, 44, 45, 46, 47,
774       48, 49, 50, 51, 52, 53, 54, 55,
775       56, 57, 58, 59, 60, 61, 62, 63,
776       64, 97, 98, 99,100,101,102,103,
777      104,105,106,107,108,109,110,111,
778      112,113,114,115,116,117,118,119,
779      120,121,122, 91, 92, 93, 94, 95,
780       96, 97, 98, 99,100,101,102,103,
781      104,105,106,107,108,109,110,111,
782      112,113,114,115,116,117,118,119,
783      120,121,122,123,124,125,126,127,
784      128,129,130,131,132,133,134,135,
785      136,137,138,139,140,141,142,143,
786      144,145,146,147,148,149,150,151,
787      152,153,154,155,156,157,158,159,
788      160,161,162,163,164,165,166,167,
789      168,169,170,171,172,173,174,175,
790      176,177,178,179,180,181,182,183,
791      184,185,186,187,188,189,190,191,
792      192,193,194,195,196,197,198,199,
793      200,201,202,203,204,205,206,207,
794      208,209,210,211,212,213,214,215,
795      216,217,218,219,220,221,222,223,
796      224,225,226,227,228,229,230,231,
797      232,233,234,235,236,237,238,239,
798      240,241,242,243,244,245,246,247,
799      248,249,250,251,252,253,254,255,
800    
801    /* This table is a case flipping table. */
802    
803        0,  1,  2,  3,  4,  5,  6,  7,
804        8,  9, 10, 11, 12, 13, 14, 15,
805       16, 17, 18, 19, 20, 21, 22, 23,
806       24, 25, 26, 27, 28, 29, 30, 31,
807       32, 33, 34, 35, 36, 37, 38, 39,
808       40, 41, 42, 43, 44, 45, 46, 47,
809       48, 49, 50, 51, 52, 53, 54, 55,
810       56, 57, 58, 59, 60, 61, 62, 63,
811       64, 97, 98, 99,100,101,102,103,
812      104,105,106,107,108,109,110,111,
813      112,113,114,115,116,117,118,119,
814      120,121,122, 91, 92, 93, 94, 95,
815       96, 65, 66, 67, 68, 69, 70, 71,
816       72, 73, 74, 75, 76, 77, 78, 79,
817       80, 81, 82, 83, 84, 85, 86, 87,
818       88, 89, 90,123,124,125,126,127,
819      128,129,130,131,132,133,134,135,
820      136,137,138,139,140,141,142,143,
821      144,145,146,147,148,149,150,151,
822      152,153,154,155,156,157,158,159,
823      160,161,162,163,164,165,166,167,
824      168,169,170,171,172,173,174,175,
825      176,177,178,179,180,181,182,183,
826      184,185,186,187,188,189,190,191,
827      192,193,194,195,196,197,198,199,
828      200,201,202,203,204,205,206,207,
829      208,209,210,211,212,213,214,215,
830      216,217,218,219,220,221,222,223,
831      224,225,226,227,228,229,230,231,
832      232,233,234,235,236,237,238,239,
833      240,241,242,243,244,245,246,247,
834      248,249,250,251,252,253,254,255,
835    
836    /* This table contains bit maps for various character classes. Each map is 32
837    bytes long and the bits run from the least significant end of each byte. The
838    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
839    graph, print, punct, and cntrl. Other classes are built from combinations. */
840    
841      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
842      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
843      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845    
846      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
847      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850    
851      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
852      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
853      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
855    
856      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
858      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
859      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
860    
861      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
862      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
863      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
864      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
865    
866      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
867      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
868      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
869      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
870    
871      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
872      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
873      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
874      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
875    
876      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
877      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
878      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
879      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
880    
881      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
882      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
883      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
884      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
885    
886      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
887      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
888      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
889      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
890    
891    /* This table identifies various classes of character by individual bits:
892      0x01   white space character
893      0x02   letter
894      0x04   decimal digit
895      0x08   hexadecimal digit
896      0x10   alphanumeric or '_'
897      0x80   regular expression metacharacter or binary zero
898    */
899    
900      free(buffer);    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
901      free(dbuffer);    0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
902      free(pbuffer);    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
903      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
904      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
905      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
906      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
907      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
908      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
909      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
910      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
911      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
912      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
913      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
914      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
915      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
916      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
917      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
918      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
919      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
920      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
921      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
922      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
923      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
924      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
925      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
926      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
927      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
928      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
929      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
930      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
931      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
932    
933    /* This is a set of tables that came originally from a Windows user. It seems to
934    be at least an approximation of ISO 8859. In particular, there are characters
935    greater than 128 that are marked as spaces, letters, etc. */
936    
937    static const pcre_uint8 tables1[] = {
938    0,1,2,3,4,5,6,7,
939    8,9,10,11,12,13,14,15,
940    16,17,18,19,20,21,22,23,
941    24,25,26,27,28,29,30,31,
942    32,33,34,35,36,37,38,39,
943    40,41,42,43,44,45,46,47,
944    48,49,50,51,52,53,54,55,
945    56,57,58,59,60,61,62,63,
946    64,97,98,99,100,101,102,103,
947    104,105,106,107,108,109,110,111,
948    112,113,114,115,116,117,118,119,
949    120,121,122,91,92,93,94,95,
950    96,97,98,99,100,101,102,103,
951    104,105,106,107,108,109,110,111,
952    112,113,114,115,116,117,118,119,
953    120,121,122,123,124,125,126,127,
954    128,129,130,131,132,133,134,135,
955    136,137,138,139,140,141,142,143,
956    144,145,146,147,148,149,150,151,
957    152,153,154,155,156,157,158,159,
958    160,161,162,163,164,165,166,167,
959    168,169,170,171,172,173,174,175,
960    176,177,178,179,180,181,182,183,
961    184,185,186,187,188,189,190,191,
962    224,225,226,227,228,229,230,231,
963    232,233,234,235,236,237,238,239,
964    240,241,242,243,244,245,246,215,
965    248,249,250,251,252,253,254,223,
966    224,225,226,227,228,229,230,231,
967    232,233,234,235,236,237,238,239,
968    240,241,242,243,244,245,246,247,
969    248,249,250,251,252,253,254,255,
970    0,1,2,3,4,5,6,7,
971    8,9,10,11,12,13,14,15,
972    16,17,18,19,20,21,22,23,
973    24,25,26,27,28,29,30,31,
974    32,33,34,35,36,37,38,39,
975    40,41,42,43,44,45,46,47,
976    48,49,50,51,52,53,54,55,
977    56,57,58,59,60,61,62,63,
978    64,97,98,99,100,101,102,103,
979    104,105,106,107,108,109,110,111,
980    112,113,114,115,116,117,118,119,
981    120,121,122,91,92,93,94,95,
982    96,65,66,67,68,69,70,71,
983    72,73,74,75,76,77,78,79,
984    80,81,82,83,84,85,86,87,
985    88,89,90,123,124,125,126,127,
986    128,129,130,131,132,133,134,135,
987    136,137,138,139,140,141,142,143,
988    144,145,146,147,148,149,150,151,
989    152,153,154,155,156,157,158,159,
990    160,161,162,163,164,165,166,167,
991    168,169,170,171,172,173,174,175,
992    176,177,178,179,180,181,182,183,
993    184,185,186,187,188,189,190,191,
994    224,225,226,227,228,229,230,231,
995    232,233,234,235,236,237,238,239,
996    240,241,242,243,244,245,246,215,
997    248,249,250,251,252,253,254,223,
998    192,193,194,195,196,197,198,199,
999    200,201,202,203,204,205,206,207,
1000    208,209,210,211,212,213,214,247,
1001    216,217,218,219,220,221,222,255,
1002    0,62,0,0,1,0,0,0,
1003    0,0,0,0,0,0,0,0,
1004    32,0,0,0,1,0,0,0,
1005    0,0,0,0,0,0,0,0,
1006    0,0,0,0,0,0,255,3,
1007    126,0,0,0,126,0,0,0,
1008    0,0,0,0,0,0,0,0,
1009    0,0,0,0,0,0,0,0,
1010    0,0,0,0,0,0,255,3,
1011    0,0,0,0,0,0,0,0,
1012    0,0,0,0,0,0,12,2,
1013    0,0,0,0,0,0,0,0,
1014    0,0,0,0,0,0,0,0,
1015    254,255,255,7,0,0,0,0,
1016    0,0,0,0,0,0,0,0,
1017    255,255,127,127,0,0,0,0,
1018    0,0,0,0,0,0,0,0,
1019    0,0,0,0,254,255,255,7,
1020    0,0,0,0,0,4,32,4,
1021    0,0,0,128,255,255,127,255,
1022    0,0,0,0,0,0,255,3,
1023    254,255,255,135,254,255,255,7,
1024    0,0,0,0,0,4,44,6,
1025    255,255,127,255,255,255,127,255,
1026    0,0,0,0,254,255,255,255,
1027    255,255,255,255,255,255,255,127,
1028    0,0,0,0,254,255,255,255,
1029    255,255,255,255,255,255,255,255,
1030    0,2,0,0,255,255,255,255,
1031    255,255,255,255,255,255,255,127,
1032    0,0,0,0,255,255,255,255,
1033    255,255,255,255,255,255,255,255,
1034    0,0,0,0,254,255,0,252,
1035    1,0,0,248,1,0,0,120,
1036    0,0,0,0,254,255,255,255,
1037    0,0,128,0,0,0,128,0,
1038    255,255,255,255,0,0,0,0,
1039    0,0,0,0,0,0,0,128,
1040    255,255,255,255,0,0,0,0,
1041    0,0,0,0,0,0,0,0,
1042    128,0,0,0,0,0,0,0,
1043    0,1,1,0,1,1,0,0,
1044    0,0,0,0,0,0,0,0,
1045    0,0,0,0,0,0,0,0,
1046    1,0,0,0,128,0,0,0,
1047    128,128,128,128,0,0,128,0,
1048    28,28,28,28,28,28,28,28,
1049    28,28,0,0,0,0,0,128,
1050    0,26,26,26,26,26,26,18,
1051    18,18,18,18,18,18,18,18,
1052    18,18,18,18,18,18,18,18,
1053    18,18,18,128,128,0,128,16,
1054    0,26,26,26,26,26,26,18,
1055    18,18,18,18,18,18,18,18,
1056    18,18,18,18,18,18,18,18,
1057    18,18,18,128,128,0,0,0,
1058    0,0,0,0,0,1,0,0,
1059    0,0,0,0,0,0,0,0,
1060    0,0,0,0,0,0,0,0,
1061    0,0,0,0,0,0,0,0,
1062    1,0,0,0,0,0,0,0,
1063    0,0,18,0,0,0,0,0,
1064    0,0,20,20,0,18,0,0,
1065    0,20,18,0,0,0,0,0,
1066    18,18,18,18,18,18,18,18,
1067    18,18,18,18,18,18,18,18,
1068    18,18,18,18,18,18,18,0,
1069    18,18,18,18,18,18,18,18,
1070    18,18,18,18,18,18,18,18,
1071    18,18,18,18,18,18,18,18,
1072    18,18,18,18,18,18,18,0,
1073    18,18,18,18,18,18,18,18
1074    };
1075    
     buffer = new_buffer;  
     dbuffer = new_dbuffer;  
     pbuffer = new_pbuffer;  
     }  
   }  
1076    
 return NULL;  /* Control never gets here */  
 }  
1077    
1078    
1079    #ifndef HAVE_STRERROR
1080    /*************************************************
1081    *     Provide strerror() for non-ANSI libraries  *
1082    *************************************************/
1083    
1084    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1085    in their libraries, but can provide the same facility by this simple
1086    alternative function. */
1087    
1088    extern int   sys_nerr;
1089    extern char *sys_errlist[];
1090    
1091    char *
1092    strerror(int n)
1093    {
1094    if (n < 0 || n >= sys_nerr) return "unknown error number";
1095    return sys_errlist[n];
1096    }
1097    #endif /* HAVE_STRERROR */
1098    
1099    
1100  /*************************************************  /*************************************************
1101  *          Read number from string               *  *         JIT memory callback                    *
1102  *************************************************/  *************************************************/
1103    
1104  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  static pcre_jit_stack* jit_callback(void *arg)
 around with conditional compilation, just do the job by hand. It is only used  
 for unpicking arguments, so just keep it simple.  
   
 Arguments:  
   str           string to be converted  
   endptr        where to put the end pointer  
   
 Returns:        the unsigned long  
 */  
   
 static int  
 get_value(unsigned char *str, unsigned char **endptr)  
1105  {  {
1106  int result = 0;  jit_was_used = TRUE;
1107  while(*str != 0 && isspace(*str)) str++;  return (pcre_jit_stack *)arg;
 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');  
 *endptr = str;  
 return(result);  
1108  }  }
1109    
1110    
1111    #if !defined NOUTF || defined SUPPORT_PCRE16
   
1112  /*************************************************  /*************************************************
1113  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
1114  *************************************************/  *************************************************/
# Line 297  Returns:      >  0 => the number of byte Line 1124  Returns:      >  0 => the number of byte
1124                -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
1125  */  */
1126    
 #if !defined NOUTF8  
   
1127  static int  static int
1128  utf82ord(unsigned char *utf8bytes, int *vptr)  utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1129  {  {
1130  int c = *utf8bytes++;  int c = *utf8bytes++;
1131  int d = c;  int d = c;
# Line 339  if (j != i) return -(i+1); Line 1164  if (j != i) return -(i+1);
1164  *vptr = d;  *vptr = d;
1165  return i+1;  return i+1;
1166  }  }
1167    #endif /* NOUTF || SUPPORT_PCRE16 */
 #endif  
1168    
1169    
1170    
1171    #if !defined NOUTF || defined SUPPORT_PCRE16
1172  /*************************************************  /*************************************************
1173  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
1174  *************************************************/  *************************************************/
# Line 358  Arguments: Line 1183  Arguments:
1183  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
1184  */  */
1185    
 #if !defined NOUTF8  
   
1186  static int  static int
1187  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1188  {  {
1189  register int i, j;  register int i, j;
1190  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
# Line 375  for (j = i; j > 0; j--) Line 1198  for (j = i; j > 0; j--)
1198  *utf8bytes = utf8_table2[i] | cvalue;  *utf8bytes = utf8_table2[i] | cvalue;
1199  return i + 1;  return i + 1;
1200  }  }
1201    #endif
1202    
1203    
1204    #ifdef SUPPORT_PCRE16
1205    /*************************************************
1206    *         Convert a string to 16-bit             *
1207    *************************************************/
1208    
1209    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1210    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1211    double, because values up to 0xffff use 1 to 3 bytes in UTF-8 and exactly 2
1212    in UTF-16. Higher values use 4 bytes in UTF-8 and 4 bytes in UTF-16. The
1213    result is always left in buffer16.
1214    
1215    Note that this function does not object to surrogate values. This is
1216    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1217    for the purpose of testing that they are correctly faulted.
1218    
1219    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1220    in UTF-8 so that values greater than 255 can be handled.
1221    
1222    Arguments:
1223      data       TRUE if converting a data line; FALSE for a regex
1224      p          points to a byte string
1225      utf        true if UTF-8 (to be converted to UTF-16)
1226      len        number of bytes in the string (excluding trailing zero)
1227    
1228    Returns:     number of 16-bit data items used (excluding trailing zero)
1229                 OR -1 if a UTF-8 string is malformed
1230                 OR -2 if a value > 0x10ffff is encountered
1231                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1232    */
1233    
1234    static int
1235    to16(int data, pcre_uint8 *p, int utf, int len)
1236    {
1237    pcre_uint16 *pp;
1238    
1239    if (buffer16_size < 2*len + 2)
1240      {
1241      if (buffer16 != NULL) free(buffer16);
1242      buffer16_size = 2*len + 2;
1243      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1244      if (buffer16 == NULL)
1245        {
1246        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1247        exit(1);
1248        }
1249      }
1250    
1251    pp = buffer16;
1252    
1253    if (!utf && !data)
1254      {
1255      while (len-- > 0) *pp++ = *p++;
1256      }
1257    
1258    else
1259      {
1260      int c = 0;
1261      while (len > 0)
1262        {
1263        int chlen = utf82ord(p, &c);
1264        if (chlen <= 0) return -1;
1265        if (c > 0x10ffff) return -2;
1266        p += chlen;
1267        len -= chlen;
1268        if (c < 0x10000) *pp++ = c; else
1269          {
1270          if (!utf) return -3;
1271          c -= 0x10000;
1272          *pp++ = 0xD800 | (c >> 10);
1273          *pp++ = 0xDC00 | (c & 0x3ff);
1274          }
1275        }
1276      }
1277    
1278    *pp = 0;
1279    return pp - buffer16;
1280    }
1281    #endif
1282    
1283    
1284    /*************************************************
1285    *        Read or extend an input line            *
1286    *************************************************/
1287    
1288    /* Input lines are read into buffer, but both patterns and data lines can be
1289    continued over multiple input lines. In addition, if the buffer fills up, we
1290    want to automatically expand it so as to be able to handle extremely large
1291    lines that are needed for certain stress tests. When the input buffer is
1292    expanded, the other two buffers must also be expanded likewise, and the
1293    contents of pbuffer, which are a copy of the input for callouts, must be
1294    preserved (for when expansion happens for a data line). This is not the most
1295    optimal way of handling this, but hey, this is just a test program!
1296    
1297    Arguments:
1298      f            the file to read
1299      start        where in buffer to start (this *must* be within buffer)
1300      prompt       for stdin or readline()
1301    
1302    Returns:       pointer to the start of new data
1303                   could be a copy of start, or could be moved
1304                   NULL if no data read and EOF reached
1305    */
1306    
1307    static pcre_uint8 *
1308    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1309    {
1310    pcre_uint8 *here = start;
1311    
1312    for (;;)
1313      {
1314      size_t rlen = (size_t)(buffer_size - (here - buffer));
1315    
1316      if (rlen > 1000)
1317        {
1318        int dlen;
1319    
1320        /* If libreadline or libedit support is required, use readline() to read a
1321        line if the input is a terminal. Note that readline() removes the trailing
1322        newline, so we must put it back again, to be compatible with fgets(). */
1323    
1324    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1325        if (isatty(fileno(f)))
1326          {
1327          size_t len;
1328          char *s = readline(prompt);
1329          if (s == NULL) return (here == start)? NULL : start;
1330          len = strlen(s);
1331          if (len > 0) add_history(s);
1332          if (len > rlen - 1) len = rlen - 1;
1333          memcpy(here, s, len);
1334          here[len] = '\n';
1335          here[len+1] = 0;
1336          free(s);
1337          }
1338        else
1339    #endif
1340    
1341        /* Read the next line by normal means, prompting if the file is stdin. */
1342    
1343          {
1344          if (f == stdin) printf("%s", prompt);
1345          if (fgets((char *)here, rlen,  f) == NULL)
1346            return (here == start)? NULL : start;
1347          }
1348    
1349        dlen = (int)strlen((char *)here);
1350        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1351        here += dlen;
1352        }
1353    
1354      else
1355        {
1356        int new_buffer_size = 2*buffer_size;
1357        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1358        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1359        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1360    
1361        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1362          {
1363          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1364          exit(1);
1365          }
1366    
1367        memcpy(new_buffer, buffer, buffer_size);
1368        memcpy(new_pbuffer, pbuffer, buffer_size);
1369    
1370        buffer_size = new_buffer_size;
1371    
1372        start = new_buffer + (start - buffer);
1373        here = new_buffer + (here - buffer);
1374    
1375        free(buffer);
1376        free(dbuffer);
1377        free(pbuffer);
1378    
1379        buffer = new_buffer;
1380        dbuffer = new_dbuffer;
1381        pbuffer = new_pbuffer;
1382        }
1383      }
1384    
1385    return NULL;  /* Control never gets here */
1386    }
1387    
1388    
1389    
1390    /*************************************************
1391    *          Read number from string               *
1392    *************************************************/
1393    
1394    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1395    around with conditional compilation, just do the job by hand. It is only used
1396    for unpicking arguments, so just keep it simple.
1397    
1398    Arguments:
1399      str           string to be converted
1400      endptr        where to put the end pointer
1401    
1402    Returns:        the converted value, as an int
1403    */
1404    
1405    static int
1406    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1407    {
1408    int result = 0;
1409    while(*str != 0 && isspace(*str)) str++;
1410    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1411    *endptr = str;
1412    return(result);
1413    }
1414    
1415    
1416    
1417    /*************************************************
1418    *             Print one character                *
1419    *************************************************/
1420    
1421    /* Print a single character either literally, or as a hex escape. */
1422    
1423    static int pchar(int c, FILE *f)
1424    {
1425    if (PRINTOK(c))
1426      {
1427      if (f != NULL) fprintf(f, "%c", c);
1428      return 1;
1429      }
1430    
1431  #endif  if (c < 0x100)
1432      {
1433      if (use_utf)
1434        {
1435        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1436        return 6;
1437        }
1438      else
1439        {
1440        if (f != NULL) fprintf(f, "\\x%02x", c);
1441        return 4;
1442        }
1443      }
1444    
1445    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1446    return (c <= 0x000000ff)? 6 :
1447           (c <= 0x00000fff)? 7 :
1448           (c <= 0x0000ffff)? 8 :
1449           (c <= 0x000fffff)? 9 : 10;
1450    }
1451    
1452    
1453    
1454    #ifdef SUPPORT_PCRE8
1455  /*************************************************  /*************************************************
1456  *             Print character string             *  *         Print 8-bit character string           *
1457  *************************************************/  *************************************************/
1458    
1459  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1460  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1461    
1462  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1463  {  {
1464  int c = 0;  int c = 0;
1465  int yield = 0;  int yield = 0;
1466    
1467    if (length < 0)
1468      length = strlen((char *)p);
1469    
1470  while (length-- > 0)  while (length-- > 0)
1471    {    {
1472  #if !defined NOUTF8  #if !defined NOUTF
1473    if (use_utf8)    if (use_utf)
1474      {      {
1475      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
   
1476      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1477        {        {
1478        length -= rc - 1;        length -= rc - 1;
1479        p += rc;        p += rc;
1480        if (PRINTHEX(c))        yield += pchar(c, f);
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
1481        continue;        continue;
1482        }        }
1483      }      }
1484  #endif  #endif
1485      c = *p++;
1486      yield += pchar(c, f);
1487      }
1488    
1489    return yield;
1490    }
1491    #endif
1492    
    /* Not UTF-8, or malformed UTF-8  */  
1493    
1494    c = *p++;  
1495    if (PRINTHEX(c))  #ifdef SUPPORT_PCRE16
1496      {  /*************************************************
1497      if (f != NULL) fprintf(f, "%c", c);  *    Find length of 0-terminated 16-bit string   *
1498      yield++;  *************************************************/
1499      }  
1500    else  static int strlen16(PCRE_SPTR16 p)
1501    {
1502    int len = 0;
1503    while (*p++ != 0) len++;
1504    return len;
1505    }
1506    #endif  /* SUPPORT_PCRE16 */
1507    
1508    
1509    #ifdef SUPPORT_PCRE16
1510    /*************************************************
1511    *           Print 16-bit character string        *
1512    *************************************************/
1513    
1514    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1515    If handed a NULL file, just counts chars without printing. */
1516    
1517    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1518    {
1519    int yield = 0;
1520    
1521    if (length < 0)
1522      length = strlen16(p);
1523    
1524    while (length-- > 0)
1525      {
1526      int c = *p++ & 0xffff;
1527    #if !defined NOUTF
1528      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1529      {      {
1530      if (f != NULL) fprintf(f, "\\x%02x", c);      int d = *p & 0xffff;
1531      yield += 4;      if (d >= 0xDC00 && d < 0xDFFF)
1532          {
1533          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1534          length--;
1535          p++;
1536          }
1537      }      }
1538    #endif
1539      yield += pchar(c, f);
1540    }    }
1541    
1542  return yield;  return yield;
1543  }  }
1544    #endif  /* SUPPORT_PCRE16 */
1545    
1546    
1547    
1548    #ifdef SUPPORT_PCRE8
1549    /*************************************************
1550    *     Read a capture name (8-bit) and check it   *
1551    *************************************************/
1552    
1553    static pcre_uint8 *
1554    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1555    {
1556    pcre_uint8 *npp = *pp;
1557    while (isalnum(*p)) *npp++ = *p++;
1558    *npp++ = 0;
1559    *npp = 0;
1560    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1561      {
1562      fprintf(outfile, "no parentheses with name \"");
1563      PCHARSV(*pp, 0, -1, outfile);
1564      fprintf(outfile, "\"\n");
1565      }
1566    
1567    *pp = npp;
1568    return p;
1569    }
1570    #endif  /* SUPPORT_PCRE8 */
1571    
1572    
1573    
1574    #ifdef SUPPORT_PCRE16
1575    /*************************************************
1576    *     Read a capture name (16-bit) and check it  *
1577    *************************************************/
1578    
1579    /* Note that the text being read is 8-bit. */
1580    
1581    static pcre_uint8 *
1582    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1583    {
1584    pcre_uint16 *npp = *pp;
1585    while (isalnum(*p)) *npp++ = *p++;
1586    *npp++ = 0;
1587    *npp = 0;
1588    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1589      {
1590      fprintf(outfile, "no parentheses with name \"");
1591      PCHARSV(*pp, 0, -1, outfile);
1592      fprintf(outfile, "\"\n");
1593      }
1594    *pp = npp;
1595    return p;
1596    }
1597    #endif  /* SUPPORT_PCRE16 */
1598    
1599    
1600    
# Line 468  if (callout_extra) Line 1623  if (callout_extra)
1623      else      else
1624        {        {
1625        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1626        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1627          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1628        fprintf(f, "\n");        fprintf(f, "\n");
1629        }        }
# Line 481  printed lengths of the substrings. */ Line 1636  printed lengths of the substrings. */
1636    
1637  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1638    
1639  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1640  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1641    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1642    
1643  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1644    
1645  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1646    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1647    
1648  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 524  fprintf(outfile, "%.*s", (cb->next_item_ Line 1679  fprintf(outfile, "%.*s", (cb->next_item_
1679  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1680  first_callout = 0;  first_callout = 0;
1681    
1682    if (cb->mark != last_callout_mark)
1683      {
1684      if (cb->mark == NULL)
1685        fprintf(outfile, "Latest Mark: <unset>\n");
1686      else
1687        {
1688        fprintf(outfile, "Latest Mark: ");
1689        PCHARSV(cb->mark, 0, -1, outfile);
1690        putc('\n', outfile);
1691        }
1692      last_callout_mark = cb->mark;
1693      }
1694    
1695  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1696    {    {
1697    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 543  return (cb->callout_number != callout_fa Line 1711  return (cb->callout_number != callout_fa
1711  *            Local malloc functions              *  *            Local malloc functions              *
1712  *************************************************/  *************************************************/
1713    
1714  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1715  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1716    show_malloc variable is set only during matching. */
1717    
1718  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1719  {  {
1720  void *block = malloc(size);  void *block = malloc(size);
1721  gotten_store = size;  gotten_store = size;
1722    if (first_gotten_store == 0) first_gotten_store = size;
1723  if (show_malloc)  if (show_malloc)
1724    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1725  return block;  return block;
# Line 562  if (show_malloc) Line 1732  if (show_malloc)
1732  free(block);  free(block);
1733  }  }
1734    
   
1735  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1736    
1737  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 585  free(block); Line 1754  free(block);
1754  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1755  *************************************************/  *************************************************/
1756    
1757  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1758    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1759    value, but the code is defensive.
1760    
1761    Arguments:
1762      re        compiled regex
1763      study     study data
1764      option    PCRE_INFO_xxx option
1765      ptr       where to put the data
1766    
1767    Returns:    0 when OK, < 0 on error
1768    */
1769    
1770  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
1771    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1772  {  {
1773  int rc;  int rc;
1774  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1775    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1776    #ifdef SUPPORT_PCRE16
1777      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1778    #else
1779      rc = PCRE_ERROR_BADMODE;
1780    #endif
1781    else
1782    #ifdef SUPPORT_PCRE8
1783      rc = pcre_fullinfo(re, study, option, ptr);
1784    #else
1785      rc = PCRE_ERROR_BADMODE;
1786    #endif
1787    
1788    if (rc < 0)
1789      {
1790      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1791        use_pcre16? "16" : "", option);
1792      if (rc == PCRE_ERROR_BADMODE)
1793        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1794          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1795      }
1796    
1797    return rc;
1798  }  }
1799    
1800    
1801    
1802  /*************************************************  /*************************************************
1803  *         Byte flipping function                 *  *             Swap byte functions                *
1804  *************************************************/  *************************************************/
1805    
1806  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1807  byteflip(unsigned long int value, int n)  value, respectively.
1808    
1809    Arguments:
1810      value        any number
1811    
1812    Returns:       the byte swapped value
1813    */
1814    
1815    static pcre_uint32
1816    swap_uint32(pcre_uint32 value)
1817  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1818  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1819         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1820         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1821         ((value & 0xff000000) >> 24);         (value >> 24);
1822    }
1823    
1824    static pcre_uint16
1825    swap_uint16(pcre_uint16 value)
1826    {
1827    return (value >> 8) | (value << 8);
1828  }  }
1829    
1830    
1831    
1832    /*************************************************
1833    *        Flip bytes in a compiled pattern        *
1834    *************************************************/
1835    
1836    /* This function is called if the 'F' option was present on a pattern that is
1837    to be written to a file. We flip the bytes of all the integer fields in the
1838    regex data block and the study block. In 16-bit mode this also flips relevant
1839    bytes in the pattern itself. This is to make it possible to test PCRE's
1840    ability to reload byte-flipped patterns, e.g. those compiled on a different
1841    architecture. */
1842    
1843    static void
1844    regexflip(pcre *ere, pcre_extra *extra)
1845    {
1846    REAL_PCRE *re = (REAL_PCRE *)ere;
1847    #ifdef SUPPORT_PCRE16
1848    int op;
1849    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1850    int length = re->name_count * re->name_entry_size;
1851    #ifdef SUPPORT_UTF
1852    BOOL utf = (re->options & PCRE_UTF16) != 0;
1853    BOOL utf16_char = FALSE;
1854    #endif /* SUPPORT_UTF */
1855    #endif /* SUPPORT_PCRE16 */
1856    
1857    /* Always flip the bytes in the main data block and study blocks. */
1858    
1859    re->magic_number = REVERSED_MAGIC_NUMBER;
1860    re->size = swap_uint32(re->size);
1861    re->options = swap_uint32(re->options);
1862    re->flags = swap_uint16(re->flags);
1863    re->top_bracket = swap_uint16(re->top_bracket);
1864    re->top_backref = swap_uint16(re->top_backref);
1865    re->first_char = swap_uint16(re->first_char);
1866    re->req_char = swap_uint16(re->req_char);
1867    re->name_table_offset = swap_uint16(re->name_table_offset);
1868    re->name_entry_size = swap_uint16(re->name_entry_size);
1869    re->name_count = swap_uint16(re->name_count);
1870    
1871    if (extra != NULL)
1872      {
1873      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1874      rsd->size = swap_uint32(rsd->size);
1875      rsd->flags = swap_uint32(rsd->flags);
1876      rsd->minlength = swap_uint32(rsd->minlength);
1877      }
1878    
1879    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1880    in the name table, if present, and then in the pattern itself. */
1881    
1882    #ifdef SUPPORT_PCRE16
1883    if (!use_pcre16) return;
1884    
1885    while(TRUE)
1886      {
1887      /* Swap previous characters. */
1888      while (length-- > 0)
1889        {
1890        *ptr = swap_uint16(*ptr);
1891        ptr++;
1892        }
1893    #ifdef SUPPORT_UTF
1894      if (utf16_char)
1895        {
1896        if ((ptr[-1] & 0xfc00) == 0xd800)
1897          {
1898          /* We know that there is only one extra character in UTF-16. */
1899          *ptr = swap_uint16(*ptr);
1900          ptr++;
1901          }
1902        }
1903      utf16_char = FALSE;
1904    #endif /* SUPPORT_UTF */
1905    
1906      /* Get next opcode. */
1907    
1908      length = 0;
1909      op = *ptr;
1910      *ptr++ = swap_uint16(op);
1911    
1912      switch (op)
1913        {
1914        case OP_END:
1915        return;
1916    
1917    #ifdef SUPPORT_UTF
1918        case OP_CHAR:
1919        case OP_CHARI:
1920        case OP_NOT:
1921        case OP_NOTI:
1922        case OP_STAR:
1923        case OP_MINSTAR:
1924        case OP_PLUS:
1925        case OP_MINPLUS:
1926        case OP_QUERY:
1927        case OP_MINQUERY:
1928        case OP_UPTO:
1929        case OP_MINUPTO:
1930        case OP_EXACT:
1931        case OP_POSSTAR:
1932        case OP_POSPLUS:
1933        case OP_POSQUERY:
1934        case OP_POSUPTO:
1935        case OP_STARI:
1936        case OP_MINSTARI:
1937        case OP_PLUSI:
1938        case OP_MINPLUSI:
1939        case OP_QUERYI:
1940        case OP_MINQUERYI:
1941        case OP_UPTOI:
1942        case OP_MINUPTOI:
1943        case OP_EXACTI:
1944        case OP_POSSTARI:
1945        case OP_POSPLUSI:
1946        case OP_POSQUERYI:
1947        case OP_POSUPTOI:
1948        case OP_NOTSTAR:
1949        case OP_NOTMINSTAR:
1950        case OP_NOTPLUS:
1951        case OP_NOTMINPLUS:
1952        case OP_NOTQUERY:
1953        case OP_NOTMINQUERY:
1954        case OP_NOTUPTO:
1955        case OP_NOTMINUPTO:
1956        case OP_NOTEXACT:
1957        case OP_NOTPOSSTAR:
1958        case OP_NOTPOSPLUS:
1959        case OP_NOTPOSQUERY:
1960        case OP_NOTPOSUPTO:
1961        case OP_NOTSTARI:
1962        case OP_NOTMINSTARI:
1963        case OP_NOTPLUSI:
1964        case OP_NOTMINPLUSI:
1965        case OP_NOTQUERYI:
1966        case OP_NOTMINQUERYI:
1967        case OP_NOTUPTOI:
1968        case OP_NOTMINUPTOI:
1969        case OP_NOTEXACTI:
1970        case OP_NOTPOSSTARI:
1971        case OP_NOTPOSPLUSI:
1972        case OP_NOTPOSQUERYI:
1973        case OP_NOTPOSUPTOI:
1974        if (utf) utf16_char = TRUE;
1975    #endif
1976        /* Fall through. */
1977    
1978        default:
1979        length = OP_lengths16[op] - 1;
1980        break;
1981    
1982        case OP_CLASS:
1983        case OP_NCLASS:
1984        /* Skip the character bit map. */
1985        ptr += 32/sizeof(pcre_uint16);
1986        length = 0;
1987        break;
1988    
1989        case OP_XCLASS:
1990        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1991        if (LINK_SIZE > 1)
1992          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1993            - (1 + LINK_SIZE + 1));
1994        else
1995          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1996    
1997        /* Reverse the size of the XCLASS instance. */
1998        *ptr = swap_uint16(*ptr);
1999        ptr++;
2000        if (LINK_SIZE > 1)
2001          {
2002          *ptr = swap_uint16(*ptr);
2003          ptr++;
2004          }
2005    
2006        op = *ptr;
2007        *ptr = swap_uint16(op);
2008        ptr++;
2009        if ((op & XCL_MAP) != 0)
2010          {
2011          /* Skip the character bit map. */
2012          ptr += 32/sizeof(pcre_uint16);
2013          length -= 32/sizeof(pcre_uint16);
2014          }
2015        break;
2016        }
2017      }
2018    /* Control should never reach here in 16 bit mode. */
2019    #endif /* SUPPORT_PCRE16 */
2020    }
2021    
2022    
2023    
2024  /*************************************************  /*************************************************
2025  *        Check match or recursion limit          *  *        Check match or recursion limit          *
2026  *************************************************/  *************************************************/
2027    
2028  static int  static int
2029  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2030    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2031    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2032  {  {
# Line 633  for (;;) Line 2041  for (;;)
2041    {    {
2042    *limit = mid;    *limit = mid;
2043    
2044    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2045      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2046    
2047    if (count == errnumber)    if (count == errnumber)
# Line 678  Returns:    < 0, = 0, or > 0, according Line 2086  Returns:    < 0, = 0, or > 0, according
2086  */  */
2087    
2088  static int  static int
2089  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2090  {  {
2091  while (n--)  while (n--)
2092    {    {
# Line 694  return 0; Line 2102  return 0;
2102  *         Check newline indicator                *  *         Check newline indicator                *
2103  *************************************************/  *************************************************/
2104    
2105  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2106  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
2107    
2108  Arguments:  Arguments:
2109    p           points after the leading '<'    p           points after the leading '<'
# Line 706  Returns:      appropriate PCRE_NEWLINE_x Line 2113  Returns:      appropriate PCRE_NEWLINE_x
2113  */  */
2114    
2115  static int  static int
2116  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2117  {  {
2118  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2119  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2120  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2121  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2122  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2123  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2124  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2125  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2126  return 0;  return 0;
2127  }  }
# Line 728  return 0; Line 2135  return 0;
2135  static void  static void
2136  usage(void)  usage(void)
2137  {  {
2138  printf("Usage:     pcretest [options] [<input> [<output>]]\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2139  printf("  -b       show compiled code (bytecode)\n");  printf("Input and output default to stdin and stdout.\n");
2140    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2141    printf("If input is a terminal, readline() is used to read from it.\n");
2142    #else
2143    printf("This version of pcretest is not linked with readline().\n");
2144    #endif
2145    printf("\nOptions:\n");
2146    #ifdef SUPPORT_PCRE16
2147    printf("  -16      use the 16-bit library\n");
2148    #endif
2149    printf("  -b       show compiled code\n");
2150  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2151    printf("  -C arg   show a specific compile-time option\n");
2152    printf("           and exit with its value. The arg can be:\n");
2153    printf("     linksize     internal link size [2, 3, 4]\n");
2154    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2155    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2156    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2157    printf("     ucp          Unicode Properties supported [0, 1]\n");
2158    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2159    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2160  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2161  #if !defined NODFA  #if !defined NODFA
2162  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
2163  #endif  #endif
2164  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
2165  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
2166           "  -M       find MATCH_LIMIT minimum for each subject\n"
2167         "  -m       output memory used information\n"         "  -m       output memory used information\n"
2168         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
2169  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 744  printf("  -p       use POSIX interface\n Line 2171  printf("  -p       use POSIX interface\n
2171  #endif  #endif
2172  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2173  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2174  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
2175           "  -s+      force each pattern to be studied, using JIT if available\n"
2176           "  -s++     ditto, verifying when JIT was actually used\n"
2177           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2178           "             where 1 <= n <= 7 selects JIT options\n"
2179           "  -s++n    ditto, verifying when JIT was actually used\n"
2180         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2181  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2182  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 764  options, followed by a set of test data, Line 2196  options, followed by a set of test data,
2196  int main(int argc, char **argv)  int main(int argc, char **argv)
2197  {  {
2198  FILE *infile = stdin;  FILE *infile = stdin;
2199    const char *version;
2200  int options = 0;  int options = 0;
2201  int study_options = 0;  int study_options = 0;
2202    int default_find_match_limit = FALSE;
2203  int op = 1;  int op = 1;
2204  int timeit = 0;  int timeit = 0;
2205  int timeitm = 0;  int timeitm = 0;
2206  int showinfo = 0;  int showinfo = 0;
2207  int showstore = 0;  int showstore = 0;
2208    int force_study = -1;
2209    int force_study_options = 0;
2210  int quiet = 0;  int quiet = 0;
2211  int size_offsets = 45;  int size_offsets = 45;
2212  int size_offsets_max;  int size_offsets_max;
2213  int *offsets = NULL;  int *offsets = NULL;
 #if !defined NOPOSIX  
 int posix = 0;  
 #endif  
2214  int debug = 0;  int debug = 0;
2215  int done = 0;  int done = 0;
2216  int all_use_dfa = 0;  int all_use_dfa = 0;
2217    int verify_jit = 0;
2218  int yield = 0;  int yield = 0;
2219  int stack_size;  int stack_size;
2220    
2221  /* These vectors store, end-to-end, a list of captured substring names. Assume  #if !defined NOPOSIX
2222  that 1024 is plenty long enough for the few names we'll be testing. */  int posix = 0;
2223    #endif
2224    #if !defined NODFA
2225    int *dfa_workspace = NULL;
2226    #endif
2227    
2228  uschar copynames[1024];  pcre_jit_stack *jit_stack = NULL;
 uschar getnames[1024];  
2229    
2230  uschar *copynamesptr;  /* These vectors store, end-to-end, a list of zero-terminated captured
2231  uschar *getnamesptr;  substring names, each list itself being terminated by an empty name. Assume
2232    that 1024 is plenty long enough for the few names we'll be testing. It is
2233    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2234    for the actual memory, to ensure alignment. */
2235    
2236    pcre_uint16 copynames[1024];
2237    pcre_uint16 getnames[1024];
2238    
2239    #ifdef SUPPORT_PCRE16
2240    pcre_uint16 *cn16ptr;
2241    pcre_uint16 *gn16ptr;
2242    #endif
2243    
2244  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2245  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2246    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2247    pcre_uint8 *cn8ptr;
2248    pcre_uint8 *gn8ptr;
2249    #endif
2250    
2251  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2252  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2253  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2254    
2255    buffer = (pcre_uint8 *)malloc(buffer_size);
2256    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2257    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2258    
2259  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2260    
# Line 813  it set 0x8000, but then I was advised th Line 2269  it set 0x8000, but then I was advised th
2269  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2270  #endif  #endif
2271    
2272    /* Get the version number: both pcre_version() and pcre16_version() give the
2273    same answer. We just need to ensure that we call one that is available. */
2274    
2275    #ifdef SUPPORT_PCRE8
2276    version = pcre_version();
2277    #else
2278    version = pcre16_version();
2279    #endif
2280    
2281  /* Scan options */  /* Scan options */
2282    
2283  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2284    {    {
2285    unsigned char *endptr;    pcre_uint8 *endptr;
2286      char *arg = argv[op];
2287    
2288    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(arg, "-m") == 0) showstore = 1;
2289      showstore = 1;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2290    else if (strcmp(argv[op], "-q") == 0) quiet = 1;  
2291    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strncmp(arg, "-s+", 3) == 0)
2292    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      {
2293    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      arg += 3;
2294        if (*arg == '+') { arg++; verify_jit = TRUE; }
2295        force_study = 1;
2296        if (*arg == 0)
2297          force_study_options = jit_study_bits[6];
2298        else if (*arg >= '1' && *arg <= '7')
2299          force_study_options = jit_study_bits[*arg - '1'];
2300        else goto BAD_ARG;
2301        }
2302      else if (strcmp(arg, "-16") == 0)
2303        {
2304    #ifdef SUPPORT_PCRE16
2305        use_pcre16 = 1;
2306    #else
2307        printf("** This version of PCRE was built without 16-bit support\n");
2308        exit(1);
2309    #endif
2310        }
2311      else if (strcmp(arg, "-q") == 0) quiet = 1;
2312      else if (strcmp(arg, "-b") == 0) debug = 1;
2313      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2314      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2315      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2316  #if !defined NODFA  #if !defined NODFA
2317    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2318  #endif  #endif
2319    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2320        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2321          *endptr == 0))          *endptr == 0))
2322      {      {
2323      op++;      op++;
2324      argc--;      argc--;
2325      }      }
2326    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2327      {      {
2328      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2329      int temp;      int temp;
2330      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2331                       *endptr == 0))                       *endptr == 0))
2332        {        {
2333        timeitm = temp;        timeitm = temp;
# Line 849  while (argc > 1 && argv[op][0] == '-') Line 2337  while (argc > 1 && argv[op][0] == '-')
2337      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2338      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2339      }      }
2340    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2341        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2342          *endptr == 0))          *endptr == 0))
2343      {      {
2344  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2345      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2346      exit(1);      exit(1);
2347  #else  #else
# Line 872  while (argc > 1 && argv[op][0] == '-') Line 2360  while (argc > 1 && argv[op][0] == '-')
2360  #endif  #endif
2361      }      }
2362  #if !defined NOPOSIX  #if !defined NOPOSIX
2363    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2364  #endif  #endif
2365    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2366      {      {
2367      int rc;      int rc;
2368      printf("PCRE version %s\n", pcre_version());      unsigned long int lrc;
2369    
2370        if (argc > 2)
2371          {
2372          if (strcmp(argv[op + 1], "linksize") == 0)
2373            {
2374            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2375            printf("%d\n", rc);
2376            yield = rc;
2377            goto EXIT;
2378            }
2379          if (strcmp(argv[op + 1], "pcre8") == 0)
2380            {
2381    #ifdef SUPPORT_PCRE8
2382            printf("1\n");
2383            yield = 1;
2384    #else
2385            printf("0\n");
2386            yield = 0;
2387    #endif
2388            goto EXIT;
2389            }
2390          if (strcmp(argv[op + 1], "pcre16") == 0)
2391            {
2392    #ifdef SUPPORT_PCRE16
2393            printf("1\n");
2394            yield = 1;
2395    #else
2396            printf("0\n");
2397            yield = 0;
2398    #endif
2399            goto EXIT;
2400            }
2401          if (strcmp(argv[op + 1], "utf") == 0)
2402            {
2403    #ifdef SUPPORT_PCRE8
2404            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2405            printf("%d\n", rc);
2406            yield = rc;
2407    #else
2408            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2409            printf("%d\n", rc);
2410            yield = rc;
2411    #endif
2412            goto EXIT;
2413            }
2414          if (strcmp(argv[op + 1], "ucp") == 0)
2415            {
2416            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2417            printf("%d\n", rc);
2418            yield = rc;
2419            goto EXIT;
2420            }
2421          if (strcmp(argv[op + 1], "jit") == 0)
2422            {
2423            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2424            printf("%d\n", rc);
2425            yield = rc;
2426            goto EXIT;
2427            }
2428          if (strcmp(argv[op + 1], "newline") == 0)
2429            {
2430            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2431            /* Note that these values are always the ASCII values, even
2432            in EBCDIC environments. CR is 13 and NL is 10. */
2433            printf("%s\n", (rc == 13)? "CR" :
2434              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2435              (rc == -2)? "ANYCRLF" :
2436              (rc == -1)? "ANY" : "???");
2437            goto EXIT;
2438            }
2439          printf("Unknown -C option: %s\n", argv[op + 1]);
2440          goto EXIT;
2441          }
2442    
2443        printf("PCRE version %s\n", version);
2444      printf("Compiled with\n");      printf("Compiled with\n");
2445    
2446    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2447    are set, either both UTFs are supported or both are not supported. */
2448    
2449    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2450        printf("  8-bit and 16-bit support\n");
2451        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2452        if (rc)
2453          printf("  UTF-8 and UTF-16 support\n");
2454        else
2455          printf("  No UTF-8 or UTF-16 support\n");
2456    #elif defined SUPPORT_PCRE8
2457        printf("  8-bit support only\n");
2458      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2459      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2460      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2461        printf("  16-bit support only\n");
2462        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2463        printf("  %sUTF-16 support\n", rc? "" : "No ");
2464    #endif
2465    
2466        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2467      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2468      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2469      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      if (rc)
2470        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        {
2471          const char *arch;
2472          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2473          printf("  Just-in-time compiler support: %s\n", arch);
2474          }
2475        else
2476          printf("  No just-in-time compiler support\n");
2477        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2478        /* Note that these values are always the ASCII values, even
2479        in EBCDIC environments. CR is 13 and NL is 10. */
2480        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2481          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2482        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2483        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2484      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2485      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2486                                       "all Unicode newlines");                                       "all Unicode newlines");
2487      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2488      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2489      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2490      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2491      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2492      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
2493      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2494      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
2495      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2496      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2497        if (showstore)
2498          {
2499          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2500          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2501          }
2502        printf("\n");
2503      goto EXIT;      goto EXIT;
2504      }      }
2505    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
2506             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
2507      {      {
2508      usage();      usage();
2509      goto EXIT;      goto EXIT;
2510      }      }
2511    else    else
2512      {      {
2513      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
2514        printf("** Unknown or malformed option %s\n", arg);
2515      usage();      usage();
2516      yield = 1;      yield = 1;
2517      goto EXIT;      goto EXIT;
# Line 958  if (argc > 2) Line 2558  if (argc > 2)
2558    
2559  /* Set alternative malloc function */  /* Set alternative malloc function */
2560    
2561    #ifdef SUPPORT_PCRE8
2562  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2563  pcre_free = new_free;  pcre_free = new_free;
2564  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2565  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2566    #endif
2567    
2568    #ifdef SUPPORT_PCRE16
2569    pcre16_malloc = new_malloc;
2570    pcre16_free = new_free;
2571    pcre16_stack_malloc = stack_malloc;
2572    pcre16_stack_free = stack_free;
2573    #endif
2574    
2575  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2576    
2577  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2578    
2579  /* Main loop */  /* Main loop */
2580    
# Line 980  while (!done) Line 2589  while (!done)
2589  #endif  #endif
2590    
2591    const char *error;    const char *error;
2592    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2593    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
2594    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
2595      const pcre_uint8 *tables = NULL;
2596      unsigned long int get_options;
2597    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2598    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2599      int do_allcaps = 0;
2600      int do_mark = 0;
2601    int do_study = 0;    int do_study = 0;
2602      int no_force_study = 0;
2603    int do_debug = debug;    int do_debug = debug;
2604    int do_G = 0;    int do_G = 0;
2605    int do_g = 0;    int do_g = 0;
2606    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2607    int do_showrest = 0;    int do_showrest = 0;
2608      int do_showcaprest = 0;
2609    int do_flip = 0;    int do_flip = 0;
2610    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2611    
2612    use_utf8 = 0;  #if !defined NODFA
2613      int dfa_matched = 0;
2614    #endif
2615    
2616      use_utf = 0;
2617    debug_lengths = 1;    debug_lengths = 1;
2618    
2619    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (extend_inputline(infile, buffer) == NULL) break;  
2620    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2621    fflush(outfile);    fflush(outfile);
2622    
# Line 1010  while (!done) Line 2628  while (!done)
2628    
2629    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2630      {      {
2631      unsigned long int magic, get_options;      pcre_uint32 magic;
2632      uschar sbuf[8];      pcre_uint8 sbuf[8];
2633      FILE *f;      FILE *f;
2634    
2635      p++;      p++;
2636        if (*p == '!')
2637          {
2638          do_debug = TRUE;
2639          do_showinfo = TRUE;
2640          p++;
2641          }
2642    
2643      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2644      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2645      *pp = 0;      *pp = 0;
# Line 1026  while (!done) Line 2651  while (!done)
2651        continue;        continue;
2652        }        }
2653    
2654        first_gotten_store = 0;
2655      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2656    
2657      true_size =      true_size =
# Line 1033  while (!done) Line 2659  while (!done)
2659      true_study_size =      true_study_size =
2660        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2661    
2662      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2663      regex_gotten_store = gotten_store;      if (re == NULL)
2664          {
2665          printf("** Failed to get %d bytes of memory for pcre object\n",
2666            (int)true_size);
2667          yield = 1;
2668          goto EXIT;
2669          }
2670        regex_gotten_store = first_gotten_store;
2671    
2672      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2673    
2674      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2675      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2676        {        {
2677        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2678          {          {
2679          do_flip = 1;          do_flip = 1;
2680          }          }
2681        else        else
2682          {          {
2683          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2684            new_free(re);
2685          fclose(f);          fclose(f);
2686          continue;          continue;
2687          }          }
2688        }        }
2689    
2690      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
2691        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2692          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
     /* Need to know if UTF-8 for printing data strings */  
2693    
2694      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      /* Now see if there is any following study data. */
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
     /* Now see if there is any following study data */  
2695    
2696      if (true_study_size != 0)      if (true_study_size != 0)
2697        {        {
# Line 1077  while (!done) Line 2707  while (!done)
2707          {          {
2708          FAIL_READ:          FAIL_READ:
2709          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2710          if (extra != NULL) new_free(extra);          if (extra != NULL)
2711          if (re != NULL) new_free(re);            {
2712              PCRE_FREE_STUDY(extra);
2713              }
2714            new_free(re);
2715          fclose(f);          fclose(f);
2716          continue;          continue;
2717          }          }
# Line 1087  while (!done) Line 2720  while (!done)
2720        }        }
2721      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2722    
2723        /* Flip the necessary bytes. */
2724        if (do_flip)
2725          {
2726          int rc;
2727          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2728          if (rc == PCRE_ERROR_BADMODE)
2729            {
2730            /* Simulate the result of the function call below. */
2731            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2732              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2733            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2734              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2735            new_free(re);
2736            fclose(f);
2737            continue;
2738            }
2739          }
2740    
2741        /* Need to know if UTF-8 for printing data strings. */
2742    
2743        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2744          {
2745          new_free(re);
2746          fclose(f);
2747          continue;
2748          }
2749        use_utf = (get_options & PCRE_UTF8) != 0;
2750    
2751      fclose(f);      fclose(f);
2752      goto SHOW_INFO;      goto SHOW_INFO;
2753      }      }
2754    
2755    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2756    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2757    
2758    delimiter = *p++;    delimiter = *p++;
2759    
2760    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2761      {      {
2762      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2763      goto SKIP_DATA;      goto SKIP_DATA;
2764      }      }
2765    
2766    pp = p;    pp = p;
2767    poffset = p - buffer;    poffset = (int)(p - buffer);
2768    
2769    for(;;)    for(;;)
2770      {      {
# Line 1114  while (!done) Line 2775  while (!done)
2775        pp++;        pp++;
2776        }        }
2777      if (*pp != 0) break;      if (*pp != 0) break;
2778      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if ((pp = extend_inputline(infile, pp)) == NULL)  
2779        {        {
2780        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2781        done = 1;        done = 1;
# Line 1144  while (!done) Line 2804  while (!done)
2804    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2805    
2806    options = 0;    options = 0;
2807    study_options = 0;    study_options = force_study_options;
2808    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
2809    
2810    while (*pp != 0)    while (*pp != 0)
# Line 1158  while (!done) Line 2818  while (!done)
2818        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2819        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2820    
2821        case '+': do_showrest = 1; break;        case '+':
2822          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2823          break;
2824    
2825          case '=': do_allcaps = 1; break;
2826        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2827        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2828        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1168  while (!done) Line 2832  while (!done)
2832        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2833        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2834        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
2835          case 'K': do_mark = 1; break;
2836        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2837        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2838    
# Line 1175  while (!done) Line 2840  while (!done)
2840        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2841  #endif  #endif
2842    
2843        case 'S': do_study = 1; break;        case 'S':
2844          do_study = 1;
2845          for (;;)
2846            {
2847            switch (*pp++)
2848              {
2849              case 'S':
2850              do_study = 0;
2851              no_force_study = 1;
2852              break;
2853    
2854              case '!':
2855              study_options |= PCRE_STUDY_EXTRA_NEEDED;
2856              break;
2857    
2858              case '+':
2859              if (*pp == '+')
2860                {
2861                verify_jit = TRUE;
2862                pp++;
2863                }
2864              if (*pp >= '1' && *pp <= '7')
2865                study_options |= jit_study_bits[*pp++ - '1'];
2866              else
2867                study_options |= jit_study_bits[6];
2868              break;
2869    
2870              case '-':
2871              study_options &= ~PCRE_STUDY_ALLJIT;
2872              break;
2873    
2874              default:
2875              pp--;
2876              goto ENDLOOP;
2877              }
2878            }
2879          ENDLOOP:
2880          break;
2881    
2882        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2883          case 'W': options |= PCRE_UCP; break;
2884        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2885          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2886        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2887        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2888        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2889    
2890          case 'T':
2891          switch (*pp++)
2892            {
2893            case '0': tables = tables0; break;
2894            case '1': tables = tables1; break;
2895    
2896            case '\r':
2897            case '\n':
2898            case ' ':
2899            case 0:
2900            fprintf(outfile, "** Missing table number after /T\n");
2901            goto SKIP_DATA;
2902    
2903            default:
2904            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2905            goto SKIP_DATA;
2906            }
2907          break;
2908    
2909        case 'L':        case 'L':
2910        ppp = pp;        ppp = pp;
2911        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1194  while (!done) Line 2918  while (!done)
2918          goto SKIP_DATA;          goto SKIP_DATA;
2919          }          }
2920        locale_set = 1;        locale_set = 1;
2921        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2922        pp = ppp;        pp = ppp;
2923        break;        break;
2924    
# Line 1207  while (!done) Line 2931  while (!done)
2931    
2932        case '<':        case '<':
2933          {          {
2934          int x = check_newline(pp, outfile);          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2935          if (x == 0) goto SKIP_DATA;            {
2936          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
2937          while (*pp++ != '>');            pp += 3;
2938              }
2939            else
2940              {
2941              int x = check_newline(pp, outfile);
2942              if (x == 0) goto SKIP_DATA;
2943              options |= x;
2944              while (*pp++ != '>');
2945              }
2946          }          }
2947        break;        break;
2948    
# Line 1227  while (!done) Line 2959  while (!done)
2959    
2960    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2961    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2962    local character tables. */    local character tables. Neither does it have 16-bit support. */
2963    
2964  #if !defined NOPOSIX  #if !defined NOPOSIX
2965    if (posix || do_posix)    if (posix || do_posix)
# Line 1240  while (!done) Line 2972  while (!done)
2972      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2973      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2974      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2975        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2976        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2977    
2978        first_gotten_store = 0;
2979      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2980    
2981      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1260  while (!done) Line 2995  while (!done)
2995  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2996    
2997      {      {
2998        /* In 16-bit mode, convert the input. */
2999    
3000    #ifdef SUPPORT_PCRE16
3001        if (use_pcre16)
3002          {
3003          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3004            {
3005            case -1:
3006            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3007              "converted to UTF-16\n");
3008            goto SKIP_DATA;
3009    
3010            case -2:
3011            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3012              "cannot be converted to UTF-16\n");
3013            goto SKIP_DATA;
3014    
3015            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3016            fprintf(outfile, "**Failed: character value greater than 0xffff "
3017              "cannot be converted to 16-bit in non-UTF mode\n");
3018            goto SKIP_DATA;
3019    
3020            default:
3021            break;
3022            }
3023          p = (pcre_uint8 *)buffer16;
3024          }
3025    #endif
3026    
3027        /* Compile many times when timing */
3028    
3029      if (timeit > 0)      if (timeit > 0)
3030        {        {
3031        register int i;        register int i;
# Line 1267  while (!done) Line 3033  while (!done)
3033        clock_t start_time = clock();        clock_t start_time = clock();
3034        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
3035          {          {
3036          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3037          if (re != NULL) free(re);          if (re != NULL) free(re);
3038          }          }
3039        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1276  while (!done) Line 3042  while (!done)
3042            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
3043        }        }
3044    
3045      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
3046        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3047    
3048      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3049      if non-interactive. */      if non-interactive. */
# Line 1289  while (!done) Line 3056  while (!done)
3056          {          {
3057          for (;;)          for (;;)
3058            {            {
3059            if (extend_inputline(infile, buffer) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
3060              {              {
3061              done = 1;              done = 1;
3062              goto CONTINUE;              goto CONTINUE;
# Line 1303  while (!done) Line 3070  while (!done)
3070        goto CONTINUE;        goto CONTINUE;
3071        }        }
3072    
3073      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
3074      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
3075      returns only limited data. Check that it agrees with the newer one. */      lines. */
3076    
3077      if (log_store)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3078        fprintf(outfile, "Memory allocation (code space): %d\n",        goto SKIP_DATA;
3079          (int)(gotten_store -      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3080    
3081      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3082      and remember the store that was got. */      and remember the store that was got. */
3083    
3084      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
3085      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3086    
3087        /* Output code size information if requested */
3088    
3089      /* If /S was present, study the regexp to generate additional info to      if (log_store)
3090      help with the matching. */        fprintf(outfile, "Memory allocation (code space): %d\n",
3091            (int)(first_gotten_store -
3092                  sizeof(REAL_PCRE) -
3093                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3094    
3095        /* If -s or /S was present, study the regex to generate additional info to
3096        help with the matching, unless the pattern has the SS option, which
3097        suppresses the effect of /S (used for a few test patterns where studying is
3098        never sensible). */
3099    
3100      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
3101        {        {
3102        if (timeit > 0)        if (timeit > 0)
3103          {          {
# Line 1330  while (!done) Line 3105  while (!done)
3105          clock_t time_taken;          clock_t time_taken;
3106          clock_t start_time = clock();          clock_t start_time = clock();
3107          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3108            extra = pcre_study(re, study_options, &error);            {
3109              PCRE_STUDY(extra, re, study_options, &error);
3110              }
3111          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3112          if (extra != NULL) free(extra);          if (extra != NULL)
3113              {
3114              PCRE_FREE_STUDY(extra);
3115              }
3116          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3117            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3118              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3119          }          }
3120        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options, &error);
3121        if (error != NULL)        if (error != NULL)
3122          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3123        else if (extra != NULL)        else if (extra != NULL)
3124            {
3125          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3126            if (log_store)
3127              {
3128              size_t jitsize;
3129              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3130                  jitsize != 0)
3131                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3132              }
3133            }
3134        }        }
3135    
3136      /* If the 'F' option was present, we flip the bytes of all the integer      /* If /K was present, we set up for handling MARK data. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
3137    
3138      if (do_flip)      if (do_mark)
3139        {        {
3140        real_pcre *rre = (real_pcre *)re;        if (extra == NULL)
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
3141          {          {
3142          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3143          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          extra->flags = 0;
         rsd->options = byteflip(rsd->options, sizeof(rsd->options));  
3144          }          }
3145          extra->mark = &markptr;
3146          extra->flags |= PCRE_EXTRA_MARK;
3147        }        }
3148    
3149      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
3150    
3151      SHOW_INFO:      SHOW_INFO:
3152    
3153      if (do_debug)      if (do_debug)
3154        {        {
3155        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3156        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3157        }        }
3158    
3159        /* We already have the options in get_options (see above) */
3160    
3161      if (do_showinfo)      if (do_showinfo)
3162        {        {
3163        unsigned long int get_options, all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3164        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3165          hascrorlf;          hascrorlf, maxlookbehind;
3166        int nameentrysize, namecount;        int nameentrysize, namecount;
3167        const uschar *nametable;        const pcre_uint8 *nametable;
3168    
3169        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3170        new_info(re, NULL, PCRE_INFO_SIZE, &size);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3171        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3172        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3173        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3174        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3175        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3176        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3177        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3178        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3179        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3180        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3181              != 0)
3182  #if !defined NOINFOCHECK          goto SKIP_DATA;
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3183    
3184        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3185          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1447  while (!done) Line 3194  while (!done)
3194          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3195          while (namecount-- > 0)          while (namecount-- > 0)
3196            {            {
3197            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3198              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3199              GET2(nametable, 0));  #else
3200              int imm2_size = IMM2_SIZE;
3201    #endif
3202              int length = (int)STRLEN(nametable + imm2_size);
3203              fprintf(outfile, "  ");
3204              PCHARSV(nametable, imm2_size, length, outfile);
3205              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3206    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3207              fprintf(outfile, "%3d\n", use_pcre16?
3208                 (int)(((PCRE_SPTR16)nametable)[0])
3209                :((int)nametable[0] << 8) | (int)nametable[1]);
3210              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3211    #else
3212              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3213    #ifdef SUPPORT_PCRE8
3214            nametable += nameentrysize;            nametable += nameentrysize;
3215    #else
3216              nametable += nameentrysize * 2;
3217    #endif
3218    #endif
3219            }            }
3220          }          }
3221    
3222        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3223        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3224    
3225        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3226        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3227    
3228        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3229          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3230            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3231            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3232            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1474  while (!done) Line 3239  while (!done)
3239            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3240            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3241            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3242            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3243            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3244              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3245              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3246            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3247    
3248        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1516  while (!done) Line 3283  while (!done)
3283          }          }
3284        else        else
3285          {          {
3286          int ch = first_char & 255;          const char *caseless =
3287          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3288            "" : " (caseless)";            "" : " (caseless)";
3289          if (PRINTHEX(ch))  
3290            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3291              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3292          else          else
3293            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3294              fprintf(outfile, "First char = ");
3295              pchar(first_char, outfile);
3296              fprintf(outfile, "%s\n", caseless);
3297              }
3298          }          }
3299    
3300        if (need_char < 0)        if (need_char < 0)
# Line 1531  while (!done) Line 3303  while (!done)
3303          }          }
3304        else        else
3305          {          {
3306          int ch = need_char & 255;          const char *caseless =
3307          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3308            "" : " (caseless)";            "" : " (caseless)";
3309          if (PRINTHEX(ch))  
3310            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3311              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3312          else          else
3313            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3314              fprintf(outfile, "Need char = ");
3315              pchar(need_char, outfile);
3316              fprintf(outfile, "%s\n", caseless);
3317              }
3318          }          }
3319    
3320          if (maxlookbehind > 0)
3321            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3322    
3323        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3324        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3325        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3326        flipped.) */        flipped.) If study was forced by an external -s, don't show this
3327          information unless -i or -d was also present. This means that, except
3328          when auto-callouts are involved, the output from runs with and without
3329          -s should be identical. */
3330    
3331        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3332          {          {
3333          if (extra == NULL)          if (extra == NULL)
3334            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3335          else          else
3336            {            {
3337            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3338            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
3339    
3340            if (start_bits == NULL)            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3341              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3342            else  
3343              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3344              {              {
3345              int i;              if (start_bits == NULL)
3346              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3347              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3348                {                {
3349                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3350                  int c = 24;
3351                  fprintf(outfile, "Starting byte set: ");
3352                  for (i = 0; i < 256; i++)
3353                  {                  {
3354                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
3355                    {                    {
3356                    fprintf(outfile, "\n  ");                    if (c > 75)
3357                    c = 2;                      {
3358                    }                      fprintf(outfile, "\n  ");
3359                  if (PRINTHEX(i) && i != ' ')                      c = 2;
3360                    {                      }
3361                    fprintf(outfile, "%c ", i);                    if (PRINTOK(i) && i != ' ')
3362                    c += 2;                      {
3363                    }                      fprintf(outfile, "%c ", i);
3364                  else                      c += 2;
3365                    {                      }
3366                    fprintf(outfile, "\\x%02x ", i);                    else
3367                    c += 5;                      {
3368                        fprintf(outfile, "\\x%02x ", i);
3369                        c += 5;
3370                        }
3371                    }                    }
3372                  }                  }
3373                  fprintf(outfile, "\n");
3374                }                }
3375              fprintf(outfile, "\n");              }
3376              }
3377    
3378            /* Show this only if the JIT was set by /S, not by -s. */
3379    
3380            if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3381                (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3382              {
3383              int jit;
3384              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3385                {
3386                if (jit)
3387                  fprintf(outfile, "JIT study was successful\n");
3388                else
3389    #ifdef SUPPORT_JIT
3390                  fprintf(outfile, "JIT study was not successful\n");
3391    #else
3392                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3393    #endif
3394              }              }
3395            }            }
3396          }          }
# Line 1601  while (!done) Line 3409  while (!done)
3409          }          }
3410        else        else
3411          {          {
3412          uschar sbuf[8];          pcre_uint8 sbuf[8];
3413          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3414          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3415          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3416          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3417            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3418          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3419          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3420          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3421          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3422            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3423    
3424          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3425              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1619  while (!done) Line 3428  while (!done)
3428            }            }
3429          else          else
3430            {            {
3431            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3432    
3433              /* If there is study data, write it. */
3434    
3435            if (extra != NULL)            if (extra != NULL)
3436              {              {
3437              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1629  while (!done) Line 3441  while (!done)
3441                  strerror(errno));                  strerror(errno));
3442                }                }
3443              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
3444              }              }
3445            }            }
3446          fclose(f);          fclose(f);
3447          }          }
3448    
3449        new_free(re);        new_free(re);
3450        if (extra != NULL) new_free(extra);        if (extra != NULL)
3451        if (tables != NULL) new_free((void *)tables);          {
3452            PCRE_FREE_STUDY(extra);
3453            }
3454          if (locale_set)
3455            {
3456            new_free((void *)tables);
3457            setlocale(LC_CTYPE, "C");
3458            locale_set = 0;
3459            }
3460        continue;  /* With next regex */        continue;  /* With next regex */
3461        }        }
3462      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1646  while (!done) Line 3465  while (!done)
3465    
3466    for (;;)    for (;;)
3467      {      {
3468      uschar *q;      pcre_uint8 *q;
3469      uschar *bptr;      pcre_uint8 *bptr;
3470      int *use_offsets = offsets;      int *use_offsets = offsets;
3471      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3472      int callout_data = 0;      int callout_data = 0;
3473      int callout_data_set = 0;      int callout_data_set = 0;
3474      int count, c;      int count, c;
3475      int copystrings = 0;      int copystrings = 0;
3476      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
3477      int getstrings = 0;      int getstrings = 0;
3478      int getlist = 0;      int getlist = 0;
3479      int gmatched = 0;      int gmatched = 0;
3480      int start_offset = 0;      int start_offset = 0;
3481        int start_offset_sign = 1;
3482      int g_notempty = 0;      int g_notempty = 0;
3483      int use_dfa = 0;      int use_dfa = 0;
3484    
     options = 0;  
   
3485      *copynames = 0;      *copynames = 0;
3486      *getnames = 0;      *getnames = 0;
3487    
3488      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3489      getnamesptr = getnames;      cn16ptr = copynames;
3490        gn16ptr = getnames;
3491    #endif
3492    #ifdef SUPPORT_PCRE8
3493        cn8ptr = copynames8;
3494        gn8ptr = getnames8;
3495    #endif
3496    
3497      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3498      first_callout = 1;      first_callout = 1;
3499        last_callout_mark = NULL;
3500      callout_extra = 0;      callout_extra = 0;
3501      callout_count = 0;      callout_count = 0;
3502      callout_fail_count = 999999;      callout_fail_count = 999999;
3503      callout_fail_id = -1;      callout_fail_id = -1;
3504      show_malloc = 0;      show_malloc = 0;
3505        options = 0;
3506    
3507      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3508        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 1684  while (!done) Line 3510  while (!done)
3510      len = 0;      len = 0;
3511      for (;;)      for (;;)
3512        {        {
3513        if (infile == stdin) printf("data> ");        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       if (extend_inputline(infile, buffer + len) == NULL)  
3514          {          {
3515          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
3516              {
3517              fprintf(outfile, "\n");
3518              break;
3519              }
3520          done = 1;          done = 1;
3521          goto CONTINUE;          goto CONTINUE;
3522          }          }
# Line 1709  while (!done) Line 3538  while (!done)
3538        int i = 0;        int i = 0;
3539        int n = 0;        int n = 0;
3540    
3541        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3542          In non-UTF mode, allow the value of the byte to fall through to later,
3543          where values greater than 127 are turned into UTF-8 when running in
3544          16-bit mode. */
3545    
3546          if (c != '\\')
3547            {
3548            if (use_utf)
3549              {
3550              *q++ = c;
3551              continue;
3552              }
3553            }
3554    
3555          /* Handle backslash escapes */
3556    
3557          else switch ((c = *p++))
3558          {          {
3559          case 'a': c =    7; break;          case 'a': c =    7; break;
3560          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 1725  while (!done) Line 3570  while (!done)
3570          c -= '0';          c -= '0';
3571          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3572            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3573          break;          break;
3574    
3575          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3576          if (*p == '{')          if (*p == '{')
3577            {            {
3578            unsigned char *pt = p;            pcre_uint8 *pt = p;
3579            c = 0;            c = 0;
3580            while (isxdigit(*(++pt)))  
3581              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3582              when isxdigit() is a macro that refers to its argument more than
3583              once. This is banned by the C Standard, but apparently happens in at
3584              least one MacOS environment. */
3585    
3586              for (pt++; isxdigit(*pt); pt++)
3587                {
3588                if (++i == 9)
3589                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3590                                   "using only the first eight.\n");
3591                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3592                }
3593            if (*pt == '}')            if (*pt == '}')
3594              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             utn = ord2utf8(c, buff8);  
             for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
             c = buff8[ii];   /* Last byte */  
3595              p = pt + 1;              p = pt + 1;
3596              break;              break;
3597              }              }
3598            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3599            }            }
 #endif  
3600    
3601          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3602            allows UTF-8 characters to be constructed byte by byte, and also allows
3603            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3604            Otherwise, pass it down to later code so that it can be turned into
3605            UTF-8 when running in 16-bit mode. */
3606    
3607          c = 0;          c = 0;
3608          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3609            {            {
3610            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3611            p++;            p++;
3612            }            }
3613            if (use_utf)
3614              {
3615              *q++ = c;
3616              continue;
3617              }
3618          break;          break;
3619    
3620          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 1778  while (!done) Line 3622  while (!done)
3622          continue;          continue;
3623    
3624          case '>':          case '>':
3625            if (*p == '-')
3626              {
3627              start_offset_sign = -1;
3628              p++;
3629              }
3630          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3631            start_offset *= start_offset_sign;
3632          continue;          continue;
3633    
3634          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1797  while (!done) Line 3647  while (!done)
3647            }            }
3648          else if (isalnum(*p))          else if (isalnum(*p))
3649            {            {
3650            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3651            }            }
3652          else if (*p == '+')          else if (*p == '+')
3653            {            {
# Line 1813  while (!done) Line 3656  while (!done)
3656            }            }
3657          else if (*p == '-')          else if (*p == '-')
3658            {            {
3659            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3660            p++;            p++;
3661            }            }
3662          else if (*p == '!')          else if (*p == '!')
# Line 1851  while (!done) Line 3694  while (!done)
3694  #endif  #endif
3695            use_dfa = 1;            use_dfa = 1;
3696          continue;          continue;
3697    #endif
3698    
3699    #if !defined NODFA
3700          case 'F':          case 'F':
3701          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
3702          continue;          continue;
# Line 1865  while (!done) Line 3710  while (!done)
3710            }            }
3711          else if (isalnum(*p))          else if (isalnum(*p))
3712            {            {
3713            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3714            while (isalnum(*p)) *npp++ = *p++;            }
3715            *npp++ = 0;          continue;
3716            *npp = 0;  
3717            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3718            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3719              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3720            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3721                && extra->executable_jit != NULL)
3722              {
3723              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3724              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3725              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3726            }            }
3727          continue;          continue;
3728    
# Line 1885  while (!done) Line 3735  while (!done)
3735          continue;          continue;
3736    
3737          case 'N':          case 'N':
3738          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
3739              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3740            else
3741              options |= PCRE_NOTEMPTY;
3742          continue;          continue;
3743    
3744          case 'O':          case 'O':
# Line 1905  while (!done) Line 3758  while (!done)
3758            }            }
3759          use_size_offsets = n;          use_size_offsets = n;
3760          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
3761              else use_offsets = offsets + size_offsets_max - n;  /* To catch overruns */
3762          continue;          continue;
3763    
3764          case 'P':          case 'P':
3765          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3766              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3767          continue;          continue;
3768    
3769          case 'Q':          case 'Q':
# Line 1943  while (!done) Line 3798  while (!done)
3798          show_malloc = 1;          show_malloc = 1;
3799          continue;          continue;
3800    
3801            case 'Y':
3802            options |= PCRE_NO_START_OPTIMIZE;
3803            continue;
3804    
3805          case 'Z':          case 'Z':
3806          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
3807          continue;          continue;
# Line 1960  while (!done) Line 3819  while (!done)
3819            }            }
3820          continue;          continue;
3821          }          }
3822        *q++ = c;  
3823          /* We now have a character value in c that may be greater than 255. In
3824          16-bit mode, we always convert characters to UTF-8 so that values greater
3825          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3826          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3827          mode must have come from \x{...} or octal constructs because values from
3828          \x.. get this far only in non-UTF mode. */
3829    
3830    #if !defined NOUTF || defined SUPPORT_PCRE16
3831          if (use_pcre16 || use_utf)
3832            {
3833            pcre_uint8 buff8[8];
3834            int ii, utn;
3835            utn = ord2utf8(c, buff8);
3836            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3837            }
3838          else
3839    #endif
3840            {
3841            if (c > 255)
3842              {
3843              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3844                "and UTF-8 mode is not enabled.\n", c);
3845              fprintf(outfile, "** Truncation will probably give the wrong "
3846                "result.\n");
3847              }
3848            *q++ = c;
3849            }
3850        }        }
3851    
3852        /* Reached end of subject string */
3853    
3854      *q = 0;      *q = 0;
3855      len = q - dbuffer;      len = (int)(q - dbuffer);
3856    
3857        /* Move the data to the end of the buffer so that a read over the end of
3858        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3859        we are using the POSIX interface, we must include the terminating zero. */
3860    
3861    #if !defined NOPOSIX
3862        if (posix || do_posix)
3863          {
3864          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3865          bptr += buffer_size - len - 1;
3866          }
3867        else
3868    #endif
3869          {
3870          memmove(bptr + buffer_size - len, bptr, len);
3871          bptr += buffer_size - len;
3872          }
3873    
3874      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
3875        {        {
# Line 1984  while (!done) Line 3890  while (!done)
3890          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3891        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3892        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3893          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3894    
3895        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3896    
# Line 2005  while (!done) Line 3912  while (!done)
3912            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3913              {              {
3914              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3915              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3916                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3917              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3918              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3919                {                {
3920                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3921                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3922                  outfile);                  outfile);
3923                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3924                }                }
# Line 2019  while (!done) Line 3926  while (!done)
3926            }            }
3927          }          }
3928        free(pmatch);        free(pmatch);
3929          goto NEXT_DATA;
3930        }        }
3931    
3932    #endif  /* !defined NOPOSIX */
3933    
3934      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3935    
3936      else  #ifdef SUPPORT_PCRE16
3937  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3938          {
3939          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3940          switch(len)
3941            {
3942            case -1:
3943            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3944              "converted to UTF-16\n");
3945            goto NEXT_DATA;
3946    
3947            case -2:
3948            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3949              "cannot be converted to UTF-16\n");
3950            goto NEXT_DATA;
3951    
3952            case -3:
3953            fprintf(outfile, "**Failed: character value greater than 0xffff "
3954              "cannot be converted to 16-bit in non-UTF mode\n");
3955            goto NEXT_DATA;
3956    
3957            default:
3958            break;
3959            }
3960          bptr = (pcre_uint8 *)buffer16;
3961          }
3962    #endif
3963    
3964        /* Ensure that there is a JIT callback if we want to verify that JIT was
3965        actually used. If jit_stack == NULL, no stack has yet been assigned. */
3966    
3967        if (verify_jit && jit_stack == NULL && extra != NULL)
3968           { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3969    
3970      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3971        {        {
3972          markptr = NULL;
3973          jit_was_used = FALSE;
3974    
3975        if (timeitm > 0)        if (timeitm > 0)
3976          {          {
3977          register int i;          register int i;
# Line 2037  while (!done) Line 3981  while (!done)
3981  #if !defined NODFA  #if !defined NODFA