/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 227 by ph10, Tue Aug 21 15:00:15 2007 UTC revision 890 by ph10, Wed Jan 18 16:25:19 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
51  #include <config.h>  #include "config.h"
52  #endif  #endif
53    
54  #include <ctype.h>  #include <ctype.h>
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 59  POSSIBILITY OF SUCH DAMAGE.
59  #include <locale.h>  #include <locale.h>
60  #include <errno.h>  #include <errno.h>
61    
62    #ifdef SUPPORT_LIBREADLINE
63    #ifdef HAVE_UNISTD_H
64    #include <unistd.h>
65    #endif
66    #include <readline/readline.h>
67    #include <readline/history.h>
68    #endif
69    
70    
71  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
72  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 63  input mode under Windows. */ Line 82  input mode under Windows. */
82  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
83  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
84    
85    #ifndef isatty
86    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
87    #endif                         /* though in some environments they seem to   */
88                                   /* be already defined, hence the #ifndefs.    */
89    #ifndef fileno
90    #define fileno _fileno
91    #endif
92    
93    /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95    #ifdef __BORLANDC__
96    #define _setmode(handle, mode) setmode(handle, mode)
97    #endif
98    
99    /* Not Windows */
100    
101  #else  #else
102  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
103  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 81  here before pcre_internal.h so that the Line 116  here before pcre_internal.h so that the
116  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
117    
118  #include "pcre.h"  #include "pcre.h"
119    
120    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121    /* Configure internal macros to 16 bit mode. */
122    #define COMPILE_PCRE16
123    #endif
124    
125  #include "pcre_internal.h"  #include "pcre_internal.h"
126    
127  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* The pcre_printint() function, which prints the internal form of a compiled
128  two copies, we include the source file here, changing the names of the external  regex, is held in a separate file so that (a) it can be compiled in either
129  symbols to prevent clashes. */  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130    when that is compiled in debug mode. */
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_OP_lengths       OP_lengths  
131    
132  #include "pcre_tables.c"  #ifdef SUPPORT_PCRE8
133    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134    #endif
135    #ifdef SUPPORT_PCRE16
136    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137    #endif
138    
139    /* We need access to some of the data tables that PCRE uses. So as not to have
140    to keep two copies, we include the source file here, changing the names of the
141    external symbols to prevent clashes. */
142    
143    #define PCRE_INCLUDED
144    #undef PRIV
145    #define PRIV(name) name
146    
147  /* We also need the pcre_printint() function for printing out compiled  #include "pcre_tables.c"
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled.  
148    
149  The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
150  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
151  contained in this file. We use it here also, in cases when the locale has not  the same as in the printint.src file. We use it here in cases when the locale
152  been explicitly changed, so as to get consistent output from systems that  has not been explicitly changed, so as to get consistent output from systems
153  differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
154    
155  #include "pcre_printint.src"  #ifdef EBCDIC
156    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
157    #else
158    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
159    #endif
160    
161  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
162    
163    /* Posix support is disabled in 16 bit only mode. */
164    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165    #define NOPOSIX
166    #endif
167    
168  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
169  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 121  Makefile. */ Line 173  Makefile. */
173  #include "pcreposix.h"  #include "pcreposix.h"
174  #endif  #endif
175    
176  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
177  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
179  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
180  UTF8 support if PCRE is built without it. */  
181    #ifndef SUPPORT_UTF
182  #ifndef SUPPORT_UTF8  #ifndef NOUTF
183  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
184  #endif  #endif
185  #endif  #endif
186    
187    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189    only from one place and is handled differently). I couldn't dream up any way of
190    using a single macro to do this in a generic way, because of the many different
191    argument requirements. We know that at least one of SUPPORT_PCRE8 and
192    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193    use these in the definitions of generic macros.
194    
195    **** Special note about the PCHARSxxx macros: the address of the string to be
196    printed is always given as two arguments: a base address followed by an offset.
197    The base address is cast to the correct data size for 8 or 16 bit data; the
198    offset is in units of this size. If the string were given as base+offset in one
199    argument, the casting might be incorrectly applied. */
200    
201    #ifdef SUPPORT_PCRE8
202    
203    #define PCHARS8(lv, p, offset, len, f) \
204      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205    
206    #define PCHARSV8(p, offset, len, f) \
207      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208    
209    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210      p = read_capture_name8(p, cn8, re)
211    
212    #define STRLEN8(p) ((int)strlen((char *)p))
213    
214    #define SET_PCRE_CALLOUT8(callout) \
215      pcre_callout = callout
216    
217    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
218       pcre_assign_jit_stack(extra, callback, userdata)
219    
220    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
221      re = pcre_compile((char *)pat, options, error, erroffset, tables)
222    
223    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
224        namesptr, cbuffer, size) \
225      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
226        (char *)namesptr, cbuffer, size)
227    
228    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
229      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
230    
231    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
232        offsets, size_offsets, workspace, size_workspace) \
233      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
234        offsets, size_offsets, workspace, size_workspace)
235    
236    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
237        offsets, size_offsets) \
238      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
239        offsets, size_offsets)
240    
241    #define PCRE_FREE_STUDY8(extra) \
242      pcre_free_study(extra)
243    
244    #define PCRE_FREE_SUBSTRING8(substring) \
245      pcre_free_substring(substring)
246    
247    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
248      pcre_free_substring_list(listptr)
249    
250    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251        getnamesptr, subsptr) \
252      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
253        (char *)getnamesptr, subsptr)
254    
255    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
256      n = pcre_get_stringnumber(re, (char *)ptr)
257    
258    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
259      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
260    
261    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
262      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
263    
264    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
265      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
266    
267    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
268      pcre_printint(re, outfile, debug_lengths)
269    
270    #define PCRE_STUDY8(extra, re, options, error) \
271      extra = pcre_study(re, options, error)
272    
273    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
274      pcre_jit_stack_alloc(startsize, maxsize)
275    
276    #define PCRE_JIT_STACK_FREE8(stack) \
277      pcre_jit_stack_free(stack)
278    
279    #endif /* SUPPORT_PCRE8 */
280    
281    /* -----------------------------------------------------------*/
282    
283    #ifdef SUPPORT_PCRE16
284    
285    #define PCHARS16(lv, p, offset, len, f) \
286      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
287    
288    #define PCHARSV16(p, offset, len, f) \
289      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
290    
291    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
292      p = read_capture_name16(p, cn16, re)
293    
294    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
295    
296    #define SET_PCRE_CALLOUT16(callout) \
297      pcre16_callout = (int (*)(pcre16_callout_block *))callout
298    
299    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
300      pcre16_assign_jit_stack((pcre16_extra *)extra, \
301        (pcre16_jit_callback)callback, userdata)
302    
303    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
304      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
305        tables)
306    
307    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
308        namesptr, cbuffer, size) \
309      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
310        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
311    
312    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
313      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
314        (PCRE_UCHAR16 *)cbuffer, size/2)
315    
316    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
317        offsets, size_offsets, workspace, size_workspace) \
318      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
319        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
320        workspace, size_workspace)
321    
322    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
323        offsets, size_offsets) \
324      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
325        len, start_offset, options, offsets, size_offsets)
326    
327    #define PCRE_FREE_STUDY16(extra) \
328      pcre16_free_study((pcre16_extra *)extra)
329    
330    #define PCRE_FREE_SUBSTRING16(substring) \
331      pcre16_free_substring((PCRE_SPTR16)substring)
332    
333    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
334      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
335    
336    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
337        getnamesptr, subsptr) \
338      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
339        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
340    
341    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
342      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
343    
344    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
345      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
346        (PCRE_SPTR16 *)(void*)subsptr)
347    
348    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
349      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
350        (PCRE_SPTR16 **)(void*)listptr)
351    
352    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
353      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
354        tables)
355    
356    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
357      pcre16_printint(re, outfile, debug_lengths)
358    
359    #define PCRE_STUDY16(extra, re, options, error) \
360      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
361    
362    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
363      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
364    
365    #define PCRE_JIT_STACK_FREE16(stack) \
366      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
367    
368    #endif /* SUPPORT_PCRE16 */
369    
370    
371    /* ----- Both modes are supported; a runtime test is needed, except for
372    pcre_config(), and the JIT stack functions, when it doesn't matter which
373    version is called. ----- */
374    
375    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
376    
377    #define CHAR_SIZE (use_pcre16? 2:1)
378    
379    #define PCHARS(lv, p, offset, len, f) \
380      if (use_pcre16) \
381        PCHARS16(lv, p, offset, len, f); \
382      else \
383        PCHARS8(lv, p, offset, len, f)
384    
385    #define PCHARSV(p, offset, len, f) \
386      if (use_pcre16) \
387        PCHARSV16(p, offset, len, f); \
388      else \
389        PCHARSV8(p, offset, len, f)
390    
391    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
392      if (use_pcre16) \
393        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
394      else \
395        READ_CAPTURE_NAME8(p, cn8, cn16, re)
396    
397    #define SET_PCRE_CALLOUT(callout) \
398      if (use_pcre16) \
399        SET_PCRE_CALLOUT16(callout); \
400      else \
401        SET_PCRE_CALLOUT8(callout)
402    
403    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
404    
405    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
406      if (use_pcre16) \
407        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
408      else \
409        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
410    
411    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
412      if (use_pcre16) \
413        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
414      else \
415        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
416    
417    #define PCRE_CONFIG pcre_config
418    
419    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
420        namesptr, cbuffer, size) \
421      if (use_pcre16) \
422        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
423          namesptr, cbuffer, size); \
424      else \
425        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
426          namesptr, cbuffer, size)
427    
428    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
429      if (use_pcre16) \
430        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
431      else \
432        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
433    
434    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
435        offsets, size_offsets, workspace, size_workspace) \
436      if (use_pcre16) \
437        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
438          offsets, size_offsets, workspace, size_workspace); \
439      else \
440        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
441          offsets, size_offsets, workspace, size_workspace)
442    
443    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
444        offsets, size_offsets) \
445      if (use_pcre16) \
446        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
447          offsets, size_offsets); \
448      else \
449        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
450          offsets, size_offsets)
451    
452    #define PCRE_FREE_STUDY(extra) \
453      if (use_pcre16) \
454        PCRE_FREE_STUDY16(extra); \
455      else \
456        PCRE_FREE_STUDY8(extra)
457    
458    #define PCRE_FREE_SUBSTRING(substring) \
459      if (use_pcre16) \
460        PCRE_FREE_SUBSTRING16(substring); \
461      else \
462        PCRE_FREE_SUBSTRING8(substring)
463    
464    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
465      if (use_pcre16) \
466        PCRE_FREE_SUBSTRING_LIST16(listptr); \
467      else \
468        PCRE_FREE_SUBSTRING_LIST8(listptr)
469    
470    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
471        getnamesptr, subsptr) \
472      if (use_pcre16) \
473        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
474          getnamesptr, subsptr); \
475      else \
476        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
477          getnamesptr, subsptr)
478    
479    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
480      if (use_pcre16) \
481        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
482      else \
483        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
484    
485    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
486      if (use_pcre16) \
487        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
488      else \
489        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
490    
491    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
492      if (use_pcre16) \
493        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
494      else \
495        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
496    
497    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
498      (use_pcre16 ? \
499         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
500        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
501    
502    #define PCRE_JIT_STACK_FREE(stack) \
503      if (use_pcre16) \
504        PCRE_JIT_STACK_FREE16(stack); \
505      else \
506        PCRE_JIT_STACK_FREE8(stack)
507    
508    #define PCRE_MAKETABLES \
509      (use_pcre16? pcre16_maketables() : pcre_maketables())
510    
511    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
512      if (use_pcre16) \
513        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
514      else \
515        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
516    
517    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
518      if (use_pcre16) \
519        PCRE_PRINTINT16(re, outfile, debug_lengths); \
520      else \
521        PCRE_PRINTINT8(re, outfile, debug_lengths)
522    
523    #define PCRE_STUDY(extra, re, options, error) \
524      if (use_pcre16) \
525        PCRE_STUDY16(extra, re, options, error); \
526      else \
527        PCRE_STUDY8(extra, re, options, error)
528    
529    /* ----- Only 8-bit mode is supported ----- */
530    
531    #elif defined SUPPORT_PCRE8
532    #define CHAR_SIZE                 1
533    #define PCHARS                    PCHARS8
534    #define PCHARSV                   PCHARSV8
535    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
536    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
537    #define STRLEN                    STRLEN8
538    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
539    #define PCRE_COMPILE              PCRE_COMPILE8
540    #define PCRE_CONFIG               pcre_config
541    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
542    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
543    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
544    #define PCRE_EXEC                 PCRE_EXEC8
545    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
546    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
547    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
548    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
549    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
550    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
551    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
552    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
553    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
554    #define PCRE_MAKETABLES           pcre_maketables()
555    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
556    #define PCRE_PRINTINT             PCRE_PRINTINT8
557    #define PCRE_STUDY                PCRE_STUDY8
558    
559    /* ----- Only 16-bit mode is supported ----- */
560    
561    #else
562    #define CHAR_SIZE                 2
563    #define PCHARS                    PCHARS16
564    #define PCHARSV                   PCHARSV16
565    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
566    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
567    #define STRLEN                    STRLEN16
568    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
569    #define PCRE_COMPILE              PCRE_COMPILE16
570    #define PCRE_CONFIG               pcre16_config
571    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
572    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
573    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
574    #define PCRE_EXEC                 PCRE_EXEC16
575    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
576    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
577    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
578    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
579    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
580    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
581    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
582    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
583    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
584    #define PCRE_MAKETABLES           pcre16_maketables()
585    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
586    #define PCRE_PRINTINT             PCRE_PRINTINT16
587    #define PCRE_STUDY                PCRE_STUDY16
588    #endif
589    
590    /* ----- End of mode-specific function call macros ----- */
591    
592    
593  /* Other parameters */  /* Other parameters */
594    
# Line 160  static int debug_lengths; Line 616  static int debug_lengths;
616  static int first_callout;  static int first_callout;
617  static int locale_set = 0;  static int locale_set = 0;
618  static int show_malloc;  static int show_malloc;
619  static int use_utf8;  static int use_utf;
620  static size_t gotten_store;  static size_t gotten_store;
621    static size_t first_gotten_store = 0;
622    static const unsigned char *last_callout_mark = NULL;
623    
624  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
625    
626  static int buffer_size = 50000;  static int buffer_size = 50000;
627  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
628  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
629  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
630    
631    /* Another buffer is needed for translation to 16-bit character strings. It will
632    be obtained and extended as required. */
633    
634    #ifdef SUPPORT_PCRE16
635    static int buffer16_size = 0;
636    static pcre_uint16 *buffer16 = NULL;
637    
638    #ifdef SUPPORT_PCRE8
639    
640    /* We need the table of operator lengths that is used for 16-bit compiling, in
641    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
642    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
643    appropriately for the 16-bit world. Just as a safety check, make sure that
644    COMPILE_PCRE16 is *not* set. */
645    
646    #ifdef COMPILE_PCRE16
647    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
648    #endif
649    
650    #if LINK_SIZE == 2
651    #undef LINK_SIZE
652    #define LINK_SIZE 1
653    #elif LINK_SIZE == 3 || LINK_SIZE == 4
654    #undef LINK_SIZE
655    #define LINK_SIZE 2
656    #else
657    #error LINK_SIZE must be either 2, 3, or 4
658    #endif
659    
660  /*************************************************  #undef IMM2_SIZE
661  *        Read or extend an input line            *  #define IMM2_SIZE 1
 *************************************************/  
   
 /* Input lines are read into buffer, but both patterns and data lines can be  
 continued over multiple input lines. In addition, if the buffer fills up, we  
 want to automatically expand it so as to be able to handle extremely large  
 lines that are needed for certain stress tests. When the input buffer is  
 expanded, the other two buffers must also be expanded likewise, and the  
 contents of pbuffer, which are a copy of the input for callouts, must be  
 preserved (for when expansion happens for a data line). This is not the most  
 optimal way of handling this, but hey, this is just a test program!  
   
 Arguments:  
   f            the file to read  
   start        where in buffer to start (this *must* be within buffer)  
662    
663  Returns:       pointer to the start of new data  #endif /* SUPPORT_PCRE8 */
                could be a copy of start, or could be moved  
                NULL if no data read and EOF reached  
 */  
664    
665  static uschar *  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
666  extend_inputline(FILE *f, uschar *start)  #endif  /* SUPPORT_PCRE16 */
 {  
 uschar *here = start;  
667    
668  for (;;)  /* If we have 8-bit support, default use_pcre16 to false; if there is also
669    {  16-bit support, it can be changed by an option. If there is no 8-bit support,
670    int rlen = buffer_size - (here - buffer);  there must be 16-bit support, so default it to 1. */
671    
672    if (rlen > 1000)  #ifdef SUPPORT_PCRE8
673      {  static int use_pcre16 = 0;
674      int dlen;  #else
675      if (fgets((char *)here, rlen,  f) == NULL)  static int use_pcre16 = 1;
676        return (here == start)? NULL : start;  #endif
     dlen = (int)strlen((char *)here);  
     if (dlen > 0 && here[dlen - 1] == '\n') return start;  
     here += dlen;  
     }  
677    
678    else  /* Textual explanations for runtime error codes */
     {  
     int new_buffer_size = 2*buffer_size;  
     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);  
     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);  
     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);  
679    
680      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)  static const char *errtexts[] = {
681        {    NULL,  /* 0 is no error */
682        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);    NULL,  /* NOMATCH is handled specially */
683        exit(1);    "NULL argument passed",
684        }    "bad option value",
685      "magic number missing",
686      "unknown opcode - pattern overwritten?",
687      "no more memory",
688      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
689      "match limit exceeded",
690      "callout error code",
691      NULL,  /* BADUTF8/16 is handled specially */
692      NULL,  /* BADUTF8/16 offset is handled specially */
693      NULL,  /* PARTIAL is handled specially */
694      "not used - internal error",
695      "internal error - pattern overwritten?",
696      "bad count value",
697      "item unsupported for DFA matching",
698      "backreference condition or recursion test not supported for DFA matching",
699      "match limit not supported for DFA matching",
700      "workspace size exceeded in DFA matching",
701      "too much recursion for DFA matching",
702      "recursion limit exceeded",
703      "not used - internal error",
704      "invalid combination of newline options",
705      "bad offset value",
706      NULL,  /* SHORTUTF8/16 is handled specially */
707      "nested recursion at the same subject position",
708      "JIT stack limit reached",
709      "pattern compiled in wrong mode: 8-bit/16-bit error"
710    };
711    
     memcpy(new_buffer, buffer, buffer_size);  
     memcpy(new_pbuffer, pbuffer, buffer_size);  
712    
713      buffer_size = new_buffer_size;  /*************************************************
714    *         Alternate character tables             *
715    *************************************************/
716    
717      start = new_buffer + (start - buffer);  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
718      here = new_buffer + (here - buffer);  using the default tables of the library. However, the T option can be used to
719    select alternate sets of tables, for different kinds of testing. Note also that
720    the L (locale) option also adjusts the tables. */
721    
722    /* This is the set of tables distributed as default with PCRE. It recognizes
723    only ASCII characters. */
724    
725    static const pcre_uint8 tables0[] = {
726    
727    /* This table is a lower casing table. */
728    
729        0,  1,  2,  3,  4,  5,  6,  7,
730        8,  9, 10, 11, 12, 13, 14, 15,
731       16, 17, 18, 19, 20, 21, 22, 23,
732       24, 25, 26, 27, 28, 29, 30, 31,
733       32, 33, 34, 35, 36, 37, 38, 39,
734       40, 41, 42, 43, 44, 45, 46, 47,
735       48, 49, 50, 51, 52, 53, 54, 55,
736       56, 57, 58, 59, 60, 61, 62, 63,
737       64, 97, 98, 99,100,101,102,103,
738      104,105,106,107,108,109,110,111,
739      112,113,114,115,116,117,118,119,
740      120,121,122, 91, 92, 93, 94, 95,
741       96, 97, 98, 99,100,101,102,103,
742      104,105,106,107,108,109,110,111,
743      112,113,114,115,116,117,118,119,
744      120,121,122,123,124,125,126,127,
745      128,129,130,131,132,133,134,135,
746      136,137,138,139,140,141,142,143,
747      144,145,146,147,148,149,150,151,
748      152,153,154,155,156,157,158,159,
749      160,161,162,163,164,165,166,167,
750      168,169,170,171,172,173,174,175,
751      176,177,178,179,180,181,182,183,
752      184,185,186,187,188,189,190,191,
753      192,193,194,195,196,197,198,199,
754      200,201,202,203,204,205,206,207,
755      208,209,210,211,212,213,214,215,
756      216,217,218,219,220,221,222,223,
757      224,225,226,227,228,229,230,231,
758      232,233,234,235,236,237,238,239,
759      240,241,242,243,244,245,246,247,
760      248,249,250,251,252,253,254,255,
761    
762    /* This table is a case flipping table. */
763    
764        0,  1,  2,  3,  4,  5,  6,  7,
765        8,  9, 10, 11, 12, 13, 14, 15,
766       16, 17, 18, 19, 20, 21, 22, 23,
767       24, 25, 26, 27, 28, 29, 30, 31,
768       32, 33, 34, 35, 36, 37, 38, 39,
769       40, 41, 42, 43, 44, 45, 46, 47,
770       48, 49, 50, 51, 52, 53, 54, 55,
771       56, 57, 58, 59, 60, 61, 62, 63,
772       64, 97, 98, 99,100,101,102,103,
773      104,105,106,107,108,109,110,111,
774      112,113,114,115,116,117,118,119,
775      120,121,122, 91, 92, 93, 94, 95,
776       96, 65, 66, 67, 68, 69, 70, 71,
777       72, 73, 74, 75, 76, 77, 78, 79,
778       80, 81, 82, 83, 84, 85, 86, 87,
779       88, 89, 90,123,124,125,126,127,
780      128,129,130,131,132,133,134,135,
781      136,137,138,139,140,141,142,143,
782      144,145,146,147,148,149,150,151,
783      152,153,154,155,156,157,158,159,
784      160,161,162,163,164,165,166,167,
785      168,169,170,171,172,173,174,175,
786      176,177,178,179,180,181,182,183,
787      184,185,186,187,188,189,190,191,
788      192,193,194,195,196,197,198,199,
789      200,201,202,203,204,205,206,207,
790      208,209,210,211,212,213,214,215,
791      216,217,218,219,220,221,222,223,
792      224,225,226,227,228,229,230,231,
793      232,233,234,235,236,237,238,239,
794      240,241,242,243,244,245,246,247,
795      248,249,250,251,252,253,254,255,
796    
797    /* This table contains bit maps for various character classes. Each map is 32
798    bytes long and the bits run from the least significant end of each byte. The
799    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
800    graph, print, punct, and cntrl. Other classes are built from combinations. */
801    
802      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
803      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
806    
807      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
808      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
809      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811    
812      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
813      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816    
817      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
819      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
821    
822      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
823      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
824      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826    
827      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
828      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
829      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831    
832      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
833      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
834      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836    
837      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
838      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
839      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841    
842      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
843      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
844      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846    
847      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
849      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851    
852    /* This table identifies various classes of character by individual bits:
853      0x01   white space character
854      0x02   letter
855      0x04   decimal digit
856      0x08   hexadecimal digit
857      0x10   alphanumeric or '_'
858      0x80   regular expression metacharacter or binary zero
859    */
860    
861      free(buffer);    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
862      free(dbuffer);    0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
863      free(pbuffer);    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
864      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
865      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
866      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
867      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
868      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
869      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
870      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
871      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
872      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
873      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
874      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
875      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
876      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
877      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
878      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
879      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
880      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
881      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
882      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
883      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
884      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
885      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
886      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
887      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
888      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
889      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
890      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
891      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
892      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
893    
894    /* This is a set of tables that came originally from a Windows user. It seems to
895    be at least an approximation of ISO 8859. In particular, there are characters
896    greater than 128 that are marked as spaces, letters, etc. */
897    
898    static const pcre_uint8 tables1[] = {
899    0,1,2,3,4,5,6,7,
900    8,9,10,11,12,13,14,15,
901    16,17,18,19,20,21,22,23,
902    24,25,26,27,28,29,30,31,
903    32,33,34,35,36,37,38,39,
904    40,41,42,43,44,45,46,47,
905    48,49,50,51,52,53,54,55,
906    56,57,58,59,60,61,62,63,
907    64,97,98,99,100,101,102,103,
908    104,105,106,107,108,109,110,111,
909    112,113,114,115,116,117,118,119,
910    120,121,122,91,92,93,94,95,
911    96,97,98,99,100,101,102,103,
912    104,105,106,107,108,109,110,111,
913    112,113,114,115,116,117,118,119,
914    120,121,122,123,124,125,126,127,
915    128,129,130,131,132,133,134,135,
916    136,137,138,139,140,141,142,143,
917    144,145,146,147,148,149,150,151,
918    152,153,154,155,156,157,158,159,
919    160,161,162,163,164,165,166,167,
920    168,169,170,171,172,173,174,175,
921    176,177,178,179,180,181,182,183,
922    184,185,186,187,188,189,190,191,
923    224,225,226,227,228,229,230,231,
924    232,233,234,235,236,237,238,239,
925    240,241,242,243,244,245,246,215,
926    248,249,250,251,252,253,254,223,
927    224,225,226,227,228,229,230,231,
928    232,233,234,235,236,237,238,239,
929    240,241,242,243,244,245,246,247,
930    248,249,250,251,252,253,254,255,
931    0,1,2,3,4,5,6,7,
932    8,9,10,11,12,13,14,15,
933    16,17,18,19,20,21,22,23,
934    24,25,26,27,28,29,30,31,
935    32,33,34,35,36,37,38,39,
936    40,41,42,43,44,45,46,47,
937    48,49,50,51,52,53,54,55,
938    56,57,58,59,60,61,62,63,
939    64,97,98,99,100,101,102,103,
940    104,105,106,107,108,109,110,111,
941    112,113,114,115,116,117,118,119,
942    120,121,122,91,92,93,94,95,
943    96,65,66,67,68,69,70,71,
944    72,73,74,75,76,77,78,79,
945    80,81,82,83,84,85,86,87,
946    88,89,90,123,124,125,126,127,
947    128,129,130,131,132,133,134,135,
948    136,137,138,139,140,141,142,143,
949    144,145,146,147,148,149,150,151,
950    152,153,154,155,156,157,158,159,
951    160,161,162,163,164,165,166,167,
952    168,169,170,171,172,173,174,175,
953    176,177,178,179,180,181,182,183,
954    184,185,186,187,188,189,190,191,
955    224,225,226,227,228,229,230,231,
956    232,233,234,235,236,237,238,239,
957    240,241,242,243,244,245,246,215,
958    248,249,250,251,252,253,254,223,
959    192,193,194,195,196,197,198,199,
960    200,201,202,203,204,205,206,207,
961    208,209,210,211,212,213,214,247,
962    216,217,218,219,220,221,222,255,
963    0,62,0,0,1,0,0,0,
964    0,0,0,0,0,0,0,0,
965    32,0,0,0,1,0,0,0,
966    0,0,0,0,0,0,0,0,
967    0,0,0,0,0,0,255,3,
968    126,0,0,0,126,0,0,0,
969    0,0,0,0,0,0,0,0,
970    0,0,0,0,0,0,0,0,
971    0,0,0,0,0,0,255,3,
972    0,0,0,0,0,0,0,0,
973    0,0,0,0,0,0,12,2,
974    0,0,0,0,0,0,0,0,
975    0,0,0,0,0,0,0,0,
976    254,255,255,7,0,0,0,0,
977    0,0,0,0,0,0,0,0,
978    255,255,127,127,0,0,0,0,
979    0,0,0,0,0,0,0,0,
980    0,0,0,0,254,255,255,7,
981    0,0,0,0,0,4,32,4,
982    0,0,0,128,255,255,127,255,
983    0,0,0,0,0,0,255,3,
984    254,255,255,135,254,255,255,7,
985    0,0,0,0,0,4,44,6,
986    255,255,127,255,255,255,127,255,
987    0,0,0,0,254,255,255,255,
988    255,255,255,255,255,255,255,127,
989    0,0,0,0,254,255,255,255,
990    255,255,255,255,255,255,255,255,
991    0,2,0,0,255,255,255,255,
992    255,255,255,255,255,255,255,127,
993    0,0,0,0,255,255,255,255,
994    255,255,255,255,255,255,255,255,
995    0,0,0,0,254,255,0,252,
996    1,0,0,248,1,0,0,120,
997    0,0,0,0,254,255,255,255,
998    0,0,128,0,0,0,128,0,
999    255,255,255,255,0,0,0,0,
1000    0,0,0,0,0,0,0,128,
1001    255,255,255,255,0,0,0,0,
1002    0,0,0,0,0,0,0,0,
1003    128,0,0,0,0,0,0,0,
1004    0,1,1,0,1,1,0,0,
1005    0,0,0,0,0,0,0,0,
1006    0,0,0,0,0,0,0,0,
1007    1,0,0,0,128,0,0,0,
1008    128,128,128,128,0,0,128,0,
1009    28,28,28,28,28,28,28,28,
1010    28,28,0,0,0,0,0,128,
1011    0,26,26,26,26,26,26,18,
1012    18,18,18,18,18,18,18,18,
1013    18,18,18,18,18,18,18,18,
1014    18,18,18,128,128,0,128,16,
1015    0,26,26,26,26,26,26,18,
1016    18,18,18,18,18,18,18,18,
1017    18,18,18,18,18,18,18,18,
1018    18,18,18,128,128,0,0,0,
1019    0,0,0,0,0,1,0,0,
1020    0,0,0,0,0,0,0,0,
1021    0,0,0,0,0,0,0,0,
1022    0,0,0,0,0,0,0,0,
1023    1,0,0,0,0,0,0,0,
1024    0,0,18,0,0,0,0,0,
1025    0,0,20,20,0,18,0,0,
1026    0,20,18,0,0,0,0,0,
1027    18,18,18,18,18,18,18,18,
1028    18,18,18,18,18,18,18,18,
1029    18,18,18,18,18,18,18,0,
1030    18,18,18,18,18,18,18,18,
1031    18,18,18,18,18,18,18,18,
1032    18,18,18,18,18,18,18,18,
1033    18,18,18,18,18,18,18,0,
1034    18,18,18,18,18,18,18,18
1035    };
1036    
     buffer = new_buffer;  
     dbuffer = new_dbuffer;  
     pbuffer = new_pbuffer;  
     }  
   }  
1037    
 return NULL;  /* Control never gets here */  
 }  
1038    
1039    
1040    #ifndef HAVE_STRERROR
1041    /*************************************************
1042    *     Provide strerror() for non-ANSI libraries  *
1043    *************************************************/
1044    
1045    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1046    in their libraries, but can provide the same facility by this simple
1047    alternative function. */
1048    
1049    extern int   sys_nerr;
1050    extern char *sys_errlist[];
1051    
1052    char *
1053    strerror(int n)  /* replacement strerror(): n is an error number, result is its message text */
1054    {
1055    if (n < 0 || n >= sys_nerr) return "unknown error number";  /* n is outside the sys_errlist index range */
1056    return sys_errlist[n];  /* otherwise hand back the entry from the system table */
1057    }
1058    #endif /* HAVE_STRERROR */
1059    
1060    
1061  /*************************************************  /*************************************************
1062  *          Read number from string               *  *         JIT memory callback                    *
1063  *************************************************/  *************************************************/
1064    
1065  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  static pcre_jit_stack* jit_callback(void *arg)
 around with conditional compilation, just do the job by hand. It is only used  
 for unpicking arguments, so just keep it simple.  
   
 Arguments:  
   str           string to be converted  
   endptr        where to put the end pointer  
   
 Returns:        the unsigned long  
 */  
   
 static int  
 get_value(unsigned char *str, unsigned char **endptr)  
1066  {  {
1067  int result = 0;  return (pcre_jit_stack *)arg;
 while(*str != 0 && isspace(*str)) str++;  
 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');  
 *endptr = str;  
 return(result);  
1068  }  }
1069    
1070    
1071    #if !defined NOUTF || defined SUPPORT_PCRE16
   
1072  /*************************************************  /*************************************************
1073  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
1074  *************************************************/  *************************************************/
# Line 296  Returns:      >  0 => the number of byte Line 1084  Returns:      >  0 => the number of byte
1084                -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
1085  */  */
1086    
 #if !defined NOUTF8  
   
1087  static int  static int
1088  utf82ord(unsigned char *utf8bytes, int *vptr)  utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1089  {  {
1090  int c = *utf8bytes++;  int c = *utf8bytes++;
1091  int d = c;  int d = c;
# Line 338  if (j != i) return -(i+1); Line 1124  if (j != i) return -(i+1);
1124  *vptr = d;  *vptr = d;
1125  return i+1;  return i+1;
1126  }  }
1127    #endif /* NOUTF || SUPPORT_PCRE16 */
 #endif  
1128    
1129    
1130    
1131    #if !defined NOUTF || defined SUPPORT_PCRE16
1132  /*************************************************  /*************************************************
1133  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
1134  *************************************************/  *************************************************/
# Line 357  Arguments: Line 1143  Arguments:
1143  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
1144  */  */
1145    
 #if !defined NOUTF8  
   
1146  static int  static int
1147  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1148  {  {
1149  register int i, j;  register int i, j;
1150  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
# Line 374  for (j = i; j > 0; j--) Line 1158  for (j = i; j > 0; j--)
1158  *utf8bytes = utf8_table2[i] | cvalue;  *utf8bytes = utf8_table2[i] | cvalue;
1159  return i + 1;  return i + 1;
1160  }  }
1161    #endif
1162    
1163    
1164    #ifdef SUPPORT_PCRE16
1165    /*************************************************
1166    *         Convert a string to 16-bit             *
1167    *************************************************/
1168    
1169    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1170    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1171    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1172    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1173    result is always left in buffer16.
1174    
1175    Note that this function does not object to surrogate values. This is
1176    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1177    for the purpose of testing that they are correctly faulted.
1178    
1179    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1180    in UTF-8 so that values greater than 255 can be handled.
1181    
1182    Arguments:
1183      data       TRUE if converting a data line; FALSE for a regex
1184      p          points to a byte string
1185      utf        true if UTF-8 (to be converted to UTF-16)
1186      len        number of bytes in the string (excluding trailing zero)
1187    
1188    Returns:     number of 16-bit data items used (excluding trailing zero)
1189                 OR -1 if a UTF-8 string is malformed
1190                 OR -2 if a value > 0x10ffff is encountered
1191                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1192    */
1193    
1194    static int
1195    to16(int data, pcre_uint8 *p, int utf, int len)  /* convert len bytes at p into 16-bit units in buffer16 */
1196    {
1197    pcre_uint16 *pp;  /* write cursor into buffer16 */
1198    
1199    if (buffer16_size < 2*len + 2)  /* buffer16_size is the byte count passed to malloc() below */
1200      {
1201      if (buffer16 != NULL) free(buffer16);  /* old contents are discarded, not copied */
1202      buffer16_size = 2*len + 2;
1203      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1204      if (buffer16 == NULL)
1205        {
1206        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1207        exit(1);  /* allocation failure is fatal for the test program */
1208        }
1209      }
1210    
1211    pp = buffer16;
1212    
1213    if (!utf && !data)  /* non-UTF pattern: each input byte becomes one 16-bit unit */
1214      {
1215      while (len-- > 0) *pp++ = *p++;
1216      }
1217    
1218    else  /* UTF pattern, or any data line: input is decoded with utf82ord() */
1219      {
1220      int c = 0;
1221      while (len > 0)
1222        {
1223        int chlen = utf82ord(p, &c);  /* chlen = bytes consumed, c = decoded value */
1224        if (chlen <= 0) return -1;  /* utf82ord reported a malformed sequence */
1225        if (c > 0x10ffff) return -2;  /* value above 0x10ffff */
1226        p += chlen;
1227        len -= chlen;
1228        if (c < 0x10000) *pp++ = c; else  /* values below 0x10000 are stored as a single unit */
1229          {
1230          if (!utf) return -3;  /* value needs two units but we are not in UTF mode */
1231          c -= 0x10000;
1232          *pp++ = 0xD800 | (c >> 10);  /* first unit of the pair: upper bits of c */
1233          *pp++ = 0xDC00 | (c & 0x3ff);  /* second unit of the pair: low 10 bits of c */
1234          }
1235        }
1236      }
1237    
1238    *pp = 0;  /* terminating zero unit, not included in the returned count */
1239    return pp - buffer16;  /* number of 16-bit units written */
1240    }
1241    #endif
1242    
1243    
1244    /*************************************************
1245    *        Read or extend an input line            *
1246    *************************************************/
1247    
1248    /* Input lines are read into buffer, but both patterns and data lines can be
1249    continued over multiple input lines. In addition, if the buffer fills up, we
1250    want to automatically expand it so as to be able to handle extremely large
1251    lines that are needed for certain stress tests. When the input buffer is
1252    expanded, the other two buffers must also be expanded likewise, and the
1253    contents of pbuffer, which are a copy of the input for callouts, must be
1254    preserved (for when expansion happens for a data line). This is not the most
1255    optimal way of handling this, but hey, this is just a test program!
1256    
1257    Arguments:
1258      f            the file to read
1259      start        where in buffer to start (this *must* be within buffer)
1260      prompt       for stdin or readline()
1261    
1262    Returns:       pointer to the start of new data
1263                   could be a copy of start, or could be moved
1264                   NULL if no data read and EOF reached
1265    */
1266    
1267    static pcre_uint8 *
1268    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)  /* read input into buffer from start until a newline or EOF */
1269    {
1270    pcre_uint8 *here = start;  /* position where the next chunk of input is stored */
1271    
1272    for (;;)
1273      {
1274      int rlen = (int)(buffer_size - (here - buffer));  /* bytes between here and buffer + buffer_size */
1275    
1276      if (rlen > 1000)  /* only read while more than 1000 bytes remain; otherwise grow first */
1277        {
1278        int dlen;
1279    
1280        /* If libreadline support is required, use readline() to read a line if the
1281        input is a terminal. Note that readline() removes the trailing newline, so
1282        we must put it back again, to be compatible with fgets(). */
1283    
1284    #ifdef SUPPORT_LIBREADLINE
1285        if (isatty(fileno(f)))
1286          {
1287          size_t len;
1288          char *s = readline(prompt);
1289          if (s == NULL) return (here == start)? NULL : start;  /* NULL only if nothing was read in this call */
1290          len = strlen(s);
1291          if (len > 0) add_history(s);
1292          if (len > rlen - 1) len = rlen - 1;  /* NOTE(review): with this clamp, here[len+1] below is here[rlen], one byte past the rlen bytes counted above - confirm; rlen - 2 looks intended */
1293          memcpy(here, s, len);
1294          here[len] = '\n';  /* put the newline back, as described above */
1295          here[len+1] = 0;
1296          free(s);  /* s came from readline() and is released here */
1297          }
1298        else
1299  #endif  #endif
1300    
1301        /* Read the next line by normal means, prompting if the file is stdin. */
1302    
1303          {
1304          if (f == stdin) printf("%s", prompt);
1305          if (fgets((char *)here, rlen,  f) == NULL)
1306            return (here == start)? NULL : start;  /* NULL only if nothing was read in this call */
1307          }
1308    
1309        dlen = (int)strlen((char *)here);
1310        if (dlen > 0 && here[dlen - 1] == '\n') return start;  /* a complete line has been read */
1311        here += dlen;  /* no newline yet: keep appending after what was read */
1312        }
1313    
1314      else
1315        {
1316        int new_buffer_size = 2*buffer_size;  /* double the size of all three buffers */
1317        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1318        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1320    
1321        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1322          {
1323          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1324          exit(1);
1325          }
1326    
1327        memcpy(new_buffer, buffer, buffer_size);  /* buffer and pbuffer contents are carried over; dbuffer is not copied */
1328        memcpy(new_pbuffer, pbuffer, buffer_size);
1329    
1330        buffer_size = new_buffer_size;
1331    
1332        start = new_buffer + (start - buffer);  /* rebase both pointers onto the new block before the old one is freed */
1333        here = new_buffer + (here - buffer);
1334    
1335        free(buffer);
1336        free(dbuffer);
1337        free(pbuffer);
1338    
1339        buffer = new_buffer;
1340        dbuffer = new_dbuffer;
1341        pbuffer = new_pbuffer;
1342        }
1343      }
1344    
1345    return NULL;  /* Control never gets here */
1346    }
1347    
1348    
1349    
1350  /*************************************************  /*************************************************
1351  *             Print character string             *  *          Read number from string               *
1352  *************************************************/  *************************************************/
1353    
1354  /* Character string printing function. Must handle UTF-8 strings in utf8  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1355  mode. Yields number of characters printed. If handed a NULL file, just counts  around with conditional compilation, just do the job by hand. It is only used
1356  chars without printing. */  for unpicking arguments, so just keep it simple.
1357    
1358    Arguments:
1359      str           string to be converted
1360      endptr        where to put the end pointer
1361    
1362  static int pchars(unsigned char *p, int length, FILE *f)  Returns:        the unsigned long
1363    */
1364    
1365    static int
1366    get_value(pcre_uint8 *str, pcre_uint8 **endptr)  /* parse a decimal number at str; result is an int */
1367    {
1368    int result = 0;
1369    while(*str != 0 && isspace(*str)) str++;  /* skip leading white space */
1370    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');  /* accumulate digits; no sign handling and no overflow check */
1371    *endptr = str;  /* first character that is not part of the number */
1372    return(result);  /* 0 when no digits were found */
1373    }
1374    
1375    
1376    
1377    /*************************************************
1378    *             Print one character                *
1379    *************************************************/
1380    
1381    /* Print a single character either literally, or as a hex escape. */
1382    
1383    static int pchar(int c, FILE *f)  /* returns the printed width of c; writes only when f is not NULL */
1384    {
1385    if (PRINTOK(c))  /* characters accepted by PRINTOK are written literally */
1386      {
1387      if (f != NULL) fprintf(f, "%c", c);
1388      return 1;
1389      }
1390    
1391    if (c < 0x100)
1392      {
1393      if (use_utf)
1394        {
1395        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1396        return 6;  /* width of \x{hh} for a two-digit value */
1397        }
1398      else
1399        {
1400        if (f != NULL) fprintf(f, "\\x%02x", c);
1401        return 4;  /* width of \xhh for a two-digit value */
1402        }
1403      }
1404    
1405    if (f != NULL) fprintf(f, "\\x{%02x}", c);  /* only reached when c >= 0x100 */
1406    return (c <= 0x000000ff)? 6 :  /* this first arm cannot be taken: c < 0x100 already returned above */
1407           (c <= 0x00000fff)? 7 :
1408           (c <= 0x0000ffff)? 8 :
1409           (c <= 0x000fffff)? 9 : 10;  /* 4 characters of \x{} plus the number of hex digits */
1410    }
1411    
1412    
1413    
1414    #ifdef SUPPORT_PCRE8
1415    /*************************************************
1416    *         Print 8-bit character string           *
1417    *************************************************/
1418    
1419    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1420    If handed a NULL file, just counts chars without printing. */
1421    
1422    static int pchars(pcre_uint8 *p, int length, FILE *f)
1423  {  {
1424  int c = 0;  int c = 0;
1425  int yield = 0;  int yield = 0;
1426    
1427    if (length < 0)
1428      length = strlen((char *)p);
1429    
1430  while (length-- > 0)  while (length-- > 0)
1431    {    {
1432  #if !defined NOUTF8  #if !defined NOUTF
1433    if (use_utf8)    if (use_utf)
1434      {      {
1435      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
   
1436      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1437        {        {
1438        length -= rc - 1;        length -= rc - 1;
1439        p += rc;        p += rc;
1440        if (PRINTHEX(c))        yield += pchar(c, f);
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
1441        continue;        continue;
1442        }        }
1443      }      }
1444  #endif  #endif
1445      c = *p++;
1446      yield += pchar(c, f);
1447      }
1448    
1449    return yield;
1450    }
1451    #endif
1452    
    /* Not UTF-8, or malformed UTF-8  */  
1453    
1454    c = *p++;  
1455    if (PRINTHEX(c))  #ifdef SUPPORT_PCRE16
1456      {  /*************************************************
1457      if (f != NULL) fprintf(f, "%c", c);  *    Find length of 0-terminated 16-bit string   *
1458      yield++;  *************************************************/
1459      }  
1460    else  static int strlen16(PCRE_SPTR16 p)
1461    {
1462    int len = 0;
1463    while (*p++ != 0) len++;
1464    return len;
1465    }
1466    #endif  /* SUPPORT_PCRE16 */
1467    
1468    
1469    #ifdef SUPPORT_PCRE16
1470    /*************************************************
1471    *           Print 16-bit character string        *
1472    *************************************************/
1473    
1474    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1475    If handed a NULL file, just counts chars without printing. */
1476    
1477    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)  /* print length 16-bit units from p via pchar(); returns the summed widths */
1478    {
1479    int yield = 0;
1480    
1481    if (length < 0)  /* negative length: measure the zero-terminated string with strlen16() */
1482      length = strlen16(p);
1483    
1484    while (length-- > 0)
1485      {
1486      int c = *p++ & 0xffff;
1487    #if !defined NOUTF
1488      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)  /* c is in 0xD800..0xDBFF and another unit follows */
1489      {      {
1490      if (f != NULL) fprintf(f, "\\x%02x", c);      int d = *p & 0xffff;
1491      yield += 4;      if (d >= 0xDC00 && d < 0xDFFF)  /* NOTE(review): right-hand column excludes d == 0xDFFF; the low-surrogate range ends at 0xDFFF inclusive, so <= looks intended - confirm */
1492          {
1493          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;  /* combine the two units into one value */
1494          length--;  /* the second unit has been consumed */
1495          p++;
1496          }
1497      }      }
1498    #endif
1499      yield += pchar(c, f);
1500    }    }
1501    
1502  return yield;  return yield;
1503  }  }
1504    #endif  /* SUPPORT_PCRE16 */
1505    
1506    
1507    
1508    #ifdef SUPPORT_PCRE8
1509    /*************************************************
1510    *     Read a capture name (8-bit) and check it   *
1511    *************************************************/
1512    
1513    static pcre_uint8 *
1514    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)  /* copy an alphanumeric name from p to *pp and look it up in re */
1515    {
1516    pcre_uint8 *npp = *pp;  /* write cursor; *pp still marks the start of this name */
1517    while (isalnum(*p)) *npp++ = *p++;  /* the name is the run of alphanumeric characters at p */
1518    *npp++ = 0;  /* terminate the copied name */
1519    *npp = 0;  /* a second zero follows; npp is left pointing at it */
1520    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)  /* negative result: report the name as unknown */
1521      {
1522      fprintf(outfile, "no parentheses with name \"");
1523      PCHARSV(*pp, 0, -1, outfile);
1524      fprintf(outfile, "\"\n");
1525      }
1526    
1527    *pp = npp;  /* caller's pointer now refers to the position after this name's terminator */
1528    return p;  /* first input character after the name */
1529    }
1530    #endif  /* SUPPORT_PCRE8 */
1531    
1532    
1533    
1534    #ifdef SUPPORT_PCRE16
1535    /*************************************************
1536    *     Read a capture name (16-bit) and check it  *
1537    *************************************************/
1538    
1539    /* Note that the text being read is 8-bit. */
1540    
1541    static pcre_uint8 *
1542    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)  /* as the 8-bit version, but the copy is stored in 16-bit units */
1543    {
1544    pcre_uint16 *npp = *pp;  /* write cursor; *pp still marks the start of this name */
1545    while (isalnum(*p)) *npp++ = *p++;  /* each 8-bit input character is stored as one 16-bit unit */
1546    *npp++ = 0;  /* terminate the copied name */
1547    *npp = 0;  /* a second zero follows; npp is left pointing at it */
1548    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)  /* negative result: report the name as unknown */
1549      {
1550      fprintf(outfile, "no parentheses with name \"");
1551      PCHARSV(*pp, 0, -1, outfile);
1552      fprintf(outfile, "\"\n");
1553      }
1554    *pp = npp;  /* caller's pointer now refers to the position after this name's terminator */
1555    return p;  /* first input character after the name */
1556    }
1557    #endif  /* SUPPORT_PCRE16 */
1558    
1559    
1560    
# Line 467  if (callout_extra) Line 1583  if (callout_extra)
1583      else      else
1584        {        {
1585        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1586        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1587          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1588        fprintf(f, "\n");        fprintf(f, "\n");
1589        }        }
# Line 480  printed lengths of the substrings. */ Line 1596  printed lengths of the substrings. */
1596    
1597  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1598    
1599  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1600  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1601    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1602    
1603  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1604    
1605  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1606    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1607    
1608  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 523  fprintf(outfile, "%.*s", (cb->next_item_ Line 1639  fprintf(outfile, "%.*s", (cb->next_item_
1639  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1640  first_callout = 0;  first_callout = 0;
1641    
1642    if (cb->mark != last_callout_mark)
1643      {
1644      if (cb->mark == NULL)
1645        fprintf(outfile, "Latest Mark: <unset>\n");
1646      else
1647        {
1648        fprintf(outfile, "Latest Mark: ");
1649        PCHARSV(cb->mark, 0, -1, outfile);
1650        putc('\n', outfile);
1651        }
1652      last_callout_mark = cb->mark;
1653      }
1654    
1655  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1656    {    {
1657    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 542  return (cb->callout_number != callout_fa Line 1671  return (cb->callout_number != callout_fa
1671  *            Local malloc functions              *  *            Local malloc functions              *
1672  *************************************************/  *************************************************/
1673    
1674  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1675  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1676    show_malloc variable is set only during matching. */
1677    
1678  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1679  {  {
1680  void *block = malloc(size);  void *block = malloc(size);
1681  gotten_store = size;  gotten_store = size;
1682    if (first_gotten_store == 0) first_gotten_store = size;
1683  if (show_malloc)  if (show_malloc)
1684    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1685  return block;  return block;
# Line 561  if (show_malloc) Line 1692  if (show_malloc)
1692  free(block);  free(block);
1693  }  }
1694    
   
1695  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1696    
1697  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 584  free(block); Line 1714  free(block);
1714  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1715  *************************************************/  *************************************************/
1716    
1717  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1718    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1719    value, but the code is defensive.
1720    
1721    Arguments:
1722      re        compiled regex
1723      study     study data
1724      option    PCRE_INFO_xxx option
1725      ptr       where to put the data
1726    
1727  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  Returns:    0 when OK, < 0 on error
1728    */
1729    
1730    static int
1731    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1732  {  {
1733  int rc;  int rc;
1734  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1735    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1736    #ifdef SUPPORT_PCRE16
1737      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1738    #else
1739      rc = PCRE_ERROR_BADMODE;
1740    #endif
1741    else
1742    #ifdef SUPPORT_PCRE8
1743      rc = pcre_fullinfo(re, study, option, ptr);
1744    #else
1745      rc = PCRE_ERROR_BADMODE;
1746    #endif
1747    
1748    if (rc < 0)
1749      {
1750      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1751        use_pcre16? "16" : "", option);
1752      if (rc == PCRE_ERROR_BADMODE)
1753        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1754          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1755      }
1756    
1757    return rc;
1758  }  }
1759    
1760    
1761    
1762  /*************************************************  /*************************************************
1763  *         Byte flipping function                 *  *             Swap byte functions                *
1764  *************************************************/  *************************************************/
1765    
1766  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1767  byteflip(unsigned long int value, int n)  value, respectively.
1768    
1769    Arguments:
1770      value        any number
1771    
1772    Returns:       the byte swapped value
1773    */
1774    
1775    static pcre_uint32
1776    swap_uint32(pcre_uint32 value)
1777  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1778  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1779         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1780         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1781         ((value & 0xff000000) >> 24);         (value >> 24);
1782  }  }
1783    
1784    static pcre_uint16
1785    swap_uint16(pcre_uint16 value)
1786    {
1787    return (value >> 8) | (value << 8);
1788    }
1789    
1790    
1791    
1792    /*************************************************
1793    *        Flip bytes in a compiled pattern        *
1794    *************************************************/
1795    
1796    /* This function is called if the 'F' option was present on a pattern that is
1797    to be written to a file. We flip the bytes of all the integer fields in the
1798    regex data block and the study block. In 16-bit mode this also flips relevant
1799    bytes in the pattern itself. This is to make it possible to test PCRE's
1800    ability to reload byte-flipped patterns, e.g. those compiled on a different
1801    architecture. */
1802    
1803    static void
1804    regexflip(pcre *ere, pcre_extra *extra)
1805    {
1806    REAL_PCRE *re = (REAL_PCRE *)ere;
1807    #ifdef SUPPORT_PCRE16
1808    int op;
1809    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1810    int length = re->name_count * re->name_entry_size;
1811    #ifdef SUPPORT_UTF
1812    BOOL utf = (re->options & PCRE_UTF16) != 0;
1813    BOOL utf16_char = FALSE;
1814    #endif /* SUPPORT_UTF */
1815    #endif /* SUPPORT_PCRE16 */
1816    
1817    /* Always flip the bytes in the main data block and study blocks. */
1818    
1819    re->magic_number = REVERSED_MAGIC_NUMBER;
1820    re->size = swap_uint32(re->size);
1821    re->options = swap_uint32(re->options);
1822    re->flags = swap_uint16(re->flags);
1823    re->top_bracket = swap_uint16(re->top_bracket);
1824    re->top_backref = swap_uint16(re->top_backref);
1825    re->first_char = swap_uint16(re->first_char);
1826    re->req_char = swap_uint16(re->req_char);
1827    re->name_table_offset = swap_uint16(re->name_table_offset);
1828    re->name_entry_size = swap_uint16(re->name_entry_size);
1829    re->name_count = swap_uint16(re->name_count);
1830    
1831    if (extra != NULL)
1832      {
1833      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1834      rsd->size = swap_uint32(rsd->size);
1835      rsd->flags = swap_uint32(rsd->flags);
1836      rsd->minlength = swap_uint32(rsd->minlength);
1837      }
1838    
1839    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1840    in the name table, if present, and then in the pattern itself. */
1841    
1842    #ifdef SUPPORT_PCRE16
1843    if (!use_pcre16) return;
1844    
1845    while(TRUE)
1846      {
1847      /* Swap previous characters. */
1848      while (length-- > 0)
1849        {
1850        *ptr = swap_uint16(*ptr);
1851        ptr++;
1852        }
1853    #ifdef SUPPORT_UTF
1854      if (utf16_char)
1855        {
1856        if ((ptr[-1] & 0xfc00) == 0xd800)
1857          {
1858          /* We know that there is only one extra character in UTF-16. */
1859          *ptr = swap_uint16(*ptr);
1860          ptr++;
1861          }
1862        }
1863      utf16_char = FALSE;
1864    #endif /* SUPPORT_UTF */
1865    
1866      /* Get next opcode. */
1867    
1868      length = 0;
1869      op = *ptr;
1870      *ptr++ = swap_uint16(op);
1871    
1872      switch (op)
1873        {
1874        case OP_END:
1875        return;
1876    
1877    #ifdef SUPPORT_UTF
1878        case OP_CHAR:
1879        case OP_CHARI:
1880        case OP_NOT:
1881        case OP_NOTI:
1882        case OP_STAR:
1883        case OP_MINSTAR:
1884        case OP_PLUS:
1885        case OP_MINPLUS:
1886        case OP_QUERY:
1887        case OP_MINQUERY:
1888        case OP_UPTO:
1889        case OP_MINUPTO:
1890        case OP_EXACT:
1891        case OP_POSSTAR:
1892        case OP_POSPLUS:
1893        case OP_POSQUERY:
1894        case OP_POSUPTO:
1895        case OP_STARI:
1896        case OP_MINSTARI:
1897        case OP_PLUSI:
1898        case OP_MINPLUSI:
1899        case OP_QUERYI:
1900        case OP_MINQUERYI:
1901        case OP_UPTOI:
1902        case OP_MINUPTOI:
1903        case OP_EXACTI:
1904        case OP_POSSTARI:
1905        case OP_POSPLUSI:
1906        case OP_POSQUERYI:
1907        case OP_POSUPTOI:
1908        case OP_NOTSTAR:
1909        case OP_NOTMINSTAR:
1910        case OP_NOTPLUS:
1911        case OP_NOTMINPLUS:
1912        case OP_NOTQUERY:
1913        case OP_NOTMINQUERY:
1914        case OP_NOTUPTO:
1915        case OP_NOTMINUPTO:
1916        case OP_NOTEXACT:
1917        case OP_NOTPOSSTAR:
1918        case OP_NOTPOSPLUS:
1919        case OP_NOTPOSQUERY:
1920        case OP_NOTPOSUPTO:
1921        case OP_NOTSTARI:
1922        case OP_NOTMINSTARI:
1923        case OP_NOTPLUSI:
1924        case OP_NOTMINPLUSI:
1925        case OP_NOTQUERYI:
1926        case OP_NOTMINQUERYI:
1927        case OP_NOTUPTOI:
1928        case OP_NOTMINUPTOI:
1929        case OP_NOTEXACTI:
1930        case OP_NOTPOSSTARI:
1931        case OP_NOTPOSPLUSI:
1932        case OP_NOTPOSQUERYI:
1933        case OP_NOTPOSUPTOI:
1934        if (utf) utf16_char = TRUE;
1935    #endif
1936        /* Fall through. */
1937    
1938        default:
1939        length = OP_lengths16[op] - 1;
1940        break;
1941    
1942        case OP_CLASS:
1943        case OP_NCLASS:
1944        /* Skip the character bit map. */
1945        ptr += 32/sizeof(pcre_uint16);
1946        length = 0;
1947        break;
1948    
1949        case OP_XCLASS:
1950        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1951        if (LINK_SIZE > 1)
1952          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1953            - (1 + LINK_SIZE + 1));
1954        else
1955          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1956    
1957        /* Reverse the size of the XCLASS instance. */
1958        *ptr = swap_uint16(*ptr);
1959        ptr++;
1960        if (LINK_SIZE > 1)
1961          {
1962          *ptr = swap_uint16(*ptr);
1963          ptr++;
1964          }
1965    
1966        op = *ptr;
1967        *ptr = swap_uint16(op);
1968        ptr++;
1969        if ((op & XCL_MAP) != 0)
1970          {
1971          /* Skip the character bit map. */
1972          ptr += 32/sizeof(pcre_uint16);
1973          length -= 32/sizeof(pcre_uint16);
1974          }
1975        break;
1976        }
1977      }
1978    /* Control should never reach here in 16 bit mode. */
1979    #endif /* SUPPORT_PCRE16 */
1980    }
1981    
1982    
1983    
# Line 617  return ((value & 0x000000ff) << 24) | Line 1986  return ((value & 0x000000ff) << 24) |
1986  *************************************************/  *************************************************/
1987    
1988  static int  static int
1989  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1990    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1991    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1992  {  {
# Line 632  for (;;) Line 2001  for (;;)
2001    {    {
2002    *limit = mid;    *limit = mid;
2003    
2004    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2005      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2006    
2007    if (count == errnumber)    if (count == errnumber)
# Line 677  Returns:    < 0, = 0, or > 0, according Line 2046  Returns:    < 0, = 0, or > 0, according
2046  */  */
2047    
2048  static int  static int
2049  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2050  {  {
2051  while (n--)  while (n--)
2052    {    {
# Line 693  return 0; Line 2062  return 0;
2062  *         Check newline indicator                *  *         Check newline indicator                *
2063  *************************************************/  *************************************************/
2064    
2065  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2066  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
2067    
2068  Arguments:  Arguments:
2069    p           points after the leading '<'    p           points after the leading '<'
# Line 705  Returns:      appropriate PCRE_NEWLINE_x Line 2073  Returns:      appropriate PCRE_NEWLINE_x
2073  */  */
2074    
2075  static int  static int
2076  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2077  {  {
2078  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2079  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2080  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2081  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2082  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2083    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2084    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2085  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2086  return 0;  return 0;
2087  }  }
# Line 725  return 0; Line 2095  return 0;
2095  static void  static void
2096  usage(void)  usage(void)
2097  {  {
2098  printf("Usage:     pcretest [options] [<input> [<output>]]\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2099  printf("  -b       show compiled code (bytecode)\n");  printf("Input and output default to stdin and stdout.\n");
2100    #ifdef SUPPORT_LIBREADLINE
2101    printf("If input is a terminal, readline() is used to read from it.\n");
2102    #else
2103    printf("This version of pcretest is not linked with readline().\n");
2104    #endif
2105    printf("\nOptions:\n");
2106    #ifdef SUPPORT_PCRE16
2107    printf("  -16      use the 16-bit library\n");
2108    #endif
2109    printf("  -b       show compiled code\n");
2110  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2111    printf("  -C arg   show a specific compile-time option\n");
2112    printf("           and exit with its value. The arg can be:\n");
2113    printf("     linksize     internal link size [2, 3, 4]\n");
2114    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2115    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2116    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2117    printf("     ucp          Unicode Properties supported [0, 1]\n");
2118    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2119    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2120  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2121  #if !defined NODFA  #if !defined NODFA
2122  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
2123  #endif  #endif
2124  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
2125  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
2126           "  -M       find MATCH_LIMIT minimum for each subject\n"
2127         "  -m       output memory used information\n"         "  -m       output memory used information\n"
2128         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
2129  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 741  printf("  -p       use POSIX interface\n Line 2131  printf("  -p       use POSIX interface\n
2131  #endif  #endif
2132  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2133  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2134  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
2135           "  -s+      force each pattern to be studied, using JIT if available\n"
2136         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2137  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2138  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 761  options, followed by a set of test data, Line 2152  options, followed by a set of test data,
2152  int main(int argc, char **argv)  int main(int argc, char **argv)
2153  {  {
2154  FILE *infile = stdin;  FILE *infile = stdin;
2155    const char *version;
2156  int options = 0;  int options = 0;
2157  int study_options = 0;  int study_options = 0;
2158    int default_find_match_limit = FALSE;
2159  int op = 1;  int op = 1;
2160  int timeit = 0;  int timeit = 0;
2161  int timeitm = 0;  int timeitm = 0;
2162  int showinfo = 0;  int showinfo = 0;
2163  int showstore = 0;  int showstore = 0;
2164    int force_study = -1;
2165    int force_study_options = 0;
2166  int quiet = 0;  int quiet = 0;
2167  int size_offsets = 45;  int size_offsets = 45;
2168  int size_offsets_max;  int size_offsets_max;
# Line 781  int all_use_dfa = 0; Line 2176  int all_use_dfa = 0;
2176  int yield = 0;  int yield = 0;
2177  int stack_size;  int stack_size;
2178    
2179  /* These vectors store, end-to-end, a list of captured substring names. Assume  pcre_jit_stack *jit_stack = NULL;
 that 1024 is plenty long enough for the few names we'll be testing. */  
   
 uschar copynames[1024];  
 uschar getnames[1024];  
2180    
2181  uschar *copynamesptr;  /* These vectors store, end-to-end, a list of zero-terminated captured
2182  uschar *getnamesptr;  substring names, each list itself being terminated by an empty name. Assume
2183    that 1024 is plenty long enough for the few names we'll be testing. It is
2184    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2185    for the actual memory, to ensure alignment. */
2186    
2187    pcre_uint16 copynames[1024];
2188    pcre_uint16 getnames[1024];
2189    
2190    #ifdef SUPPORT_PCRE16
2191    pcre_uint16 *cn16ptr;
2192    pcre_uint16 *gn16ptr;
2193    #endif
2194    
2195  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2196  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2197    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2198    pcre_uint8 *cn8ptr;
2199    pcre_uint8 *gn8ptr;
2200    #endif
2201    
2202  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2203  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2204  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2205    
2206    buffer = (pcre_uint8 *)malloc(buffer_size);
2207    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2208    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2209    
2210  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2211    
# Line 810  it set 0x8000, but then I was advised th Line 2220  it set 0x8000, but then I was advised th
2220  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2221  #endif  #endif
2222    
2223    /* Get the version number: both pcre_version() and pcre16_version() give the
2224    same answer. We just need to ensure that we call one that is available. */
2225    
2226    #ifdef SUPPORT_PCRE8
2227    version = pcre_version();
2228    #else
2229    version = pcre16_version();
2230    #endif
2231    
2232  /* Scan options */  /* Scan options */
2233    
2234  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2235    {    {
2236    unsigned char *endptr;    pcre_uint8 *endptr;
2237    
2238    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-m") == 0) showstore = 1;
2239      showstore = 1;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2240      else if (strcmp(argv[op], "-s+") == 0)
2241        {
2242        force_study = 1;
2243        force_study_options = PCRE_STUDY_JIT_COMPILE;
2244        }
2245      else if (strcmp(argv[op], "-16") == 0)
2246        {
2247    #ifdef SUPPORT_PCRE16
2248        use_pcre16 = 1;
2249    #else
2250        printf("** This version of PCRE was built without 16-bit support\n");
2251        exit(1);
2252    #endif
2253        }
2254    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2255    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
2256    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2257    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2258      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2259  #if !defined NODFA  #if !defined NODFA
2260    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2261  #endif  #endif
2262    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2263        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2264          *endptr == 0))          *endptr == 0))
2265      {      {
2266      op++;      op++;
# Line 836  while (argc > 1 && argv[op][0] == '-') Line 2270  while (argc > 1 && argv[op][0] == '-')
2270      {      {
2271      int both = argv[op][2] == 0;      int both = argv[op][2] == 0;
2272      int temp;      int temp;
2273      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2274                       *endptr == 0))                       *endptr == 0))
2275        {        {
2276        timeitm = temp;        timeitm = temp;
# Line 847  while (argc > 1 && argv[op][0] == '-') Line 2281  while (argc > 1 && argv[op][0] == '-')
2281      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2282      }      }
2283    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2284        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2285          *endptr == 0))          *endptr == 0))
2286      {      {
2287  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2288      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2289      exit(1);      exit(1);
2290  #else  #else
# Line 874  while (argc > 1 && argv[op][0] == '-') Line 2308  while (argc > 1 && argv[op][0] == '-')
2308    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
2309      {      {
2310      int rc;      int rc;
2311      printf("PCRE version %s\n", pcre_version());      unsigned long int lrc;
2312    
2313        if (argc > 2)
2314          {
2315          if (strcmp(argv[op + 1], "linksize") == 0)
2316            {
2317            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2318            printf("%d\n", rc);
2319            yield = rc;
2320            goto EXIT;
2321            }
2322          if (strcmp(argv[op + 1], "pcre8") == 0)
2323            {
2324    #ifdef SUPPORT_PCRE8
2325            printf("1\n");
2326            yield = 1;
2327    #else
2328            printf("0\n");
2329            yield = 0;
2330    #endif
2331            goto EXIT;
2332            }
2333          if (strcmp(argv[op + 1], "pcre16") == 0)
2334            {
2335    #ifdef SUPPORT_PCRE16
2336            printf("1\n");
2337            yield = 1;
2338    #else
2339            printf("0\n");
2340            yield = 0;
2341    #endif
2342            goto EXIT;
2343            }
2344          if (strcmp(argv[op + 1], "utf") == 0)
2345            {
2346    #ifdef SUPPORT_PCRE8
2347            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2348            printf("%d\n", rc);
2349            yield = rc;
2350    #else
2351            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2352            printf("%d\n", rc);
2353            yield = rc;
2354    #endif
2355            goto EXIT;
2356            }
2357          if (strcmp(argv[op + 1], "ucp") == 0)
2358            {
2359            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2360            printf("%d\n", rc);
2361            yield = rc;
2362            goto EXIT;
2363            }
2364          if (strcmp(argv[op + 1], "jit") == 0)
2365            {
2366            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2367            printf("%d\n", rc);
2368            yield = rc;
2369            goto EXIT;
2370            }
2371          if (strcmp(argv[op + 1], "newline") == 0)
2372            {
2373            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2374            /* Note that these values are always the ASCII values, even
2375            in EBCDIC environments. CR is 13 and NL is 10. */
2376            printf("%s\n", (rc == 13)? "CR" :
2377              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2378              (rc == -2)? "ANYCRLF" :
2379              (rc == -1)? "ANY" : "???");
2380            goto EXIT;
2381            }
2382          printf("Unknown -C option: %s\n", argv[op + 1]);
2383          goto EXIT;
2384          }
2385    
2386        printf("PCRE version %s\n", version);
2387      printf("Compiled with\n");      printf("Compiled with\n");
2388    
2389    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2390    are set, either both UTFs are supported or both are not supported. */
2391    
2392    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2393        printf("  8-bit and 16-bit support\n");
2394        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2395        if (rc)
2396          printf("  UTF-8 and UTF-16 support\n");
2397        else
2398          printf("  No UTF-8 or UTF-16 support\n");
2399    #elif defined SUPPORT_PCRE8
2400        printf("  8-bit support only\n");
2401      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2402      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2403      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2404        printf("  16-bit support only\n");
2405        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2406        printf("  %sUTF-16 support\n", rc? "" : "No ");
2407    #endif
2408    
2409        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2410      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2411      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2412      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      if (rc)
2413        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        {
2414          const char *arch;
2415          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, &arch);
2416          printf("  Just-in-time compiler support: %s\n", arch);
2417          }
2418        else
2419          printf("  No just-in-time compiler support\n");
2420        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2421        /* Note that these values are always the ASCII values, even
2422        in EBCDIC environments. CR is 13 and NL is 10. */
2423        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2424          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2425        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2426        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2427      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2428        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2429                                         "all Unicode newlines");
2430        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2431      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2432      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2433      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2434      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2435      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
2436      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2437      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
2438      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2439      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
2440      goto EXIT;      goto EXIT;
2441      }      }
# Line 952  if (argc > 2) Line 2494  if (argc > 2)
2494    
2495  /* Set alternative malloc function */  /* Set alternative malloc function */
2496    
2497    #ifdef SUPPORT_PCRE8
2498  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2499  pcre_free = new_free;  pcre_free = new_free;
2500  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2501  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2502    #endif
2503    
2504    #ifdef SUPPORT_PCRE16
2505    pcre16_malloc = new_malloc;
2506    pcre16_free = new_free;
2507    pcre16_stack_malloc = stack_malloc;
2508    pcre16_stack_free = stack_free;
2509    #endif
2510    
2511  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2512    
2513  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2514    
2515  /* Main loop */  /* Main loop */
2516    
# Line 974  while (!done) Line 2525  while (!done)
2525  #endif  #endif
2526    
2527    const char *error;    const char *error;
2528    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2529    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
2530    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
2531      const pcre_uint8 *tables = NULL;
2532      unsigned long int get_options;
2533    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2534    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2535      int do_allcaps = 0;
2536      int do_mark = 0;
2537    int do_study = 0;    int do_study = 0;
2538      int no_force_study = 0;
2539    int do_debug = debug;    int do_debug = debug;
2540    int do_G = 0;    int do_G = 0;
2541    int do_g = 0;    int do_g = 0;
2542    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2543    int do_showrest = 0;    int do_showrest = 0;
2544      int do_showcaprest = 0;
2545    int do_flip = 0;    int do_flip = 0;
2546    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2547    
2548    use_utf8 = 0;    use_utf = 0;
2549    debug_lengths = 1;    debug_lengths = 1;
2550    
2551    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (extend_inputline(infile, buffer) == NULL) break;  
2552    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2553    fflush(outfile);    fflush(outfile);
2554    
# Line 1004  while (!done) Line 2560  while (!done)
2560    
2561    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2562      {      {
2563      unsigned long int magic, get_options;      pcre_uint32 magic;
2564      uschar sbuf[8];      pcre_uint8 sbuf[8];
2565      FILE *f;      FILE *f;
2566    
2567      p++;      p++;
2568        if (*p == '!')
2569          {
2570          do_debug = TRUE;
2571          do_showinfo = TRUE;
2572          p++;
2573          }
2574    
2575      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2576      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2577      *pp = 0;      *pp = 0;
# Line 1020  while (!done) Line 2583  while (!done)
2583        continue;        continue;
2584        }        }
2585    
2586        first_gotten_store = 0;
2587      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2588    
2589      true_size =      true_size =
# Line 1027  while (!done) Line 2591  while (!done)
2591      true_study_size =      true_study_size =
2592        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2593    
2594      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2595      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2596    
2597      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2598    
2599      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2600      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2601        {        {
2602        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2603          {          {
2604          do_flip = 1;          do_flip = 1;
2605          }          }
# Line 1047  while (!done) Line 2611  while (!done)
2611          }          }
2612        }        }
2613    
2614      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
2615        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2616          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
2617    
2618      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
2619    
2620      if (true_study_size != 0)      if (true_study_size != 0)
2621        {        {
# Line 1071  while (!done) Line 2631  while (!done)
2631          {          {
2632          FAIL_READ:          FAIL_READ:
2633          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2634          if (extra != NULL) new_free(extra);          if (extra != NULL)
2635              {
2636              PCRE_FREE_STUDY(extra);
2637              }
2638          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2639          fclose(f);          fclose(f);
2640          continue;          continue;
# Line 1081  while (!done) Line 2644  while (!done)
2644        }        }
2645      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2646    
2647        /* Flip the necessary bytes. */
2648        if (do_flip)
2649          {
2650          int rc;
2651          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2652          if (rc == PCRE_ERROR_BADMODE)
2653            {
2654            /* Simulate the result of the function call below. */
2655            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2656              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2657            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2658              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2659            continue;
2660            }
2661          }
2662    
2663        /* Need to know if UTF-8 for printing data strings. */
2664    
2665        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2666        use_utf = (get_options & PCRE_UTF8) != 0;
2667    
2668      fclose(f);      fclose(f);
2669      goto SHOW_INFO;      goto SHOW_INFO;
2670      }      }
2671    
2672    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2673    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2674    
2675    delimiter = *p++;    delimiter = *p++;
2676    
2677    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2678      {      {
2679      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2680      goto SKIP_DATA;      goto SKIP_DATA;
2681      }      }
2682    
2683    pp = p;    pp = p;
2684    poffset = p - buffer;    poffset = (int)(p - buffer);
2685    
2686    for(;;)    for(;;)
2687      {      {
# Line 1108  while (!done) Line 2692  while (!done)
2692        pp++;        pp++;
2693        }        }
2694      if (*pp != 0) break;      if (*pp != 0) break;
2695      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if ((pp = extend_inputline(infile, pp)) == NULL)  
2696        {        {
2697        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2698        done = 1;        done = 1;
# Line 1152  while (!done) Line 2735  while (!done)
2735        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2736        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2737    
2738        case '+': do_showrest = 1; break;        case '+':
2739          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2740          break;
2741    
2742          case '=': do_allcaps = 1; break;
2743        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2744        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2745        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1162  while (!done) Line 2749  while (!done)
2749        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2750        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2751        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
2752          case 'K': do_mark = 1; break;
2753        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2754        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2755    
# Line 1169  while (!done) Line 2757  while (!done)
2757        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2758  #endif  #endif
2759    
2760        case 'S': do_study = 1; break;        case 'S':
2761          if (do_study == 0)
2762            {
2763            do_study = 1;
2764            if (*pp == '+')
2765              {
2766              study_options |= PCRE_STUDY_JIT_COMPILE;
2767              pp++;
2768              }
2769            }
2770          else
2771            {
2772            do_study = 0;
2773            no_force_study = 1;
2774            }
2775          break;
2776    
2777        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2778          case 'W': options |= PCRE_UCP; break;
2779        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2780          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2781        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2782        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2783        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2784    
2785          case 'T':
2786          switch (*pp++)
2787            {
2788            case '0': tables = tables0; break;
2789            case '1': tables = tables1; break;
2790    
2791            case '\r':
2792            case '\n':
2793            case ' ':
2794            case 0:
2795            fprintf(outfile, "** Missing table number after /T\n");
2796            goto SKIP_DATA;
2797    
2798            default:
2799            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2800            goto SKIP_DATA;
2801            }
2802          break;
2803    
2804        case 'L':        case 'L':
2805        ppp = pp;        ppp = pp;
2806        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1188  while (!done) Line 2813  while (!done)
2813          goto SKIP_DATA;          goto SKIP_DATA;
2814          }          }
2815        locale_set = 1;        locale_set = 1;
2816        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2817        pp = ppp;        pp = ppp;
2818        break;        break;
2819    
# Line 1201  while (!done) Line 2826  while (!done)
2826    
2827        case '<':        case '<':
2828          {          {
2829          int x = check_newline(pp, outfile);          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2830          if (x == 0) goto SKIP_DATA;            {
2831          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
2832          while (*pp++ != '>');            pp += 3;
2833              }
2834            else
2835              {
2836              int x = check_newline(pp, outfile);
2837              if (x == 0) goto SKIP_DATA;
2838              options |= x;
2839              while (*pp++ != '>');
2840              }
2841          }          }
2842        break;        break;
2843    
# Line 1221  while (!done) Line 2854  while (!done)
2854    
2855    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2856    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2857    local character tables. */    local character tables. Neither does it have 16-bit support. */
2858    
2859  #if !defined NOPOSIX  #if !defined NOPOSIX
2860    if (posix || do_posix)    if (posix || do_posix)
# Line 1234  while (!done) Line 2867  while (!done)
2867      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2868      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2869      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2870        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2871        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2872    
2873        first_gotten_store = 0;
2874      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2875    
2876      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1254  while (!done) Line 2890  while (!done)
2890  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2891    
2892      {      {
2893        /* In 16-bit mode, convert the input. */
2894    
2895    #ifdef SUPPORT_PCRE16
2896        if (use_pcre16)
2897          {
2898          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2899            {
2900            case -1:
2901            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2902              "converted to UTF-16\n");
2903            goto SKIP_DATA;
2904    
2905            case -2:
2906            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2907              "cannot be converted to UTF-16\n");
2908            goto SKIP_DATA;
2909    
2910            case -3: /* "Impossible error" when to16 is called with arg1 FALSE */
2911            fprintf(outfile, "**Failed: character value greater than 0xffff "
2912              "cannot be converted to 16-bit in non-UTF mode\n");
2913            goto SKIP_DATA;
2914    
2915            default:
2916            break;
2917            }
2918          p = (pcre_uint8 *)buffer16;
2919          }
2920    #endif
2921    
2922        /* Compile many times when timing */
2923    
2924      if (timeit > 0)      if (timeit > 0)
2925        {        {
2926        register int i;        register int i;
# Line 1261  while (!done) Line 2928  while (!done)
2928        clock_t start_time = clock();        clock_t start_time = clock();
2929        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2930          {          {
2931          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2932          if (re != NULL) free(re);          if (re != NULL) free(re);
2933          }          }
2934        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1270  while (!done) Line 2937  while (!done)
2937            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2938        }        }
2939    
2940      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2941        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2942    
2943      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2944      if non-interactive. */      if non-interactive. */
# Line 1283  while (!done) Line 2951  while (!done)
2951          {          {
2952          for (;;)          for (;;)
2953            {            {
2954            if (extend_inputline(infile, buffer) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2955              {              {
2956              done = 1;              done = 1;
2957              goto CONTINUE;              goto CONTINUE;
# Line 1297  while (!done) Line 2965  while (!done)
2965        goto CONTINUE;        goto CONTINUE;
2966        }        }
2967    
2968      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2969      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
2970      returns only limited data. Check that it agrees with the newer one. */      lines. */
2971    
2972      if (log_store)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2973        fprintf(outfile, "Memory allocation (code space): %d\n",        goto SKIP_DATA;
2974          (int)(gotten_store -      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
2975    
2976      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
2977      and remember the store that was got. */      and remember the store that was got. */
2978    
2979      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
2980      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2981    
2982        /* Output code size information if requested */
2983    
2984      /* If /S was present, study the regexp to generate additional info to      if (log_store)
2985      help with the matching. */        fprintf(outfile, "Memory allocation (code space): %d\n",
2986            (int)(first_gotten_store -
2987                  sizeof(REAL_PCRE) -
2988                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2989    
2990        /* If -s or /S was present, study the regex to generate additional info to
2991        help with the matching, unless the pattern has the SS option, which
2992        suppresses the effect of /S (used for a few test patterns where studying is
2993        never sensible). */
2994    
2995      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
2996        {        {
2997        if (timeit > 0)        if (timeit > 0)
2998          {          {
# Line 1324  while (!done) Line 3000  while (!done)
3000          clock_t time_taken;          clock_t time_taken;
3001          clock_t start_time = clock();          clock_t start_time = clock();
3002          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3003            extra = pcre_study(re, study_options, &error);            {
3004              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3005              }
3006          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3007          if (extra != NULL) free(extra);          if (extra != NULL)
3008              {
3009              PCRE_FREE_STUDY(extra);
3010              }
3011          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3012            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3013              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3014          }          }
3015        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3016        if (error != NULL)        if (error != NULL)
3017          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3018        else if (extra != NULL)        else if (extra != NULL)
3019            {
3020          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3021            if (log_store)
3022              {
3023              size_t jitsize;
3024              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3025                  jitsize != 0)
3026                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3027              }
3028            }
3029        }        }
3030    
3031      /* If the 'F' option was present, we flip the bytes of all the integer      /* If /K was present, we set up for handling MARK data. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
3032    
3033      if (do_flip)      if (do_mark)
3034        {        {
3035        real_pcre *rre = (real_pcre *)re;        if (extra == NULL)
       rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));  
   
       if (extra != NULL)  
3036          {          {
3037          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3038          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          extra->flags = 0;
         rsd->options = byteflip(rsd->options, sizeof(rsd->options));  
3039          }          }
3040          extra->mark = &markptr;
3041          extra->flags |= PCRE_EXTRA_MARK;
3042        }        }
3043    
3044      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
3045    
3046      SHOW_INFO:      SHOW_INFO:
3047    
3048      if (do_debug)      if (do_debug)
3049        {        {
3050        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3051        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3052        }        }
3053    
3054        /* We already have the options in get_options (see above) */
3055    
3056      if (do_showinfo)      if (do_showinfo)
3057        {        {
3058        unsigned long int get_options, all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3059        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3060          hascrorlf;          hascrorlf;
3061        int nameentrysize, namecount;        int nameentrysize, namecount;
3062        const uschar *nametable;        const pcre_uint8 *nametable;
3063    
3064        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3065        new_info(re, NULL, PCRE_INFO_SIZE, &size);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3066        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3067        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3068        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3069        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3070        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3071        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3072        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3073        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3074        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3075        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            != 0)
3076            goto SKIP_DATA;
 #if !defined NOINFOCHECK  
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3077    
3078        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3079          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1434  while (!done) Line 3088  while (!done)
3088          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3089          while (namecount-- > 0)          while (namecount-- > 0)
3090            {            {
3091            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3092              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3093              GET2(nametable, 0));  #else
3094              int imm2_size = IMM2_SIZE;
3095    #endif
3096              int length = (int)STRLEN(nametable + imm2_size);
3097              fprintf(outfile, "  ");
3098              PCHARSV(nametable, imm2_size, length, outfile);
3099              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3100    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3101              fprintf(outfile, "%3d\n", use_pcre16?
3102                 (int)(((PCRE_SPTR16)nametable)[0])
3103                :((int)nametable[0] << 8) | (int)nametable[1]);
3104              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3105    #else
3106              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3107    #ifdef SUPPORT_PCRE8
3108            nametable += nameentrysize;            nametable += nameentrysize;
3109    #else
3110              nametable += nameentrysize * 2;
3111    #endif
3112    #endif
3113            }            }
3114          }          }
3115    
3116        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3117        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3118    
3119        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3120        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3121    
3122        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3123          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3124            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3125            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3126            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3127            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3128            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3129            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3130              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3131              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3132            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3133            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3134            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3135            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3136            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3137            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3138              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3139              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3140            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3141    
3142        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1501  while (!done) Line 3177  while (!done)
3177          }          }
3178        else        else
3179          {          {
3180          int ch = first_char & 255;          const char *caseless =
3181          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3182            "" : " (caseless)";            "" : " (caseless)";
3183          if (PRINTHEX(ch))  
3184            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3185              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3186          else          else
3187            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3188              fprintf(outfile, "First char = ");
3189              pchar(first_char, outfile);
3190              fprintf(outfile, "%s\n", caseless);
3191              }
3192          }          }
3193    
3194        if (need_char < 0)        if (need_char < 0)
# Line 1516  while (!done) Line 3197  while (!done)
3197          }          }
3198        else        else
3199          {          {
3200          int ch = need_char & 255;          const char *caseless =
3201          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3202            "" : " (caseless)";            "" : " (caseless)";
3203          if (PRINTHEX(ch))  
3204            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3205              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3206          else          else
3207            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3208              fprintf(outfile, "Need char = ");
3209              pchar(need_char, outfile);
3210              fprintf(outfile, "%s\n", caseless);
3211              }
3212          }          }
3213    
3214        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3215        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3216        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3217        flipped.) */        flipped.) If study was forced by an external -s, don't show this
3218          information unless -i or -d was also present. This means that, except
3219          when auto-callouts are involved, the output from runs with and without
3220          -s should be identical. */
3221    
3222        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3223          {          {
3224          if (extra == NULL)          if (extra == NULL)
3225            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3226          else          else
3227            {            {
3228            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3229            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
3230    
3231            if (start_bits == NULL)            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3232              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3233            else  
3234              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3235              {              {
3236              int i;              if (start_bits == NULL)
3237              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3238              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3239                {                {
3240                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3241                  int c = 24;
3242                  fprintf(outfile, "Starting byte set: ");
3243                  for (i = 0; i < 256; i++)
3244                  {                  {
3245                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
                   {  
                   fprintf(outfile, "%c ", i);  
                   c += 2;  
                   }  
                 else  
3246                    {                    {
3247                    fprintf(outfile, "\\x%02x ", i);                    if (c > 75)
3248                    c += 5;                      {
3249                        fprintf(outfile, "\n  ");
3250                        c = 2;
3251                        }
3252                      if (PRINTOK(i) && i != ' ')
3253                        {
3254                        fprintf(outfile, "%c ", i);
3255                        c += 2;
3256                        }
3257                      else
3258                        {
3259                        fprintf(outfile, "\\x%02x ", i);
3260                        c += 5;
3261                        }
3262                    }                    }
3263                  }                  }
3264                  fprintf(outfile, "\n");
3265                }                }
3266              fprintf(outfile, "\n");              }
3267              }
3268    
3269            /* Show this only if the JIT was set by /S, not by -s. */
3270    
3271            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3272              {
3273              int jit;
3274              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3275                {
3276                if (jit)
3277                  fprintf(outfile, "JIT study was successful\n");
3278                else
3279    #ifdef SUPPORT_JIT
3280                  fprintf(outfile, "JIT study was not successful\n");
3281    #else
3282                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3283    #endif
3284              }              }
3285            }            }
3286          }          }
# Line 1586  while (!done) Line 3299  while (!done)
3299          }          }
3300        else        else
3301          {          {
3302          uschar sbuf[8];          pcre_uint8 sbuf[8];
3303          sbuf[0] = (true_size >> 24)  & 255;  
3304          sbuf[1] = (true_size >> 16)  & 255;          if (do_flip) regexflip(re, extra);
3305          sbuf[2] = (true_size >>  8)  & 255;          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3306          sbuf[3] = (true_size)  & 255;          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3307            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3308          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[3] = (pcre_uint8)((true_size) & 255);
3309          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3310          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3311          sbuf[7] = (true_study_size)  & 255;          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3312            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3313    
3314          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3315              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1604  while (!done) Line 3318  while (!done)
3318            }            }
3319          else          else
3320            {            {
3321            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3322    
3323              /* If there is study data, write it. */
3324    
3325            if (extra != NULL)            if (extra != NULL)
3326              {              {
3327              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1614  while (!done) Line 3331  while (!done)
3331                  strerror(errno));                  strerror(errno));
3332                }                }
3333              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
3334              }              }
3335            }            }
3336          fclose(f);          fclose(f);
3337          }          }
3338    
3339        new_free(re);        new_free(re);
3340        if (extra != NULL) new_free(extra);        if (extra != NULL)
3341        if (tables != NULL) new_free((void *)tables);          {
3342            PCRE_FREE_STUDY(extra);
3343            }
3344          if (locale_set)
3345            {
3346            new_free((void *)tables);
3347            setlocale(LC_CTYPE, "C");
3348            locale_set = 0;
3349            }
3350        continue;  /* With next regex */        continue;  /* With next regex */
3351        }        }
3352      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1631  while (!done) Line 3355  while (!done)
3355    
3356    for (;;)    for (;;)
3357      {      {
3358      uschar *q;      pcre_uint8 *q;
3359      uschar *bptr;      pcre_uint8 *bptr;
3360      int *use_offsets = offsets;      int *use_offsets = offsets;
3361      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3362      int callout_data = 0;      int callout_data = 0;
3363      int callout_data_set = 0;      int callout_data_set = 0;
3364      int count, c;      int count, c;
3365      int copystrings = 0;      int copystrings = 0;
3366      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
3367      int getstrings = 0;      int getstrings = 0;
3368      int getlist = 0;      int getlist = 0;
3369      int gmatched = 0;      int gmatched = 0;
3370      int start_offset = 0;      int start_offset = 0;
3371        int start_offset_sign = 1;
3372      int g_notempty = 0;      int g_notempty = 0;
3373      int use_dfa = 0;      int use_dfa = 0;
3374    
     options = 0;  
   
3375      *copynames = 0;      *copynames = 0;
3376      *getnames = 0;      *getnames = 0;
3377    
3378      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3379      getnamesptr = getnames;      cn16ptr = copynames;
3380        gn16ptr = getnames;
3381    #endif
3382    #ifdef SUPPORT_PCRE8
3383        cn8ptr = copynames8;
3384        gn8ptr = getnames8;
3385    #endif
3386    
3387      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3388      first_callout = 1;      first_callout = 1;
3389        last_callout_mark = NULL;
3390      callout_extra = 0;      callout_extra = 0;
3391      callout_count = 0;      callout_count = 0;
3392      callout_fail_count = 999999;      callout_fail_count = 999999;
3393      callout_fail_id = -1;      callout_fail_id = -1;
3394      show_malloc = 0;      show_malloc = 0;
3395        options = 0;
3396    
3397      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3398        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 1669  while (!done) Line 3400  while (!done)
3400      len = 0;      len = 0;
3401      for (;;)      for (;;)
3402        {        {
3403        if (infile == stdin) printf("data> ");        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       if (extend_inputline(infile, buffer + len) == NULL)  
3404          {          {
3405          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
3406              {
3407              fprintf(outfile, "\n");
3408              break;
3409              }
3410          done = 1;          done = 1;
3411          goto CONTINUE;          goto CONTINUE;
3412          }          }
# Line 1694  while (!done) Line 3428  while (!done)
3428        int i = 0;        int i = 0;
3429        int n = 0;        int n = 0;
3430    
3431        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3432          In non-UTF mode, allow the value of the byte to fall through to later,
3433          where values greater than 127 are turned into UTF-8 when running in
3434          16-bit mode. */
3435    
3436          if (c != '\\')
3437            {
3438            if (use_utf)
3439              {
3440              *q++ = c;
3441              continue;
3442              }
3443            }
3444    
3445          /* Handle backslash escapes */
3446    
3447          else switch ((c = *p++))
3448          {          {
3449          case 'a': c =    7; break;          case 'a': c =    7; break;
3450          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 1710  while (!done) Line 3460  while (!done)
3460          c -= '0';          c -= '0';
3461          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3462            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3463          break;          break;
3464    
3465          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3466          if (*p == '{')          if (*p == '{')
3467            {            {
3468            unsigned char *pt = p;            pcre_uint8 *pt = p;
3469            c = 0;            c = 0;
3470            while (isxdigit(*(++pt)))  
3471              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3472              when isxdigit() is a macro that refers to its argument more than
3473              once. This is banned by the C Standard, but apparently happens in at
3474              least one MacOS environment. */
3475    
3476              for (pt++; isxdigit(*pt); pt++)
3477                {
3478                if (++i == 9)
3479                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3480                                   "using only the first eight.\n");
3481                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3482                }
3483            if (*pt == '}')            if (*pt == '}')
3484              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             utn = ord2utf8(c, buff8);  
             for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
             c = buff8[ii];   /* Last byte */  
3485              p = pt + 1;              p = pt + 1;
3486              break;              break;
3487              }              }
3488            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3489            }            }
 #endif  
3490    
3491          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3492            allows UTF-8 characters to be constructed byte by byte, and also allows
3493            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3494            Otherwise, pass it down to later code so that it can be turned into
3495            UTF-8 when running in 16-bit mode. */
3496    
3497          c = 0;          c = 0;
3498          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3499            {            {
3500            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3501            p++;            p++;
3502            }            }
3503            if (use_utf)
3504              {
3505              *q++ = c;
3506              continue;
3507              }
3508          break;          break;
3509    
3510          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 1763  while (!done) Line 3512  while (!done)
3512          continue;          continue;
3513    
3514          case '>':          case '>':
3515            if (*p == '-')
3516              {
3517              start_offset_sign = -1;
3518              p++;
3519              }
3520          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3521            start_offset *= start_offset_sign;
3522          continue;          continue;
3523    
3524          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1782  while (!done) Line 3537  while (!done)
3537            }            }
3538          else if (isalnum(*p))          else if (isalnum(*p))
3539            {            {
3540            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3541            }            }
3542          else if (*p == '+')          else if (*p == '+')
3543            {            {
# Line 1798  while (!done) Line 3546  while (!done)
3546            }            }
3547          else if (*p == '-')          else if (*p == '-')
3548            {            {
3549            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3550            p++;            p++;
3551            }            }
3552          else if (*p == '!')          else if (*p == '!')
# Line 1836  while (!done) Line 3584  while (!done)
3584  #endif  #endif
3585            use_dfa = 1;            use_dfa = 1;
3586          continue;          continue;
3587    #endif
3588    
3589    #if !defined NODFA
3590          case 'F':          case 'F':
3591          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
3592          continue;          continue;
# Line 1850  while (!done) Line 3600  while (!done)
3600            }            }
3601          else if (isalnum(*p))          else if (isalnum(*p))
3602            {            {
3603            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3604            while (isalnum(*p)) *npp++ = *p++;            }
3605            *npp++ = 0;          continue;
3606            *npp = 0;  
3607            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3608            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3609              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3610            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3611                && extra->executable_jit != NULL)
3612              {
3613              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3614              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3615              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3616            }            }
3617          continue;          continue;
3618    
# Line 1870  while (!done) Line 3625  while (!done)
3625          continue;          continue;
3626    
3627          case 'N':          case 'N':
3628          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
3629              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3630            else
3631              options |= PCRE_NOTEMPTY;
3632          continue;          continue;
3633    
3634          case 'O':          case 'O':
# Line 1893  while (!done) Line 3651  while (!done)
3651          continue;          continue;
3652    
3653          case 'P':          case 'P':
3654          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3655              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3656          continue;          continue;
3657    
3658          case 'Q':          case 'Q':
# Line 1928  while (!done) Line 3687  while (!done)
3687          show_malloc = 1;          show_malloc = 1;
3688          continue;          continue;
3689    
3690            case 'Y':
3691            options |= PCRE_NO_START_OPTIMIZE;
3692            continue;
3693    
3694          case 'Z':          case 'Z':
3695          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
3696          continue;          continue;
# Line 1945  while (!done) Line 3708  while (!done)
3708            }            }
3709          continue;          continue;
3710          }          }
3711        *q++ = c;  
3712          /* We now have a character value in c that may be greater than 255. In
3713          16-bit mode, we always convert characters to UTF-8 so that values greater
3714          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3715          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3716          mode must have come from \x{...} or octal constructs because values from
3717          \x.. get this far only in non-UTF mode. */
3718    
3719    #if !defined NOUTF || defined SUPPORT_PCRE16
3720          if (use_pcre16 || use_utf)
3721            {
3722            pcre_uint8 buff8[8];
3723            int ii, utn;
3724            utn = ord2utf8(c, buff8);
3725            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3726            }
3727          else
3728    #endif
3729            {
3730            if (c > 255)
3731              {
3732              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3733                "and UTF-8 mode is not enabled.\n", c);
3734              fprintf(outfile, "** Truncation will probably give the wrong "
3735                "result.\n");
3736              }
3737            *q++ = c;
3738            }
3739        }        }
3740    
3741        /* Reached end of subject string */
3742    
3743      *q = 0;      *q = 0;
3744      len = q - dbuffer;      len = (int)(q - dbuffer);
3745    
3746        /* Move the data to the end of the buffer so that a read over the end of
3747        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3748        we are using the POSIX interface, we must include the terminating zero. */
3749    
3750    #if !defined NOPOSIX
3751        if (posix || do_posix)
3752          {
3753          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3754          bptr += buffer_size - len - 1;
3755          }
3756        else
3757    #endif
3758          {
3759          memmove(bptr + buffer_size - len, bptr, len);
3760          bptr += buffer_size - len;
3761          }
3762    
3763      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
3764        {        {
# Line 1969  while (!done) Line 3779  while (!done)
3779          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3780        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3781        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3782          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3783    
3784        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3785    
# Line 1990  while (!done) Line 3801  while (!done)
3801            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3802              {              {
3803              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3804              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3805                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3806              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3807              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3808                {                {
3809                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3810                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3811                  outfile);                  outfile);
3812                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3813                }                }
# Line 2004  while (!done) Line 3815  while (!done)
3815            }            }
3816          }          }
3817        free(pmatch);        free(pmatch);
3818          goto NEXT_DATA;
3819        }        }
3820    
3821    #endif  /* !defined NOPOSIX */
3822    
3823      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3824    
3825      else  #ifdef SUPPORT_PCRE16
3826  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3827          {
3828          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3829          switch(len)
3830            {
3831            case -1:
3832            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3833              "converted to UTF-16\n");
3834            goto NEXT_DATA;
3835    
3836            case -2:
3837            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3838              "cannot be converted to UTF-16\n");
3839            goto NEXT_DATA;
3840    
3841            case -3:
3842            fprintf(outfile, "**Failed: character value greater than 0xffff "
3843              "cannot be converted to 16-bit in non-UTF mode\n");
3844            goto NEXT_DATA;
3845    
3846            default:
3847            break;
3848            }
3849          bptr = (pcre_uint8 *)buffer16;
3850          }
3851    #endif
3852    
3853      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3854        {        {
3855          markptr = NULL;
3856    
3857        if (timeitm > 0)        if (timeitm > 0)
3858          {          {
3859          register int i;          register int i;
# Line 2024  while (!done) Line 3865  while (!done)
3865            {            {
3866            int workspace[1000];            int workspace[1000];
3867            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3868              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              {
3869                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3870                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets, workspace,
3871                  (sizeof(workspace)/sizeof(int)));
3872                }
3873            }            }
3874          else          else
3875  #endif  #endif
3876    
3877          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3878            count = pcre_exec(re, extra, (char *)bptr, len,            {
3879              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3880                (options | g_notempty), use_offsets, use_size_offsets);
3881              }
3882          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3883          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3884            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2043  while (!done) Line 3887  while (!done)
3887    
3888        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3889        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
3890        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
3891          running of pcre_exec(), so disable the JIT optimization. This makes it
3892          possible to run the same set of tests with and without JIT externally
3893          requested. */
3894    
3895        if (find_match_limit)        if (find_match_limit)
3896          {          {
# Line 2052  while (!done) Line 3899  while (!done)
3899            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3900            extra->flags = 0;            extra->flags = 0;
3901            }            }
3902            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3903    
3904          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3905            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2075  while (!done) Line 3923  while (!done)
3923            }            }
3924          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3925          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3926          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3927            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3928          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3929          }          }
# Line 2087  while (!done) Line 3935  while (!done)
3935        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3936          {          {
3937          int workspace[1000];          int workspace[1000];
3938          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3939            options | g_notempty, use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, workspace,
3940            sizeof(workspace)/sizeof(int));            (sizeof(workspace)/sizeof(int)));
3941          if (count == 0)          if (count == 0)
3942            {            {
3943            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2100  while (!done) Line 3948  while (!done)
3948    
3949        else        else
3950          {          {
3951          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3952            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3953          if (count == 0)          if (count == 0)
3954            {            {
3955            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 2114  while (!done) Line 3962  while (!done)
3962        if (count >= 0)        if (count >= 0)
3963          {          {
3964          int i, maxcount;          int i, maxcount;
3965            void *cnptr, *gnptr;
3966    
3967  #if !defined NODFA  #if !defined NODFA
3968          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2135  while (!done) Line 3984  while (!done)
3984              }              }
3985            }            }
3986    
3987            /* do_allcaps requests showing of all captures in the pattern, to check
3988            unset ones at the end. */
3989    
3990            if (do_allcaps)
3991              {
3992              if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3993                goto SKIP_DATA;
3994              count++;   /* Allow for full match */
3995              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3996              }
3997    
3998            /* Output the captured substrings */
3999    
4000          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
4001            {            {
4002            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
4003                {
4004                if (use_offsets[i] != -1)
4005                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4006                    use_offsets[i], i);
4007                if (use_offsets[i+1] != -1)
4008                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4009                    use_offsets[i+1], i+1);
4010              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
4011                }
4012            else            else
4013              {              {
4014              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
4015              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr, use_offsets[i],
4016                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
4017              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4018              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
4019                {                {
4020                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
4021                  {                PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4022                  fprintf(outfile, " 0+ ");                  outfile);
4023                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
4024                }                }
4025              }              }
4026            }            }
4027    
4028            if (markptr != NULL)
4029              {
4030              fprintf(outfile, "MK: ");
4031              PCHARSV(markptr, 0, -1, outfile);
4032              fprintf(outfile, "\n");
4033              }
4034    
4035          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4036            {            {
4037            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
4038              {              {
4039                int rc;
4040              char copybuffer[256];              char copybuffer[256];
4041              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4042                i, copybuffer, sizeof(copybuffer));                copybuffer, sizeof(copybuffer));
4043              if (rc < 0)              if (rc < 0)
4044                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4045              else              else
4046                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);                {
4047                  fprintf(outfile, "%2dC ", i);
4048                  PCHARSV(copybuffer, 0, rc, outfile);
4049                  fprintf(outfile, " (%d)\n", rc);
4050                  }
4051              }              }
4052            }            }
4053    
4054          for (copynamesptr = copynames;          cnptr = copynames;
4055               *copynamesptr != 0;          for (;;)
              copynamesptr += (int)strlen((char*)copynamesptr) + 1)  
4056            {            {
4057              int rc;
4058            char copybuffer[256];            char copybuffer[256];
4059            int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,  
4060              count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));            if (use_pcre16)
4061                {
4062                if (*(pcre_uint16 *)cnptr == 0) break;
4063                }
4064              else
4065                {
4066                if (*