/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 289 by ph10, Sun Dec 23 12:17:20 2007 UTC revision 1017 by ph10, Sun Aug 26 16:30:50 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 58  POSSIBILITY OF SUCH DAMAGE.
58  #include <locale.h>  #include <locale.h>
59  #include <errno.h>  #include <errno.h>
60    
61  #ifdef SUPPORT_LIBREADLINE  /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67    #ifdef HAVE_UNISTD_H
68  #include <unistd.h>  #include <unistd.h>
69    #endif
70    #if defined(SUPPORT_LIBREADLINE)
71  #include <readline/readline.h>  #include <readline/readline.h>
72  #include <readline/history.h>  #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80  #endif  #endif
   
81    
82  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
83  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 69  input mode under Windows. */ Line 93  input mode under Windows. */
93  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
94  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
95    
96    #ifndef isatty
97    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
98    #endif                         /* though in some environments they seem to   */
99                                   /* be already defined, hence the #ifndefs.    */
100    #ifndef fileno
101    #define fileno _fileno
102    #endif
103    
104    /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106    #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
112  #else  #else
113  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
114  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 76  input mode under Windows. */ Line 116  input mode under Windows. */
116  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
117  #endif  #endif
118    
119    #define PRIV(name) name
120    
121  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
122  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 87  here before pcre_internal.h so that the Line 128  here before pcre_internal.h so that the
128  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
129    
130  #include "pcre.h"  #include "pcre.h"
131    
132    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
133    /* Configure internal macros to 16 bit mode. */
134    #define COMPILE_PCRE16
135    #endif
136    
137  #include "pcre_internal.h"  #include "pcre_internal.h"
138    
139  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* The pcre_printint() function, which prints the internal form of a compiled
140  two copies, we include the source file here, changing the names of the external  regex, is held in a separate file so that (a) it can be compiled in either
141  symbols to prevent clashes. */  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
142    when that is compiled in debug mode. */
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
143    
144  #include "pcre_tables.c"  #ifdef SUPPORT_PCRE8
145    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
146    #endif
147    #ifdef SUPPORT_PCRE16
148    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
149    #endif
150    
151  /* We also need the pcre_printint() function for printing out compiled  /* We need access to some of the data tables that PCRE uses. So as not to have
152  patterns. This function is in a separate file so that it can be included in  to keep two copies, we include the source file here, changing the names of the
153  pcre_compile.c when that module is compiled with debugging enabled.  external symbols to prevent clashes. */
154    
155  The definition of the macro PRINTABLE, which determines whether to print an  #define PCRE_INCLUDED
156    
157    #include "pcre_tables.c"
158    
159    /* The definition of the macro PRINTABLE, which determines whether to print an
160  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
161  contained in this file. We use it here also, in cases when the locale has not  the same as in the printint.src file. We use it here in cases when the locale
162  been explicitly changed, so as to get consistent output from systems that  has not been explicitly changed, so as to get consistent output from systems
163  differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
164    
165  #include "pcre_printint.src"  #ifdef EBCDIC
166    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
167    #else
168    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
169    #endif
170    
171  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
172    
173    /* Posix support is disabled in 16 bit only mode. */
174    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
175    #define NOPOSIX
176    #endif
177    
178  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
179  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 128  Makefile. */ Line 183  Makefile. */
183  #include "pcreposix.h"  #include "pcreposix.h"
184  #endif  #endif
185    
186  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
187  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
188  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
189  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
190  UTF8 support if PCRE is built without it. */  
191    #ifndef SUPPORT_UTF
192  #ifndef SUPPORT_UTF8  #ifndef NOUTF
193  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
194  #endif  #endif
195  #endif  #endif
196    
197    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
198    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
199    only from one place and is handled differently). I couldn't dream up any way of
200    using a single macro to do this in a generic way, because of the many different
201    argument requirements. We know that at least one of SUPPORT_PCRE8 and
202    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
203    use these in the definitions of generic macros.
204    
205    **** Special note about the PCHARSxxx macros: the address of the string to be
206    printed is always given as two arguments: a base address followed by an offset.
207    The base address is cast to the correct data size for 8 or 16 bit data; the
208    offset is in units of this size. If the string were given as base+offset in one
209    argument, the casting might be incorrectly applied. */
210    
211    #ifdef SUPPORT_PCRE8
212    
213    #define PCHARS8(lv, p, offset, len, f) \
214      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
215    
216    #define PCHARSV8(p, offset, len, f) \
217      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
218    
219    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
220      p = read_capture_name8(p, cn8, re)
221    
222    #define STRLEN8(p) ((int)strlen((char *)p))
223    
224    #define SET_PCRE_CALLOUT8(callout) \
225      pcre_callout = callout
226    
227    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
228       pcre_assign_jit_stack(extra, callback, userdata)
229    
230    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
231      re = pcre_compile((char *)pat, options, error, erroffset, tables)
232    
233    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
234        namesptr, cbuffer, size) \
235      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
236        (char *)namesptr, cbuffer, size)
237    
238    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
239      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
240    
241    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
242        offsets, size_offsets, workspace, size_workspace) \
243      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
244        offsets, size_offsets, workspace, size_workspace)
245    
246    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
247        offsets, size_offsets) \
248      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
249        offsets, size_offsets)
250    
251    #define PCRE_FREE_STUDY8(extra) \
252      pcre_free_study(extra)
253    
254    #define PCRE_FREE_SUBSTRING8(substring) \
255      pcre_free_substring(substring)
256    
257    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
258      pcre_free_substring_list(listptr)
259    
260    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
261        getnamesptr, subsptr) \
262      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
263        (char *)getnamesptr, subsptr)
264    
265    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
266      n = pcre_get_stringnumber(re, (char *)ptr)
267    
268    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
269      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
270    
271    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
272      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
273    
274    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
275      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
276    
277    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
278      pcre_printint(re, outfile, debug_lengths)
279    
280    #define PCRE_STUDY8(extra, re, options, error) \
281      extra = pcre_study(re, options, error)
282    
283    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
284      pcre_jit_stack_alloc(startsize, maxsize)
285    
286    #define PCRE_JIT_STACK_FREE8(stack) \
287      pcre_jit_stack_free(stack)
288    
289    #endif /* SUPPORT_PCRE8 */
290    
291    /* -----------------------------------------------------------*/
292    
293    #ifdef SUPPORT_PCRE16
294    
295    #define PCHARS16(lv, p, offset, len, f) \
296      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
297    
298    #define PCHARSV16(p, offset, len, f) \
299      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
300    
301    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
302      p = read_capture_name16(p, cn16, re)
303    
304    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
305    
306    #define SET_PCRE_CALLOUT16(callout) \
307      pcre16_callout = (int (*)(pcre16_callout_block *))callout
308    
309    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
310      pcre16_assign_jit_stack((pcre16_extra *)extra, \
311        (pcre16_jit_callback)callback, userdata)
312    
313    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
314      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
315        tables)
316    
317    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
318        namesptr, cbuffer, size) \
319      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
320        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
321    
322    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
323      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
324        (PCRE_UCHAR16 *)cbuffer, size/2)
325    
326    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
327        offsets, size_offsets, workspace, size_workspace) \
328      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
329        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
330        workspace, size_workspace)
331    
332    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
333        offsets, size_offsets) \
334      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
335        len, start_offset, options, offsets, size_offsets)
336    
337    #define PCRE_FREE_STUDY16(extra) \
338      pcre16_free_study((pcre16_extra *)extra)
339    
340    #define PCRE_FREE_SUBSTRING16(substring) \
341      pcre16_free_substring((PCRE_SPTR16)substring)
342    
343    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
344      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
345    
346    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
347        getnamesptr, subsptr) \
348      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
349        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
350    
351    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
352      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
353    
354    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
355      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
356        (PCRE_SPTR16 *)(void*)subsptr)
357    
358    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
359      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
360        (PCRE_SPTR16 **)(void*)listptr)
361    
362    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
363      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
364        tables)
365    
366    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
367      pcre16_printint(re, outfile, debug_lengths)
368    
369    #define PCRE_STUDY16(extra, re, options, error) \
370      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
371    
372    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
373      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
374    
375    #define PCRE_JIT_STACK_FREE16(stack) \
376      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
377    
378    #endif /* SUPPORT_PCRE16 */
379    
380    
381    /* ----- Both modes are supported; a runtime test is needed, except for
382    pcre_config(), and the JIT stack functions, when it doesn't matter which
383    version is called. ----- */
384    
385    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
386    
387    #define CHAR_SIZE (use_pcre16? 2:1)
388    
389    #define PCHARS(lv, p, offset, len, f) \
390      if (use_pcre16) \
391        PCHARS16(lv, p, offset, len, f); \
392      else \
393        PCHARS8(lv, p, offset, len, f)
394    
395    #define PCHARSV(p, offset, len, f) \
396      if (use_pcre16) \
397        PCHARSV16(p, offset, len, f); \
398      else \
399        PCHARSV8(p, offset, len, f)
400    
401    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
402      if (use_pcre16) \
403        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
404      else \
405        READ_CAPTURE_NAME8(p, cn8, cn16, re)
406    
407    #define SET_PCRE_CALLOUT(callout) \
408      if (use_pcre16) \
409        SET_PCRE_CALLOUT16(callout); \
410      else \
411        SET_PCRE_CALLOUT8(callout)
412    
413    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
414    
415    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
416      if (use_pcre16) \
417        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
418      else \
419        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
420    
421    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
422      if (use_pcre16) \
423        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
424      else \
425        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
426    
427    #define PCRE_CONFIG pcre_config
428    
429    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
430        namesptr, cbuffer, size) \
431      if (use_pcre16) \
432        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
433          namesptr, cbuffer, size); \
434      else \
435        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
436          namesptr, cbuffer, size)
437    
438    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
439      if (use_pcre16) \
440        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
441      else \
442        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
443    
444    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
445        offsets, size_offsets, workspace, size_workspace) \
446      if (use_pcre16) \
447        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
448          offsets, size_offsets, workspace, size_workspace); \
449      else \
450        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
451          offsets, size_offsets, workspace, size_workspace)
452    
453    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
454        offsets, size_offsets) \
455      if (use_pcre16) \
456        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
457          offsets, size_offsets); \
458      else \
459        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
460          offsets, size_offsets)
461    
462    #define PCRE_FREE_STUDY(extra) \
463      if (use_pcre16) \
464        PCRE_FREE_STUDY16(extra); \
465      else \
466        PCRE_FREE_STUDY8(extra)
467    
468    #define PCRE_FREE_SUBSTRING(substring) \
469      if (use_pcre16) \
470        PCRE_FREE_SUBSTRING16(substring); \
471      else \
472        PCRE_FREE_SUBSTRING8(substring)
473    
474    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
475      if (use_pcre16) \
476        PCRE_FREE_SUBSTRING_LIST16(listptr); \
477      else \
478        PCRE_FREE_SUBSTRING_LIST8(listptr)
479    
480    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
481        getnamesptr, subsptr) \
482      if (use_pcre16) \
483        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
484          getnamesptr, subsptr); \
485      else \
486        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
487          getnamesptr, subsptr)
488    
489    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
490      if (use_pcre16) \
491        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
492      else \
493        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
494    
495    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
496      if (use_pcre16) \
497        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
498      else \
499        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
500    
501    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
502      if (use_pcre16) \
503        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
504      else \
505        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
506    
507    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
508      (use_pcre16 ? \
509         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
510        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
511    
512    #define PCRE_JIT_STACK_FREE(stack) \
513      if (use_pcre16) \
514        PCRE_JIT_STACK_FREE16(stack); \
515      else \
516        PCRE_JIT_STACK_FREE8(stack)
517    
518    #define PCRE_MAKETABLES \
519      (use_pcre16? pcre16_maketables() : pcre_maketables())
520    
521    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
522      if (use_pcre16) \
523        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
524      else \
525        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
526    
527    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
528      if (use_pcre16) \
529        PCRE_PRINTINT16(re, outfile, debug_lengths); \
530      else \
531        PCRE_PRINTINT8(re, outfile, debug_lengths)
532    
533    #define PCRE_STUDY(extra, re, options, error) \
534      if (use_pcre16) \
535        PCRE_STUDY16(extra, re, options, error); \
536      else \
537        PCRE_STUDY8(extra, re, options, error)
538    
539    /* ----- Only 8-bit mode is supported ----- */
540    
541    #elif defined SUPPORT_PCRE8
542    #define CHAR_SIZE                 1
543    #define PCHARS                    PCHARS8
544    #define PCHARSV                   PCHARSV8
545    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
546    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
547    #define STRLEN                    STRLEN8
548    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
549    #define PCRE_COMPILE              PCRE_COMPILE8
550    #define PCRE_CONFIG               pcre_config
551    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
552    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
553    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
554    #define PCRE_EXEC                 PCRE_EXEC8
555    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
556    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
557    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
558    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
559    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
560    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
561    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
562    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
563    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
564    #define PCRE_MAKETABLES           pcre_maketables()
565    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
566    #define PCRE_PRINTINT             PCRE_PRINTINT8
567    #define PCRE_STUDY                PCRE_STUDY8
568    
569    /* ----- Only 16-bit mode is supported ----- */
570    
571    #else
572    #define CHAR_SIZE                 2
573    #define PCHARS                    PCHARS16
574    #define PCHARSV                   PCHARSV16
575    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
576    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
577    #define STRLEN                    STRLEN16
578    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
579    #define PCRE_COMPILE              PCRE_COMPILE16
580    #define PCRE_CONFIG               pcre16_config
581    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
582    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
583    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
584    #define PCRE_EXEC                 PCRE_EXEC16
585    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
586    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
587    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
588    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
589    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
590    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
591    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
592    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
593    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
594    #define PCRE_MAKETABLES           pcre16_maketables()
595    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
596    #define PCRE_PRINTINT             PCRE_PRINTINT16
597    #define PCRE_STUDY                PCRE_STUDY16
598    #endif
599    
600    /* ----- End of mode-specific function call macros ----- */
601    
602    
603  /* Other parameters */  /* Other parameters */
604    
# Line 151  UTF8 support if PCRE is built without it Line 610  UTF8 support if PCRE is built without it
610  #endif  #endif
611  #endif  #endif
612    
613    #if !defined NODFA
614    #define DFA_WS_DIMENSION 1000
615    #endif
616    
617  /* This is the default loop count for timing. */  /* This is the default loop count for timing. */
618    
619  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
# Line 165  static int callout_fail_count; Line 628  static int callout_fail_count;
628  static int callout_fail_id;  static int callout_fail_id;
629  static int debug_lengths;  static int debug_lengths;
630  static int first_callout;  static int first_callout;
631    static int jit_was_used;
632  static int locale_set = 0;  static int locale_set = 0;
633  static int show_malloc;  static int show_malloc;
634  static int use_utf8;  static int use_utf;
635  static size_t gotten_store;  static size_t gotten_store;
636    static size_t first_gotten_store = 0;
637    static const unsigned char *last_callout_mark = NULL;
638    
639  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
640    
641  static int buffer_size = 50000;  static int buffer_size = 50000;
642  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
643  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
644  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
645    
646    /* Another buffer is needed for translation to 16-bit character strings. It will
647    be obtained and extended as required. */
648    
649    #ifdef SUPPORT_PCRE16
650    static int buffer16_size = 0;
651    static pcre_uint16 *buffer16 = NULL;
652    
653    #ifdef SUPPORT_PCRE8
654    
655    /* We need the table of operator lengths that is used for 16-bit compiling, in
656    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
657    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
658    appropriately for the 16-bit world. Just as a safety check, make sure that
659    COMPILE_PCRE16 is *not* set. */
660    
661    #ifdef COMPILE_PCRE16
662    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
663    #endif
664    
665    #if LINK_SIZE == 2
666    #undef LINK_SIZE
667    #define LINK_SIZE 1
668    #elif LINK_SIZE == 3 || LINK_SIZE == 4
669    #undef LINK_SIZE
670    #define LINK_SIZE 2
671    #else
672    #error LINK_SIZE must be either 2, 3, or 4
673    #endif
674    
675    #undef IMM2_SIZE
676    #define IMM2_SIZE 1
677    
678    #endif /* SUPPORT_PCRE8 */
679    
680    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
681    #endif  /* SUPPORT_PCRE16 */
682    
683    /* If we have 8-bit support, default use_pcre16 to false; if there is also
684    16-bit support, it can be changed by an option. If there is no 8-bit support,
685    there must be 16-bit support, so default it to 1. */
686    
687    #ifdef SUPPORT_PCRE8
688    static int use_pcre16 = 0;
689    #else
690    static int use_pcre16 = 1;
691    #endif
692    
693    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
694    
695    static int jit_study_bits[] =
696      {
697      PCRE_STUDY_JIT_COMPILE,
698      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
699      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
700      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
701      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
702      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
703      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
704        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
705    };
706    
707    /* Textual explanations for runtime error codes */
708    
/* One slot per run-time result, 31 slots (0..30). NOTE(review): the comments
on slots 0 and 1 suggest the table is indexed by the negated return code
(slot 1 = NOMATCH); confirm against the caller and pcre.h. A NULL slot has no
generic text here: per its comment it is either handled specially or never
returned. */
static const char *errtexts[] = {
  NULL,  /* 0 is no error */
  NULL,  /* NOMATCH is handled specially */
  "NULL argument passed",                      /* slot 2 */
  "bad option value",                          /* slot 3 */
  "magic number missing",                      /* slot 4 */
  "unknown opcode - pattern overwritten?",     /* slot 5 */
  "no more memory",                            /* slot 6 */
  NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",                      /* slot 8 */
  "callout error code",                        /* slot 9 */
  NULL,  /* BADUTF8/16 is handled specially */
  NULL,  /* BADUTF8/16 offset is handled specially */
  NULL,  /* PARTIAL is handled specially */
  "not used - internal error",                 /* slot 13 */
  "internal error - pattern overwritten?",     /* slot 14 */
  "bad count value",                           /* slot 15 */
  "item unsupported for DFA matching",         /* slot 16 */
  "backreference condition or recursion test not supported for DFA matching",  /* slot 17 */
  "match limit not supported for DFA matching",  /* slot 18 */
  "workspace size exceeded in DFA matching",   /* slot 19 */
  "too much recursion for DFA matching",       /* slot 20 */
  "recursion limit exceeded",                  /* slot 21 */
  "not used - internal error",                 /* slot 22 */
  "invalid combination of newline options",    /* slot 23 */
  "bad offset value",                          /* slot 24 */
  NULL,  /* SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",  /* slot 26 */
  "JIT stack limit reached",                   /* slot 27 */
  "pattern compiled in wrong mode: 8-bit/16-bit error",  /* slot 28 */
  "pattern compiled with other endianness",    /* slot 29 */
  "invalid data in workspace for DFA restart"  /* slot 30 */
};
742    
743    
744    /*************************************************
745    *         Alternate character tables             *
746    *************************************************/
747    
748    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
749    using the default tables of the library. However, the T option can be used to
750    select alternate sets of tables, for different kinds of testing. Note also that
751    the L (locale) option also adjusts the tables. */
752    
753    /* This is the set of tables distributed as default with PCRE. It recognizes
754    only ASCII characters. */
755    
static const pcre_uint8 tables0[] = {

/* Layout of this array, in order: a 256-byte lower casing table, a 256-byte
case flipping table, ten 32-byte class bit maps (320 bytes), and a 256-byte
character type table - 1088 bytes in all. */

/* This table is a lower casing table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

  /* space */
  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* xdigit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* digit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* upper */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* lower */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* word */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* graph */
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* print */
  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* punct */
  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* cntrl */
  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
924    
925    /* This is a set of tables that came originally from a Windows user. It seems to
926    be at least an approximation of ISO 8859. In particular, there are characters
927    greater than 128 that are marked as spaces, letters, etc. */
928    
static const pcre_uint8 tables1[] = {

/* Section sizes match tables0: 256 + 256 + 320 + 256 bytes. */

/* Lower casing table (256 bytes). Besides A-Z, values 192-222 except 215 map
to 224-254. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table (256 bytes). 192-222 and 224-254 are swapped, except
that 215 and 247 map to themselves. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Class bit maps: ten maps of 32 bytes each (320 bytes). NOTE(review): the
class names below assume the same order as the list documented in tables0. */

/* space */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
/* xdigit */
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
/* digit */
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
/* upper */
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
/* lower */
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
/* word */
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
/* graph */
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
/* print */
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
/* punct */
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
/* cntrl */
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Character type table (256 bytes), eight characters per row. NOTE(review):
presumably uses the same bit meanings as the type table in tables0. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1067    
1068    
1069    
1070    
1071    #ifndef HAVE_STRERROR
1072    /*************************************************
1073    *     Provide strerror() for non-ANSI libraries  *
1074    *************************************************/
1075    
1076    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1077    in their libraries, but can provide the same facility by this simple
1078    alternative function. */
1079    
/* The C library's own table of error messages and its entry count. */
extern int   sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): look n up in sys_errlist, returning a fixed text
when n lies outside 0 .. sys_nerr-1. */
char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1089    #endif /* HAVE_STRERROR */
1090    
1091    
1092    /*************************************************
1093    *         JIT memory callback                    *
1094    *************************************************/
1095    
1096    static pcre_jit_stack* jit_callback(void *arg)
1097    {
1098    jit_was_used = TRUE;
1099    return (pcre_jit_stack *)arg;
1100    }
1101    
1102    
1103    #if !defined NOUTF || defined SUPPORT_PCRE16
1104    /*************************************************
1105    *            Convert UTF-8 string to value       *
1106    *************************************************/
1107    
1108    /* This function takes one or more bytes that represents a UTF-8 character,
1109    and returns the value of the character.
1110    
1111    Argument:
1112      utf8bytes   a pointer to the byte vector
1113      vptr        a pointer to an int to receive the value
1114    
1115    Returns:      >  0 => the number of bytes consumed
1116                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1117    */
1118    
1119    static int
1120    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1121    {
1122    int c = *utf8bytes++;
1123    int d = c;
1124    int i, j, s;
1125    
1126    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1127      {
1128      if ((d & 0x80) == 0) break;
1129      d <<= 1;
1130      }
1131    
1132    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1133    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1134    
1135    /* i now has a value in the range 1-5 */
1136    
1137    s = 6*i;
1138    d = (c & utf8_table3[i]) << s;
1139    
1140    for (j = 0; j < i; j++)
1141      {
1142      c = *utf8bytes++;
1143      if ((c & 0xc0) != 0x80) return -(j+1);
1144      s -= 6;
1145      d |= (c & 0x3f) << s;
1146      }
1147    
1148    /* Check that encoding was the correct unique one */
1149    
1150    for (j = 0; j < utf8_table1_size; j++)
1151      if (d <= utf8_table1[j]) break;
1152    if (j != i) return -(i+1);
1153    
1154    /* Valid value */
1155    
1156    *vptr = d;
1157    return i+1;
1158    }
1159    #endif /* NOUTF || SUPPORT_PCRE16 */
1160    
1161    
1162    
1163    #if !defined NOUTF || defined SUPPORT_PCRE16
1164    /*************************************************
1165    *       Convert character value to UTF-8         *
1166    *************************************************/
1167    
1168    /* This function takes an integer value in the range 0 - 0x7fffffff
1169    and encodes it as a UTF-8 character in 1 to 6 bytes.
1170    
1171    Arguments:
1172      cvalue     the character value
1173      utf8bytes  pointer to buffer for result - at least 6 bytes long
1174    
1175    Returns:     number of characters placed in the buffer
1176    */
1177    
1178    static int
1179    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1180    {
1181    register int i, j;
1182    for (i = 0; i < utf8_table1_size; i++)
1183      if (cvalue <= utf8_table1[i]) break;
1184    utf8bytes += i;
1185    for (j = i; j > 0; j--)
1186     {
1187     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1188     cvalue >>= 6;
1189     }
1190    *utf8bytes = utf8_table2[i] | cvalue;
1191    return i + 1;
1192    }
1193    #endif
1194    
1195    
1196    #ifdef SUPPORT_PCRE16
1197    /*************************************************
1198    *         Convert a string to 16-bit             *
1199    *************************************************/
1200    
1201    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1202    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1203    double, because values up to 0xffff use 1 to 3 bytes in UTF-8 and exactly 2
1204    bytes in UTF-16. Higher values use 4 bytes in UTF-8 and 4 bytes in UTF-16. The
1205    result is always left in buffer16.
1206    
1207    Note that this function does not object to surrogate values. This is
1208    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1209    for the purpose of testing that they are correctly faulted.
1210    
1211    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1212    in UTF-8 so that values greater than 255 can be handled.
1213    
1214    Arguments:
1215      data       TRUE if converting a data line; FALSE for a regex
1216      p          points to a byte string
1217      utf        true if UTF-8 (to be converted to UTF-16)
1218      len        number of bytes in the string (excluding trailing zero)
1219    
1220    Returns:     number of 16-bit data items used (excluding trailing zero)
1221                 OR -1 if a UTF-8 string is malformed
1222                 OR -2 if a value > 0x10ffff is encountered
1223                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1224    */
1225    
1226    static int
1227    to16(int data, pcre_uint8 *p, int utf, int len)
1228    {
1229    pcre_uint16 *pp;
1230    
1231    if (buffer16_size < 2*len + 2)
1232      {
1233      if (buffer16 != NULL) free(buffer16);
1234      buffer16_size = 2*len + 2;
1235      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1236      if (buffer16 == NULL)
1237        {
1238        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1239        exit(1);
1240        }
1241      }
1242    
1243    pp = buffer16;
1244    
1245    if (!utf && !data)
1246      {
1247      while (len-- > 0) *pp++ = *p++;
1248      }
1249    
1250    else
1251      {
1252      int c = 0;
1253      while (len > 0)
1254        {
1255        int chlen = utf82ord(p, &c);
1256        if (chlen <= 0) return -1;
1257        if (c > 0x10ffff) return -2;
1258        p += chlen;
1259        len -= chlen;
1260        if (c < 0x10000) *pp++ = c; else
1261          {
1262          if (!utf) return -3;
1263          c -= 0x10000;
1264          *pp++ = 0xD800 | (c >> 10);
1265          *pp++ = 0xDC00 | (c & 0x3ff);
1266          }
1267        }
1268      }
1269    
1270    *pp = 0;
1271    return pp - buffer16;
1272    }
1273    #endif
1274    
1275    
1276  /*************************************************  /*************************************************
# Line 202  Returns:       pointer to the start of n Line 1296  Returns:       pointer to the start of n
1296                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1297  */  */
1298    
1299  static uschar *  static pcre_uint8 *
1300  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1301  {  {
1302  uschar *here = start;  pcre_uint8 *here = start;
1303    
1304  for (;;)  for (;;)
1305    {    {
1306    int rlen = buffer_size - (here - buffer);    size_t rlen = (size_t)(buffer_size - (here - buffer));
1307    
1308    if (rlen > 1000)    if (rlen > 1000)
1309      {      {
1310      int dlen;      int dlen;
1311    
1312      /* If libreadline support is required, use readline() to read a line if the      /* If libreadline or libedit support is required, use readline() to read a
1313      input is a terminal. Note that readline() removes the trailing newline, so      line if the input is a terminal. Note that readline() removes the trailing
1314      we must put it back again, to be compatible with fgets(). */      newline, so we must put it back again, to be compatible with fgets(). */
1315    
1316  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1317      if (isatty(fileno(f)))      if (isatty(fileno(f)))
1318        {        {
1319        size_t len;        size_t len;
# Line 239  for (;;) Line 1333  for (;;)
1333      /* Read the next line by normal means, prompting if the file is stdin. */      /* Read the next line by normal means, prompting if the file is stdin. */
1334    
1335        {        {
1336        if (f == stdin) printf(prompt);        if (f == stdin) printf("%s", prompt);
1337        if (fgets((char *)here, rlen,  f) == NULL)        if (fgets((char *)here, rlen,  f) == NULL)
1338          return (here == start)? NULL : start;          return (here == start)? NULL : start;
1339        }        }
# Line 252  for (;;) Line 1346  for (;;)
1346    else    else
1347      {      {
1348      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1349      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1350      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1351      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1352    
1353      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1354        {        {
# Line 285  return NULL;  /* Control never gets here Line 1379  return NULL;  /* Control never gets here
1379    
1380    
1381    
   
   
   
   
1382  /*************************************************  /*************************************************
1383  *          Read number from string               *  *          Read number from string               *
1384  *************************************************/  *************************************************/
# Line 305  Returns:        the unsigned long Line 1395  Returns:        the unsigned long
1395  */  */
1396    
1397  static int  static int
1398  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1399  {  {
1400  int result = 0;  int result = 0;
1401  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 316  return(result); Line 1406  return(result);
1406    
1407    
1408    
   
1409  /*************************************************  /*************************************************
1410  *            Convert UTF-8 string to value       *  *             Print one character                *
1411  *************************************************/  *************************************************/
1412    
1413  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
1414    
1415  Argument:  static int pchar(int c, FILE *f)
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1416  {  {
1417  int c = *utf8bytes++;  if (PRINTOK(c))
 int d = c;  
 int i, j, s;  
   
 for (i = -1; i < 6; i++)               /* i is number of additional bytes */  
1418    {    {
1419    if ((d & 0x80) == 0) break;    if (f != NULL) fprintf(f, "%c", c);
1420    d <<= 1;    return 1;
1421    }    }
1422    
1423  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (c < 0x100)
 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  
   
 /* i now has a value in the range 1-5 */  
   
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
   
 for (j = 0; j < i; j++)  
1424    {    {
1425    c = *utf8bytes++;    if (use_utf)
1426    if ((c & 0xc0) != 0x80) return -(j+1);      {
1427    s -= 6;      if (f != NULL) fprintf(f, "\\x{%02x}", c);
1428    d |= (c & 0x3f) << s;      return 6;
1429        }
1430      else
1431        {
1432        if (f != NULL) fprintf(f, "\\x%02x", c);
1433        return 4;
1434        }
1435    }    }
1436    
1437  /* Check that encoding was the correct unique one */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1438    return (c <= 0x000000ff)? 6 :
1439  for (j = 0; j < utf8_table1_size; j++)         (c <= 0x00000fff)? 7 :
1440    if (d <= utf8_table1[j]) break;         (c <= 0x0000ffff)? 8 :
1441  if (j != i) return -(i+1);         (c <= 0x000fffff)? 9 : 10;
   
 /* Valid value */  
   
 *vptr = d;  
 return i+1;  
 }  
   
 #endif  
   
   
   
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 ord2utf8(int cvalue, uschar *utf8bytes)  
 {  
 register int i, j;  
 for (i = 0; i < utf8_table1_size; i++)  
   if (cvalue <= utf8_table1[i]) break;  
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1442  }  }
1443    
 #endif  
   
1444    
1445    
1446    #ifdef SUPPORT_PCRE8
1447  /*************************************************  /*************************************************
1448  *             Print character string             *  *         Print 8-bit character string           *
1449  *************************************************/  *************************************************/
1450    
1451  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1452  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1453    
1454  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1455  {  {
1456  int c = 0;  int c = 0;
1457  int yield = 0;  int yield = 0;
1458    
1459    if (length < 0)
1460      length = strlen((char *)p);
1461    
1462  while (length-- > 0)  while (length-- > 0)
1463    {    {
1464  #if !defined NOUTF8  #if !defined NOUTF
1465    if (use_utf8)    if (use_utf)
1466      {      {
1467      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
   
1468      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1469        {        {
1470        length -= rc - 1;        length -= rc - 1;
1471        p += rc;        p += rc;
1472        if (PRINTHEX(c))        yield += pchar(c, f);
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
1473        continue;        continue;
1474        }        }
1475      }      }
1476  #endif  #endif
1477      c = *p++;
1478      yield += pchar(c, f);
1479      }
1480    
1481    return yield;
1482    }
1483    #endif
1484    
    /* Not UTF-8, or malformed UTF-8  */  
1485    
1486    c = *p++;  
1487    if (PRINTHEX(c))  #ifdef SUPPORT_PCRE16
1488      {  /*************************************************
1489      if (f != NULL) fprintf(f, "%c", c);  *    Find length of 0-terminated 16-bit string   *
1490      yield++;  *************************************************/
1491      }  
1492    else  static int strlen16(PCRE_SPTR16 p)
1493    {
1494    int len = 0;
1495    while (*p++ != 0) len++;
1496    return len;
1497    }
1498    #endif  /* SUPPORT_PCRE16 */
1499    
1500    
1501    #ifdef SUPPORT_PCRE16
1502    /*************************************************
1503    *           Print 16-bit character string        *
1504    *************************************************/
1505    
1506    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1507    If handed a NULL file, just counts chars without printing. */
1508    
1509    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1510    {
1511    int yield = 0;
1512    
1513    if (length < 0)
1514      length = strlen16(p);
1515    
1516    while (length-- > 0)
1517      {
1518      int c = *p++ & 0xffff;
1519    #if !defined NOUTF
1520      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1521      {      {
1522      if (f != NULL) fprintf(f, "\\x%02x", c);      int d = *p & 0xffff;
1523      yield += 4;      if (d >= 0xDC00 && d < 0xDFFF)
1524          {
1525          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1526          length--;
1527          p++;
1528          }
1529      }      }
1530    #endif
1531      yield += pchar(c, f);
1532    }    }
1533    
1534  return yield;  return yield;
1535  }  }
1536    #endif  /* SUPPORT_PCRE16 */
1537    
1538    
1539    
1540    #ifdef SUPPORT_PCRE8
1541    /*************************************************
1542    *     Read a capture name (8-bit) and check it   *
1543    *************************************************/
1544    
1545    static pcre_uint8 *
1546    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1547    {
1548    pcre_uint8 *npp = *pp;
1549    while (isalnum(*p)) *npp++ = *p++;
1550    *npp++ = 0;
1551    *npp = 0;
1552    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1553      {
1554      fprintf(outfile, "no parentheses with name \"");
1555      PCHARSV(*pp, 0, -1, outfile);
1556      fprintf(outfile, "\"\n");
1557      }
1558    
1559    *pp = npp;
1560    return p;
1561    }
1562    #endif  /* SUPPORT_PCRE8 */
1563    
1564    
1565    
1566    #ifdef SUPPORT_PCRE16
1567    /*************************************************
1568    *     Read a capture name (16-bit) and check it  *
1569    *************************************************/
1570    
1571    /* Note that the text being read is 8-bit. */
1572    
1573    static pcre_uint8 *
1574    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1575    {
1576    pcre_uint16 *npp = *pp;
1577    while (isalnum(*p)) *npp++ = *p++;
1578    *npp++ = 0;
1579    *npp = 0;
1580    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1581      {
1582      fprintf(outfile, "no parentheses with name \"");
1583      PCHARSV(*pp, 0, -1, outfile);
1584      fprintf(outfile, "\"\n");
1585      }
1586    *pp = npp;
1587    return p;
1588    }
1589    #endif  /* SUPPORT_PCRE16 */
1590    
1591    
1592    
# Line 503  if (callout_extra) Line 1615  if (callout_extra)
1615      else      else
1616        {        {
1617        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1618        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1619          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1620        fprintf(f, "\n");        fprintf(f, "\n");
1621        }        }
# Line 516  printed lengths of the substrings. */ Line 1628  printed lengths of the substrings. */
1628    
1629  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1630    
1631  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1632  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1633    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1634    
1635  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1636    
1637  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1638    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1639    
1640  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 559  fprintf(outfile, "%.*s", (cb->next_item_ Line 1671  fprintf(outfile, "%.*s", (cb->next_item_
1671  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1672  first_callout = 0;  first_callout = 0;
1673    
1674    if (cb->mark != last_callout_mark)
1675      {
1676      if (cb->mark == NULL)
1677        fprintf(outfile, "Latest Mark: <unset>\n");
1678      else
1679        {
1680        fprintf(outfile, "Latest Mark: ");
1681        PCHARSV(cb->mark, 0, -1, outfile);
1682        putc('\n', outfile);
1683        }
1684      last_callout_mark = cb->mark;
1685      }
1686    
1687  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1688    {    {
1689    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 578  return (cb->callout_number != callout_fa Line 1703  return (cb->callout_number != callout_fa
1703  *            Local malloc functions              *  *            Local malloc functions              *
1704  *************************************************/  *************************************************/
1705    
1706  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1707  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1708    show_malloc variable is set only during matching. */
1709    
1710  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1711  {  {
1712  void *block = malloc(size);  void *block = malloc(size);
1713  gotten_store = size;  gotten_store = size;
1714    if (first_gotten_store == 0) first_gotten_store = size;
1715  if (show_malloc)  if (show_malloc)
1716    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1717  return block;  return block;
# Line 597  if (show_malloc) Line 1724  if (show_malloc)
1724  free(block);  free(block);
1725  }  }
1726    
   
1727  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1728    
1729  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 620  free(block); Line 1746  free(block);
1746  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1747  *************************************************/  *************************************************/
1748    
1749  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1750    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1751    value, but the code is defensive.
1752    
1753    Arguments:
1754      re        compiled regex
1755      study     study data
1756      option    PCRE_INFO_xxx option
1757      ptr       where to put the data
1758    
1759    Returns:    0 when OK, < 0 on error
1760    */
1761    
1762  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
1763    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1764  {  {
1765  int rc;  int rc;
1766  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1767    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1768    #ifdef SUPPORT_PCRE16
1769      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1770    #else
1771      rc = PCRE_ERROR_BADMODE;
1772    #endif
1773    else
1774    #ifdef SUPPORT_PCRE8
1775      rc = pcre_fullinfo(re, study, option, ptr);
1776    #else
1777      rc = PCRE_ERROR_BADMODE;
1778    #endif
1779    
1780    if (rc < 0)
1781      {
1782      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1783        use_pcre16? "16" : "", option);
1784      if (rc == PCRE_ERROR_BADMODE)
1785        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1786          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1787      }
1788    
1789    return rc;
1790  }  }
1791    
1792    
1793    
1794  /*************************************************  /*************************************************
1795  *         Byte flipping function                 *  *             Swap byte functions                *
1796  *************************************************/  *************************************************/
1797    
1798  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1799  byteflip(unsigned long int value, int n)  value, respectively.
1800    
1801    Arguments:
1802      value        any number
1803    
1804    Returns:       the byte swapped value
1805    */
1806    
1807    static pcre_uint32
1808    swap_uint32(pcre_uint32 value)
1809  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1810  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1811         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1812         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1813         ((value & 0xff000000) >> 24);         (value >> 24);
1814    }
1815    
1816    static pcre_uint16
1817    swap_uint16(pcre_uint16 value)
1818    {
1819    return (value >> 8) | (value << 8);
1820  }  }
1821    
1822    
1823    
1824    /*************************************************
1825    *        Flip bytes in a compiled pattern        *
1826    *************************************************/
1827    
1828    /* This function is called if the 'F' option was present on a pattern that is
1829    to be written to a file. We flip the bytes of all the integer fields in the
1830    regex data block and the study block. In 16-bit mode this also flips relevant
1831    bytes in the pattern itself. This is to make it possible to test PCRE's
1832    ability to reload byte-flipped patterns, e.g. those compiled on a different
1833    architecture. */
1834    
1835    static void
1836    regexflip(pcre *ere, pcre_extra *extra)
1837    {
1838    REAL_PCRE *re = (REAL_PCRE *)ere;
1839    #ifdef SUPPORT_PCRE16
1840    int op;
1841    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1842    int length = re->name_count * re->name_entry_size;
1843    #ifdef SUPPORT_UTF
1844    BOOL utf = (re->options & PCRE_UTF16) != 0;
1845    BOOL utf16_char = FALSE;
1846    #endif /* SUPPORT_UTF */
1847    #endif /* SUPPORT_PCRE16 */
1848    
1849    /* Always flip the bytes in the main data block and study blocks. */
1850    
1851    re->magic_number = REVERSED_MAGIC_NUMBER;
1852    re->size = swap_uint32(re->size);
1853    re->options = swap_uint32(re->options);
1854    re->flags = swap_uint16(re->flags);
1855    re->top_bracket = swap_uint16(re->top_bracket);
1856    re->top_backref = swap_uint16(re->top_backref);
1857    re->first_char = swap_uint16(re->first_char);
1858    re->req_char = swap_uint16(re->req_char);
1859    re->name_table_offset = swap_uint16(re->name_table_offset);
1860    re->name_entry_size = swap_uint16(re->name_entry_size);
1861    re->name_count = swap_uint16(re->name_count);
1862    
1863    if (extra != NULL)
1864      {
1865      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1866      rsd->size = swap_uint32(rsd->size);
1867      rsd->flags = swap_uint32(rsd->flags);
1868      rsd->minlength = swap_uint32(rsd->minlength);
1869      }
1870    
1871    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1872    in the name table, if present, and then in the pattern itself. */
1873    
1874    #ifdef SUPPORT_PCRE16
1875    if (!use_pcre16) return;
1876    
1877    while(TRUE)
1878      {
1879      /* Swap previous characters. */
1880      while (length-- > 0)
1881        {
1882        *ptr = swap_uint16(*ptr);
1883        ptr++;
1884        }
1885    #ifdef SUPPORT_UTF
1886      if (utf16_char)
1887        {
1888        if ((ptr[-1] & 0xfc00) == 0xd800)
1889          {
1890          /* We know that there is only one extra character in UTF-16. */
1891          *ptr = swap_uint16(*ptr);
1892          ptr++;
1893          }
1894        }
1895      utf16_char = FALSE;
1896    #endif /* SUPPORT_UTF */
1897    
1898      /* Get next opcode. */
1899    
1900      length = 0;
1901      op = *ptr;
1902      *ptr++ = swap_uint16(op);
1903    
1904      switch (op)
1905        {
1906        case OP_END:
1907        return;
1908    
1909    #ifdef SUPPORT_UTF
1910        case OP_CHAR:
1911        case OP_CHARI:
1912        case OP_NOT:
1913        case OP_NOTI:
1914        case OP_STAR:
1915        case OP_MINSTAR:
1916        case OP_PLUS:
1917        case OP_MINPLUS:
1918        case OP_QUERY:
1919        case OP_MINQUERY:
1920        case OP_UPTO:
1921        case OP_MINUPTO:
1922        case OP_EXACT:
1923        case OP_POSSTAR:
1924        case OP_POSPLUS:
1925        case OP_POSQUERY:
1926        case OP_POSUPTO:
1927        case OP_STARI:
1928        case OP_MINSTARI:
1929        case OP_PLUSI:
1930        case OP_MINPLUSI:
1931        case OP_QUERYI:
1932        case OP_MINQUERYI:
1933        case OP_UPTOI:
1934        case OP_MINUPTOI:
1935        case OP_EXACTI:
1936        case OP_POSSTARI:
1937        case OP_POSPLUSI:
1938        case OP_POSQUERYI:
1939        case OP_POSUPTOI:
1940        case OP_NOTSTAR:
1941        case OP_NOTMINSTAR:
1942        case OP_NOTPLUS:
1943        case OP_NOTMINPLUS:
1944        case OP_NOTQUERY:
1945        case OP_NOTMINQUERY:
1946        case OP_NOTUPTO:
1947        case OP_NOTMINUPTO:
1948        case OP_NOTEXACT:
1949        case OP_NOTPOSSTAR:
1950        case OP_NOTPOSPLUS:
1951        case OP_NOTPOSQUERY:
1952        case OP_NOTPOSUPTO:
1953        case OP_NOTSTARI:
1954        case OP_NOTMINSTARI:
1955        case OP_NOTPLUSI:
1956        case OP_NOTMINPLUSI:
1957        case OP_NOTQUERYI:
1958        case OP_NOTMINQUERYI:
1959        case OP_NOTUPTOI:
1960        case OP_NOTMINUPTOI:
1961        case OP_NOTEXACTI:
1962        case OP_NOTPOSSTARI:
1963        case OP_NOTPOSPLUSI:
1964        case OP_NOTPOSQUERYI:
1965        case OP_NOTPOSUPTOI:
1966        if (utf) utf16_char = TRUE;
1967    #endif
1968        /* Fall through. */
1969    
1970        default:
1971        length = OP_lengths16[op] - 1;
1972        break;
1973    
1974        case OP_CLASS:
1975        case OP_NCLASS:
1976        /* Skip the character bit map. */
1977        ptr += 32/sizeof(pcre_uint16);
1978        length = 0;
1979        break;
1980    
1981        case OP_XCLASS:
1982        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1983        if (LINK_SIZE > 1)
1984          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1985            - (1 + LINK_SIZE + 1));
1986        else
1987          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1988    
1989        /* Reverse the size of the XCLASS instance. */
1990        *ptr = swap_uint16(*ptr);
1991        ptr++;
1992        if (LINK_SIZE > 1)
1993          {
1994          *ptr = swap_uint16(*ptr);
1995          ptr++;
1996          }
1997    
1998        op = *ptr;
1999        *ptr = swap_uint16(op);
2000        ptr++;
2001        if ((op & XCL_MAP) != 0)
2002          {
2003          /* Skip the character bit map. */
2004          ptr += 32/sizeof(pcre_uint16);
2005          length -= 32/sizeof(pcre_uint16);
2006          }
2007        break;
2008        }
2009      }
2010    /* Control should never reach here in 16 bit mode. */
2011    #endif /* SUPPORT_PCRE16 */
2012    }
2013    
2014    
2015    
2016  /*************************************************  /*************************************************
2017  *        Check match or recursion limit          *  *        Check match or recursion limit          *
2018  *************************************************/  *************************************************/
2019    
2020  static int  static int
2021  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2022    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2023    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2024  {  {
# Line 668  for (;;) Line 2033  for (;;)
2033    {    {
2034    *limit = mid;    *limit = mid;
2035    
2036    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2037      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2038    
2039    if (count == errnumber)    if (count == errnumber)
# Line 713  Returns:    < 0, = 0, or > 0, according Line 2078  Returns:    < 0, = 0, or > 0, according
2078  */  */
2079    
2080  static int  static int
2081  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2082  {  {
2083  while (n--)  while (n--)
2084    {    {
# Line 729  return 0; Line 2094  return 0;
2094  *         Check newline indicator                *  *         Check newline indicator                *
2095  *************************************************/  *************************************************/
2096    
2097  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2098  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
2099    
2100  Arguments:  Arguments:
2101    p           points after the leading '<'    p           points after the leading '<'
# Line 741  Returns:      appropriate PCRE_NEWLINE_x Line 2105  Returns:      appropriate PCRE_NEWLINE_x
2105  */  */
2106    
2107  static int  static int
2108  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2109  {  {
2110  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2111  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2112  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2113  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2114  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2115  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2116  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2117  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2118  return 0;  return 0;
2119  }  }
# Line 765  usage(void) Line 2129  usage(void)
2129  {  {
2130  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2131  printf("Input and output default to stdin and stdout.\n");  printf("Input and output default to stdin and stdout.\n");
2132  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2133  printf("If input is a terminal, readline() is used to read from it.\n");  printf("If input is a terminal, readline() is used to read from it.\n");
2134  #else  #else
2135  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2136  #endif  #endif
2137  printf("\nOptions:\n");  printf("\nOptions:\n");
2138  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2139    printf("  -16      use the 16-bit library\n");
2140    #endif
2141    printf("  -b       show compiled code\n");
2142  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2143    printf("  -C arg   show a specific compile-time option\n");
2144    printf("           and exit with its value. The arg can be:\n");
2145    printf("     linksize     internal link size [2, 3, 4]\n");
2146    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2147    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2148    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2149    printf("     ucp          Unicode Properties supported [0, 1]\n");
2150    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2151    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2152  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2153  #if !defined NODFA  #if !defined NODFA
2154  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
2155  #endif  #endif
2156  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
2157  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
2158           "  -M       find MATCH_LIMIT minimum for each subject\n"
2159         "  -m       output memory used information\n"         "  -m       output memory used information\n"
2160         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
2161  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 786  printf("  -p       use POSIX interface\n Line 2163  printf("  -p       use POSIX interface\n
2163  #endif  #endif
2164  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2165  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2166  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
2167           "  -s+      force each pattern to be studied, using JIT if available\n"
2168           "  -s++     ditto, verifying when JIT was actually used\n"
2169           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2170           "             where 1 <= n <= 7 selects JIT options\n"
2171           "  -s++n    ditto, verifying when JIT was actually used\n"
2172         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2173  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2174  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 806  options, followed by a set of test data, Line 2188  options, followed by a set of test data,
2188  int main(int argc, char **argv)  int main(int argc, char **argv)
2189  {  {
2190  FILE *infile = stdin;  FILE *infile = stdin;
2191    const char *version;
2192  int options = 0;  int options = 0;
2193  int study_options = 0;  int study_options = 0;
2194    int default_find_match_limit = FALSE;
2195  int op = 1;  int op = 1;
2196  int timeit = 0;  int timeit = 0;
2197  int timeitm = 0;  int timeitm = 0;
2198  int showinfo = 0;  int showinfo = 0;
2199  int showstore = 0;  int showstore = 0;
2200    int force_study = -1;
2201    int force_study_options = 0;
2202  int quiet = 0;  int quiet = 0;
2203  int size_offsets = 45;  int size_offsets = 45;
2204  int size_offsets_max;  int size_offsets_max;
2205  int *offsets = NULL;  int *offsets = NULL;
 #if !defined NOPOSIX  
 int posix = 0;  
 #endif  
2206  int debug = 0;  int debug = 0;
2207  int done = 0;  int done = 0;
2208  int all_use_dfa = 0;  int all_use_dfa = 0;
2209    int verify_jit = 0;
2210  int yield = 0;  int yield = 0;
2211  int stack_size;  int stack_size;
2212    
2213  /* These vectors store, end-to-end, a list of captured substring names. Assume  #if !defined NOPOSIX
2214  that 1024 is plenty long enough for the few names we'll be testing. */  int posix = 0;
2215    #endif
2216    #if !defined NODFA
2217    int *dfa_workspace = NULL;
2218    #endif
2219    
2220  uschar copynames[1024];  pcre_jit_stack *jit_stack = NULL;
 uschar getnames[1024];  
2221    
2222  uschar *copynamesptr;  /* These vectors store, end-to-end, a list of zero-terminated captured
2223  uschar *getnamesptr;  substring names, each list itself being terminated by an empty name. Assume
2224    that 1024 is plenty long enough for the few names we'll be testing. It is
2225    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2226    for the actual memory, to ensure alignment. */
2227    
2228    pcre_uint16 copynames[1024];
2229    pcre_uint16 getnames[1024];
2230    
2231    #ifdef SUPPORT_PCRE16
2232    pcre_uint16 *cn16ptr;
2233    pcre_uint16 *gn16ptr;
2234    #endif
2235    
2236  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2237  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2238    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2239    pcre_uint8 *cn8ptr;
2240    pcre_uint8 *gn8ptr;
2241    #endif
2242    
2243  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2244  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2245  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2246    
2247    buffer = (pcre_uint8 *)malloc(buffer_size);
2248    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2249    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2250    
2251  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2252    
# Line 855  it set 0x8000, but then I was advised th Line 2261  it set 0x8000, but then I was advised th
2261  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2262  #endif  #endif
2263    
2264    /* Get the version number: both pcre_version() and pcre16_version() give the
2265    same answer. We just need to ensure that we call one that is available. */
2266    
2267    #ifdef SUPPORT_PCRE8
2268    version = pcre_version();
2269    #else
2270    version = pcre16_version();
2271    #endif
2272    
2273  /* Scan options */  /* Scan options */
2274    
2275  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2276    {    {
2277    unsigned char *endptr;    pcre_uint8 *endptr;
2278      char *arg = argv[op];
2279    
2280      if (strcmp(arg, "-m") == 0) showstore = 1;
2281      else if (strcmp(arg, "-s") == 0) force_study = 0;
2282    
2283    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    else if (strncmp(arg, "-s+", 3) == 0)
2284      showstore = 1;      {
2285    else if (strcmp(argv[op], "-q") == 0) quiet = 1;      arg += 3;
2286    else if (strcmp(argv[op], "-b") == 0) debug = 1;      if (*arg == '+') { arg++; verify_jit = TRUE; }
2287    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      force_study = 1;
2288    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      if (*arg == 0)
2289          force_study_options = jit_study_bits[6];
2290        else if (*arg >= '1' && *arg <= '7')
2291          force_study_options = jit_study_bits[*arg - '1'];
2292        else goto BAD_ARG;
2293        }
2294      else if (strcmp(arg, "-16") == 0)
2295        {
2296    #ifdef SUPPORT_PCRE16
2297        use_pcre16 = 1;
2298    #else
2299        printf("** This version of PCRE was built without 16-bit support\n");
2300        exit(1);
2301    #endif
2302        }
2303      else if (strcmp(arg, "-q") == 0) quiet = 1;
2304      else if (strcmp(arg, "-b") == 0) debug = 1;
2305      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2306      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2307      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2308  #if !defined NODFA  #if !defined NODFA
2309    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2310  #endif  #endif
2311    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2312        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2313          *endptr == 0))          *endptr == 0))
2314      {      {
2315      op++;      op++;
2316      argc--;      argc--;
2317      }      }
2318    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2319      {      {
2320      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2321      int temp;      int temp;
2322      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2323                       *endptr == 0))                       *endptr == 0))
2324        {        {
2325        timeitm = temp;        timeitm = temp;
# Line 891  while (argc > 1 && argv[op][0] == '-') Line 2329  while (argc > 1 && argv[op][0] == '-')
2329      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2330      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2331      }      }
2332    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2333        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2334          *endptr == 0))          *endptr == 0))
2335      {      {
2336  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2337      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2338      exit(1);      exit(1);
2339  #else  #else
# Line 914  while (argc > 1 && argv[op][0] == '-') Line 2352  while (argc > 1 && argv[op][0] == '-')
2352  #endif  #endif
2353      }      }
2354  #if !defined NOPOSIX  #if !defined NOPOSIX
2355    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2356  #endif  #endif
2357    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2358      {      {
2359      int rc;      int rc;
2360      printf("PCRE version %s\n", pcre_version());      unsigned long int lrc;
2361    
2362        if (argc > 2)
2363          {
2364          if (strcmp(argv[op + 1], "linksize") == 0)
2365            {
2366            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2367            printf("%d\n", rc);
2368            yield = rc;
2369            goto EXIT;
2370            }
2371          if (strcmp(argv[op + 1], "pcre8") == 0)
2372            {
2373    #ifdef SUPPORT_PCRE8
2374            printf("1\n");
2375            yield = 1;
2376    #else
2377            printf("0\n");
2378            yield = 0;
2379    #endif
2380            goto EXIT;
2381            }
2382          if (strcmp(argv[op + 1], "pcre16") == 0)
2383            {
2384    #ifdef SUPPORT_PCRE16
2385            printf("1\n");
2386            yield = 1;
2387    #else
2388            printf("0\n");
2389            yield = 0;
2390    #endif
2391            goto EXIT;
2392            }
2393          if (strcmp(argv[op + 1], "utf") == 0)
2394            {
2395    #ifdef SUPPORT_PCRE8
2396            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2397            printf("%d\n", rc);
2398            yield = rc;
2399    #else
2400            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2401            printf("%d\n", rc);
2402            yield = rc;
2403    #endif
2404            goto EXIT;
2405            }
2406          if (strcmp(argv[op + 1], "ucp") == 0)
2407            {
2408            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2409            printf("%d\n", rc);
2410            yield = rc;
2411            goto EXIT;
2412            }
2413          if (strcmp(argv[op + 1], "jit") == 0)
2414            {
2415            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2416            printf("%d\n", rc);
2417            yield = rc;
2418            goto EXIT;
2419            }
2420          if (strcmp(argv[op + 1], "newline") == 0)
2421            {
2422            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2423            /* Note that these values are always the ASCII values, even
2424            in EBCDIC environments. CR is 13 and NL is 10. */
2425            printf("%s\n", (rc == 13)? "CR" :
2426              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2427              (rc == -2)? "ANYCRLF" :
2428              (rc == -1)? "ANY" : "???");
2429            goto EXIT;
2430            }
2431          printf("Unknown -C option: %s\n", argv[op + 1]);
2432          goto EXIT;
2433          }
2434    
2435        printf("PCRE version %s\n", version);
2436      printf("Compiled with\n");      printf("Compiled with\n");
2437    
2438    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2439    are set, either both UTFs are supported or both are not supported. */
2440    
2441    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2442        printf("  8-bit and 16-bit support\n");
2443        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2444        if (rc)
2445          printf("  UTF-8 and UTF-16 support\n");
2446        else
2447          printf("  No UTF-8 or UTF-16 support\n");
2448    #elif defined SUPPORT_PCRE8
2449        printf("  8-bit support only\n");
2450      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2451      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2452      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2453        printf("  16-bit support only\n");
2454        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2455        printf("  %sUTF-16 support\n", rc? "" : "No ");
2456    #endif
2457    
2458        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2459      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2460      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2461      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      if (rc)
2462        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        {
2463          const char *arch;
2464          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2465          printf("  Just-in-time compiler support: %s\n", arch);
2466          }
2467        else
2468          printf("  No just-in-time compiler support\n");
2469        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2470        /* Note that these values are always the ASCII values, even
2471        in EBCDIC environments. CR is 13 and NL is 10. */
2472        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2473          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2474        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2475        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2476      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2477      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2478                                       "all Unicode newlines");                                       "all Unicode newlines");
2479      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2480      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2481      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2482      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2483      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2484      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
2485      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2486      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
2487      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2488      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2489        if (showstore)
2490          {
2491          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2492          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2493          }
2494        printf("\n");
2495      goto EXIT;      goto EXIT;
2496      }      }
2497    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
2498             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
2499      {      {
2500      usage();      usage();
2501      goto EXIT;      goto EXIT;
2502      }      }
2503    else    else
2504      {      {
2505      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
2506        printf("** Unknown or malformed option %s\n", arg);
2507      usage();      usage();
2508      yield = 1;      yield = 1;
2509      goto EXIT;      goto EXIT;
# Line 1000  if (argc > 2) Line 2550  if (argc > 2)
2550    
2551  /* Set alternative malloc function */  /* Set alternative malloc function */
2552    
2553    #ifdef SUPPORT_PCRE8
2554  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2555  pcre_free = new_free;  pcre_free = new_free;
2556  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2557  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2558    #endif
2559    
2560    #ifdef SUPPORT_PCRE16
2561    pcre16_malloc = new_malloc;
2562    pcre16_free = new_free;
2563    pcre16_stack_malloc = stack_malloc;
2564    pcre16_stack_free = stack_free;
2565    #endif
2566    
2567  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2568    
2569  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2570    
2571  /* Main loop */  /* Main loop */
2572    
# Line 1022  while (!done) Line 2581  while (!done)
2581  #endif  #endif
2582    
2583    const char *error;    const char *error;
2584    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2585    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
2586    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
2587      const pcre_uint8 *tables = NULL;
2588      unsigned long int get_options;
2589    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2590    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2591      int do_allcaps = 0;
2592      int do_mark = 0;
2593    int do_study = 0;    int do_study = 0;
2594      int no_force_study = 0;
2595    int do_debug = debug;    int do_debug = debug;
2596    int do_G = 0;    int do_G = 0;
2597    int do_g = 0;    int do_g = 0;
2598    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2599    int do_showrest = 0;    int do_showrest = 0;
2600      int do_showcaprest = 0;
2601    int do_flip = 0;    int do_flip = 0;
2602    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2603    
2604    use_utf8 = 0;  #if !defined NODFA
2605      int dfa_matched = 0;
2606    #endif
2607    
2608      use_utf = 0;
2609    debug_lengths = 1;    debug_lengths = 1;
2610    
2611    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1051  while (!done) Line 2620  while (!done)
2620    
2621    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2622      {      {
2623      unsigned long int magic, get_options;      pcre_uint32 magic;
2624      uschar sbuf[8];      pcre_uint8 sbuf[8];
2625      FILE *f;      FILE *f;
2626    
2627      p++;      p++;
2628        if (*p == '!')
2629          {
2630          do_debug = TRUE;
2631          do_showinfo = TRUE;
2632          p++;
2633          }
2634    
2635      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2636      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2637      *pp = 0;      *pp = 0;
# Line 1067  while (!done) Line 2643  while (!done)
2643        continue;        continue;
2644        }        }
2645    
2646        first_gotten_store = 0;
2647      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2648    
2649      true_size =      true_size =
# Line 1074  while (!done) Line 2651  while (!done)
2651      true_study_size =      true_study_size =
2652        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2653    
2654      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2655      regex_gotten_store = gotten_store;      if (re == NULL)
2656          {
2657          printf("** Failed to get %d bytes of memory for pcre object\n",
2658            (int)true_size);
2659          yield = 1;
2660          goto EXIT;
2661          }
2662        regex_gotten_store = first_gotten_store;
2663    
2664      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2665    
2666      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2667      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2668        {        {
2669        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2670          {          {
2671          do_flip = 1;          do_flip = 1;
2672          }          }
2673        else        else
2674          {          {
2675          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2676            new_free(re);
2677          fclose(f);          fclose(f);
2678          continue;          continue;
2679          }          }
2680        }        }
2681    
2682      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
2683        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2684          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2685    
2686      /* Need to know if UTF-8 for printing data strings */      /* Now see if there is any following study data. */
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
     /* Now see if there is any following study data */  
2687    
2688      if (true_study_size != 0)      if (true_study_size != 0)
2689        {        {
# Line 1118  while (!done) Line 2699  while (!done)
2699          {          {
2700          FAIL_READ:          FAIL_READ:
2701          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2702          if (extra != NULL) new_free(extra);          if (extra != NULL)
2703          if (re != NULL) new_free(re);            {
2704              PCRE_FREE_STUDY(extra);
2705              }
2706            new_free(re);
2707          fclose(f);          fclose(f);
2708          continue;          continue;
2709          }          }
# Line 1128  while (!done) Line 2712  while (!done)
2712        }        }
2713      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2714    
2715        /* Flip the necessary bytes. */
2716        if (do_flip)
2717          {
2718          int rc;
2719          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2720          if (rc == PCRE_ERROR_BADMODE)
2721            {
2722            /* Simulate the result of the function call below. */
2723            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2724              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2725            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2726              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2727            new_free(re);
2728            fclose(f);
2729            continue;
2730            }
2731          }
2732    
2733        /* Need to know if UTF-8 for printing data strings. */
2734    
2735        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2736          {
2737          new_free(re);
2738          fclose(f);
2739          continue;
2740          }
2741        use_utf = (get_options & PCRE_UTF8) != 0;
2742    
2743      fclose(f);      fclose(f);
2744      goto SHOW_INFO;      goto SHOW_INFO;
2745      }      }
2746    
2747    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2748    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2749    
2750    delimiter = *p++;    delimiter = *p++;
2751    
# Line 1144  while (!done) Line 2756  while (!done)
2756      }      }
2757    
2758    pp = p;    pp = p;
2759    poffset = p - buffer;    poffset = (int)(p - buffer);
2760    
2761    for(;;)    for(;;)
2762      {      {
# Line 1198  while (!done) Line 2810  while (!done)
2810        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2811        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2812    
2813        case '+': do_showrest = 1; break;        case '+':
2814          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2815          break;
2816    
2817          case '=': do_allcaps = 1; break;
2818        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2819        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2820        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1208  while (!done) Line 2824  while (!done)
2824        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2825        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2826        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
2827          case 'K': do_mark = 1; break;
2828        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2829        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2830    
# Line 1215  while (!done) Line 2832  while (!done)
2832        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2833  #endif  #endif
2834    
2835        case 'S': do_study = 1; break;        case 'S':
2836          if (do_study == 0)
2837            {
2838            do_study = 1;
2839            if (*pp == '+')
2840              {
2841              if (*(++pp) == '+')
2842                {
2843                verify_jit = TRUE;
2844                pp++;
2845                }
2846              if (*pp >= '1' && *pp <= '7')
2847                study_options |= jit_study_bits[*pp++ - '1'];
2848              else
2849                study_options |= jit_study_bits[6];
2850              }
2851            }
2852          else
2853            {
2854            do_study = 0;
2855            no_force_study = 1;
2856            }
2857          break;
2858    
2859        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2860          case 'W': options |= PCRE_UCP; break;
2861        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2862          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2863        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2864        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2865        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2866    
2867          case 'T':
2868          switch (*pp++)
2869            {
2870            case '0': tables = tables0; break;
2871            case '1': tables = tables1; break;
2872    
2873            case '\r':
2874            case '\n':
2875            case ' ':
2876            case 0:
2877            fprintf(outfile, "** Missing table number after /T\n");
2878            goto SKIP_DATA;
2879    
2880            default:
2881            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2882            goto SKIP_DATA;
2883            }
2884          break;
2885    
2886        case 'L':        case 'L':
2887        ppp = pp;        ppp = pp;
2888        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1234  while (!done) Line 2895  while (!done)
2895          goto SKIP_DATA;          goto SKIP_DATA;
2896          }          }
2897        locale_set = 1;        locale_set = 1;
2898        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2899        pp = ppp;        pp = ppp;
2900        break;        break;
2901    
# Line 1247  while (!done) Line 2908  while (!done)
2908    
2909        case '<':        case '<':
2910          {          {
2911          int x = check_newline(pp, outfile);          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2912          if (x == 0) goto SKIP_DATA;            {
2913          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
2914          while (*pp++ != '>');            pp += 3;
2915              }
2916            else
2917              {
2918              int x = check_newline(pp, outfile);
2919              if (x == 0) goto SKIP_DATA;
2920              options |= x;
2921              while (*pp++ != '>');
2922              }
2923          }          }
2924        break;        break;
2925    
# Line 1267  while (!done) Line 2936  while (!done)
2936    
2937    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2938    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2939    local character tables. */    local character tables. Neither does it have 16-bit support. */
2940    
2941  #if !defined NOPOSIX  #if !defined NOPOSIX
2942    if (posix || do_posix)    if (posix || do_posix)
# Line 1280  while (!done) Line 2949  while (!done)
2949      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2950      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2951      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2952        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2953        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2954    
2955        first_gotten_store = 0;
2956      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2957    
2958      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1300  while (!done) Line 2972  while (!done)
2972  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2973    
2974      {      {
2975        /* In 16-bit mode, convert the input. */
2976    
2977    #ifdef SUPPORT_PCRE16
2978        if (use_pcre16)
2979          {
2980          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2981            {
2982            case -1:
2983            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2984              "converted to UTF-16\n");
2985            goto SKIP_DATA;
2986    
2987            case -2:
2988            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2989              "cannot be converted to UTF-16\n");
2990            goto SKIP_DATA;
2991    
2992            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2993            fprintf(outfile, "**Failed: character value greater than 0xffff "
2994              "cannot be converted to 16-bit in non-UTF mode\n");
2995            goto SKIP_DATA;
2996    
2997            default:
2998            break;
2999            }
3000          p = (pcre_uint8 *)buffer16;
3001          }
3002    #endif
3003    
3004        /* Compile many times when timing */
3005    
3006      if (timeit > 0)      if (timeit > 0)
3007        {        {
3008        register int i;        register int i;
# Line 1307  while (!done) Line 3010  while (!done)
3010        clock_t start_time = clock();        clock_t start_time = clock();
3011        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
3012          {          {
3013          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3014          if (re != NULL) free(re);          if (re != NULL) free(re);
3015          }          }
3016        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1316  while (!done) Line 3019  while (!done)
3019            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
3020        }        }
3021    
3022      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
3023        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3024    
3025      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3026      if non-interactive. */      if non-interactive. */
# Line 1343  while (!done) Line 3047  while (!done)
3047        goto CONTINUE;        goto CONTINUE;
3048        }        }
3049    
3050      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
3051      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
3052      returns only limited data. Check that it agrees with the newer one. */      lines. */
3053    
3054      if (log_store)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3055        fprintf(outfile, "Memory allocation (code space): %d\n",        goto SKIP_DATA;
3056          (int)(gotten_store -      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3057    
3058      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3059      and remember the store that was got. */      and remember the store that was got. */
3060    
3061      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
3062      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3063    
3064        /* Output code size information if requested */
3065    
3066      /* If /S was present, study the regexp to generate additional info to      if (log_store)
3067      help with the matching. */        fprintf(outfile, "Memory allocation (code space): %d\n",
3068            (int)(first_gotten_store -
3069                  sizeof(REAL_PCRE) -
3070                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3071    
3072        /* If -s or /S was present, study the regex to generate additional info to
3073        help with the matching, unless the pattern has the SS option, which
3074        suppresses the effect of /S (used for a few test patterns where studying is
3075        never sensible). */
3076    
3077      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
3078        {        {
3079        if (timeit > 0)        if (timeit > 0)
3080          {          {
# Line 1370  while (!done) Line 3082  while (!done)
3082          clock_t time_taken;          clock_t time_taken;
3083          clock_t start_time = clock();          clock_t start_time = clock();
3084          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3085            extra = pcre_study(re, study_options, &error);            {
3086              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3087              }
3088          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3089          if (extra != NULL) free(extra);          if (extra != NULL)
3090              {
3091              PCRE_FREE_STUDY(extra);
3092              }
3093          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3094            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3095              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3096          }          }
3097        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3098        if (error != NULL)        if (error != NULL)
3099          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3100        else if (extra != NULL)        else if (extra != NULL)
3101            {
3102          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3103            if (log_store)
3104              {
3105              size_t jitsize;
3106              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3107                  jitsize != 0)
3108                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3109              }
3110            }
3111        }        }
3112    
3113      /* If the 'F' option was present, we flip the bytes of all the integer      /* If /K was present, we set up for handling MARK data. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
3114    
3115      if (do_flip)      if (do_mark)
3116        {        {
3117        real_pcre *rre = (real_pcre *)re;        if (extra == NULL)
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
3118          {          {
3119          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3120          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          extra->flags = 0;
         rsd->options = byteflip(rsd->options, sizeof(rsd->options));  
3121          }          }
3122          extra->mark = &markptr;
3123          extra->flags |= PCRE_EXTRA_MARK;
3124        }        }
3125    
3126      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
3127    
3128      SHOW_INFO:      SHOW_INFO:
3129    
3130      if (do_debug)      if (do_debug)
3131        {        {
3132        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3133        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3134        }        }
3135    
3136        /* We already have the options in get_options (see above) */
3137    
3138      if (do_showinfo)      if (do_showinfo)
3139        {        {
3140        unsigned long int get_options, all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3141        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3142          hascrorlf;          hascrorlf, maxlookbehind;
3143        int nameentrysize, namecount;        int nameentrysize, namecount;
3144        const uschar *nametable;        const pcre_uint8 *nametable;
3145    
3146        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3147        new_info(re, NULL, PCRE_INFO_SIZE, &size);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3148        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3149        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3150        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3151        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3152        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3153        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3154        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3155        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3156        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3157        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3158              != 0)
3159  #if !defined NOINFOCHECK          goto SKIP_DATA;
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3160    
3161        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3162          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1487  while (!done) Line 3171  while (!done)
3171          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3172          while (namecount-- > 0)          while (namecount-- > 0)
3173            {            {
3174            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3175              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3176              GET2(nametable, 0));  #else
3177              int imm2_size = IMM2_SIZE;
3178    #endif
3179              int length = (int)STRLEN(nametable + imm2_size);
3180              fprintf(outfile, "  ");
3181              PCHARSV(nametable, imm2_size, length, outfile);
3182              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3183    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3184              fprintf(outfile, "%3d\n", use_pcre16?
3185                 (int)(((PCRE_SPTR16)nametable)[0])
3186                :((int)nametable[0] << 8) | (int)nametable[1]);
3187              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3188    #else
3189              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3190    #ifdef SUPPORT_PCRE8
3191            nametable += nameentrysize;            nametable += nameentrysize;
3192    #else
3193              nametable += nameentrysize * 2;
3194    #endif
3195    #endif
3196            }            }
3197          }          }
3198    
3199        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3200        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3201    
3202        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3203        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3204    
3205        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3206          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3207            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3208            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3209            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1514  while (!done) Line 3216  while (!done)
3216            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3217            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3218            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3219            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3220            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3221              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3222              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3223            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3224    
3225        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1556  while (!done) Line 3260  while (!done)
3260          }          }
3261        else        else
3262          {          {
3263          int ch = first_char & 255;          const char *caseless =
3264          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3265            "" : " (caseless)";            "" : " (caseless)";
3266          if (PRINTHEX(ch))  
3267            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3268              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3269          else          else
3270            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3271              fprintf(outfile, "First char = ");
3272              pchar(first_char, outfile);
3273              fprintf(outfile, "%s\n", caseless);
3274              }
3275          }          }
3276    
3277        if (need_char < 0)        if (need_char < 0)
# Line 1571  while (!done) Line 3280  while (!done)
3280          }          }
3281        else        else
3282          {          {
3283          int ch = need_char & 255;          const char *caseless =
3284          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3285            "" : " (caseless)";            "" : " (caseless)";
3286          if (PRINTHEX(ch))  
3287            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3288              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3289          else          else
3290            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3291              fprintf(outfile, "Need char = ");
3292              pchar(need_char, outfile);
3293              fprintf(outfile, "%s\n", caseless);
3294              }
3295          }          }
3296    
3297          if (maxlookbehind > 0)
3298            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3299    
3300        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3301        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3302        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3303        flipped.) */        flipped.) If study was forced by an external -s, don't show this
3304          information unless -i or -d was also present. This means that, except
3305          when auto-callouts are involved, the output from runs with and without
3306          -s should be identical. */
3307    
3308        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3309          {          {
3310          if (extra == NULL)          if (extra == NULL)
3311            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3312          else          else
3313            {            {
3314            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3315            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
3316    
3317            if (start_bits == NULL)            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3318              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3319            else  
3320              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3321              {              {
3322              int i;              if (start_bits == NULL)
3323              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3324              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3325                {                {
3326                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3327                  int c = 24;
3328                  fprintf(outfile, "Starting byte set: ");
3329                  for (i = 0; i < 256; i++)
3330                  {                  {
3331                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
3332                    {                    {
3333                    fprintf(outfile, "\n  ");                    if (c > 75)
3334                    c = 2;                      {
3335                    }                      fprintf(outfile, "\n  ");
3336                  if (PRINTHEX(i) && i != ' ')                      c = 2;
3337                    {                      }
3338                    fprintf(outfile, "%c ", i);                    if (PRINTOK(i) && i != ' ')
3339                    c += 2;                      {
3340                    }                      fprintf(outfile, "%c ", i);
3341                  else                      c += 2;
3342                    {                      }
3343                    fprintf(outfile, "\\x%02x ", i);                    else
3344                    c += 5;                      {
3345                        fprintf(outfile, "\\x%02x ", i);
3346                        c += 5;
3347                        }
3348                    }                    }
3349                  }                  }
3350                  fprintf(outfile, "\n");
3351                }                }
3352              fprintf(outfile, "\n");              }
3353              }
3354    
3355            /* Show this only if the JIT was set by /S, not by -s. */
3356    
3357            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3358              {
3359              int jit;
3360              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3361                {
3362                if (jit)
3363                  fprintf(outfile, "JIT study was successful\n");
3364                else
3365    #ifdef SUPPORT_JIT
3366                  fprintf(outfile, "JIT study was not successful\n");
3367    #else
3368                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3369    #endif
3370              }              }
3371            }            }
3372          }          }
# Line 1641  while (!done) Line 3385  while (!done)
3385          }          }
3386        else        else
3387          {          {
3388          uschar sbuf[8];          pcre_uint8 sbuf[8];
3389          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3390          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3391          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3392          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3393            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3394          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3395          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3396          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3397          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3398            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3399    
3400          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3401              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1659  while (!done) Line 3404  while (!done)
3404            }            }
3405          else          else
3406            {            {
3407            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3408    
3409              /* If there is study data, write it. */
3410    
3411            if (extra != NULL)            if (extra != NULL)
3412              {              {
3413              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1669  while (!done) Line 3417  while (!done)
3417                  strerror(errno));                  strerror(errno));
3418                }                }
3419              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
3420              }              }
3421            }            }
3422          fclose(f);          fclose(f);
3423          }          }
3424    
3425        new_free(re);        new_free(re);
3426        if (extra != NULL) new_free(extra);        if (extra != NULL)
3427        if (tables != NULL) new_free((void *)tables);          {
3428            PCRE_FREE_STUDY(extra);
3429            }
3430          if (locale_set)
3431            {
3432            new_free((void *)tables);
3433            setlocale(LC_CTYPE, "C");
3434            locale_set = 0;
3435            }
3436        continue;  /* With next regex */        continue;  /* With next regex */
3437        }        }
3438      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1686  while (!done) Line 3441  while (!done)
3441    
3442    for (;;)    for (;;)
3443      {      {
3444      uschar *q;      pcre_uint8 *q;
3445      uschar *bptr;      pcre_uint8 *bptr;
3446      int *use_offsets = offsets;      int *use_offsets = offsets;
3447      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3448      int callout_data = 0;      int callout_data = 0;
3449      int callout_data_set = 0;      int callout_data_set = 0;
3450      int count, c;      int count, c;
3451      int copystrings = 0;      int copystrings = 0;
3452      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
3453      int getstrings = 0;      int getstrings = 0;
3454      int getlist = 0;      int getlist = 0;
3455      int gmatched = 0;      int gmatched = 0;
3456      int start_offset = 0;      int start_offset = 0;
3457        int start_offset_sign = 1;
3458      int g_notempty = 0;      int g_notempty = 0;
3459      int use_dfa = 0;      int use_dfa = 0;
3460    
     options = 0;  
   
3461      *copynames = 0;      *copynames = 0;
3462      *getnames = 0;      *getnames = 0;
3463    
3464      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3465      getnamesptr = getnames;      cn16ptr = copynames;
3466        gn16ptr = getnames;
3467    #endif
3468    #ifdef SUPPORT_PCRE8
3469        cn8ptr = copynames8;
3470        gn8ptr = getnames8;
3471    #endif
3472    
3473      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3474      first_callout = 1;      first_callout = 1;
3475        last_callout_mark = NULL;
3476      callout_extra = 0;      callout_extra = 0;
3477      callout_count = 0;      callout_count = 0;
3478      callout_fail_count = 999999;      callout_fail_count = 999999;
3479      callout_fail_id = -1;      callout_fail_id = -1;
3480      show_malloc = 0;      show_malloc = 0;
3481        options = 0;
3482    
3483      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3484        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 1726  while (!done) Line 3488  while (!done)
3488        {        {
3489        if (extend_inputline(infile, buffer + len, "data> ") == NULL)        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3490          {          {
3491          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
3492              {
3493              fprintf(outfile, "\n");
3494              break;
3495              }
3496          done = 1;          done = 1;
3497          goto CONTINUE;          goto CONTINUE;
3498          }          }
# Line 1748  while (!done) Line 3514  while (!done)
3514        int i = 0;        int i = 0;
3515        int n = 0;        int n = 0;
3516    
3517        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3518          In non-UTF mode, allow the value of the byte to fall through to later,
3519          where values greater than 127 are turned into UTF-8 when running in
3520          16-bit mode. */
3521    
3522          if (c != '\\')
3523            {
3524            if (use_utf)
3525              {
3526              *q++ = c;
3527              continue;
3528              }
3529            }
3530    
3531          /* Handle backslash escapes */
3532    
3533          else switch ((c = *p++))
3534          {          {
3535          case 'a': c =    7; break;          case 'a': c =    7; break;
3536          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 1764  while (!done) Line 3546  while (!done)
3546          c -= '0';          c -= '0';
3547          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3548            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3549          break;          break;
3550    
3551          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3552          if (*p == '{')          if (*p == '{')
3553            {            {
3554            unsigned char *pt = p;            pcre_uint8 *pt = p;
3555            c = 0;            c = 0;
3556            while (isxdigit(*(++pt)))  
3557              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3558              when isxdigit() is a macro that refers to its argument more than
3559              once. This is banned by the C Standard, but apparently happens in at
3560              least one MacOS environment. */
3561    
3562              for (pt++; isxdigit(*pt); pt++)
3563                {
3564                if (++i == 9)
3565                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3566                                   "using only the first eight.\n");
3567                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3568                }
3569            if (*pt == '}')            if (*pt == '}')
3570              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             utn = ord2utf8(c, buff8);  
             for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
             c = buff8[ii];   /* Last byte */  
3571              p = pt + 1;              p = pt + 1;
3572              break;              break;
3573              }              }
3574            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3575            }            }
 #endif  
3576    
3577          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3578            allows UTF-8 characters to be constructed byte by byte, and also allows
3579            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3580            Otherwise, pass it down to later code so that it can be turned into
3581            UTF-8 when running in 16-bit mode. */
3582    
3583          c = 0;          c = 0;
3584          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3585            {            {
3586            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3587            p++;            p++;
3588            }            }
3589            if (use_utf)
3590              {
3591              *q++ = c;
3592              continue;
3593              }
3594          break;          break;
3595    
3596          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 1817  while (!done) Line 3598  while (!done)
3598          continue;          continue;
3599    
3600          case '>':          case '>':
3601            if (*p == '-')
3602              {
3603              start_offset_sign = -1;
3604              p++;
3605              }
3606          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3607            start_offset *= start_offset_sign;
3608          continue;          continue;
3609    
3610          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1836  while (!done) Line 3623  while (!done)
3623            }            }
3624          else if (isalnum(*p))          else if (isalnum(*p))
3625            {            {
3626            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3627            }            }
3628          else if (*p == '+')          else if (*p == '+')
3629            {            {
# Line 1852  while (!done) Line 3632  while (!done)
3632            }            }
3633          else if (*p == '-')          else if (*p == '-')
3634            {            {
3635            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3636            p++;            p++;
3637            }            }
3638          else if (*p == '!')          else if (*p == '!')
# Line 1890  while (!done) Line 3670  while (!done)
3670  #endif  #endif
3671            use_dfa = 1;            use_dfa = 1;
3672          continue;          continue;
3673    #endif
3674    
3675    #if !defined NODFA
3676          case 'F':          case 'F':
3677          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
3678          continue;          continue;
# Line 1904  while (!done) Line 3686  while (!done)
3686            }            }
3687          else if (isalnum(*p))          else if (isalnum(*p))
3688            {            {
3689            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3690            while (isalnum(*p)) *npp++ = *p++;            }
3691            *npp++ = 0;          continue;
3692            *npp = 0;  
3693            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3694            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3695              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3696            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3697                && extra->executable_jit != NULL)
3698              {
3699              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3700              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3701              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3702            }            }
3703          continue;          continue;
3704    
# Line 1924  while (!done) Line 3711  while (!done)
3711          continue;          continue;
3712    
3713          case 'N':          case 'N':
3714          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
3715              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3716            else
3717              options |= PCRE_NOTEMPTY;
3718          continue;          continue;
3719    
3720          case 'O':          case 'O':
# Line 1944  while (!done) Line 3734  while (!done)
3734            }            }
3735          use_size_offsets = n;          use_size_offsets = n;
3736          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
3737              else use_offsets = offsets + size_offsets_max - n;  /* To catch overruns */
3738          continue;          continue;
3739    
3740          case 'P':          case 'P':
3741          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3742              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3743          continue;          continue;
3744    
3745          case 'Q':          case 'Q':
# Line 1982  while (!done) Line 3774  while (!done)
3774          show_malloc = 1;          show_malloc = 1;
3775          continue;          continue;
3776    
3777            case 'Y':
3778            options |= PCRE_NO_START_OPTIMIZE;
3779            continue;
3780    
3781          case 'Z':          case 'Z':
3782          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
3783          continue;          continue;
# Line 1999  while (!done) Line 3795  while (!done)
3795            }            }
3796          continue;          continue;
3797          }          }
3798        *q++ = c;  
3799          /* We now have a character value in c that may be greater than 255. In
3800          16-bit mode, we always convert characters to UTF-8 so that values greater
3801          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3802          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3803          mode must have come from \x{...} or octal constructs because values from
3804          \x.. get this far only in non-UTF mode. */
3805    
3806    #if !defined NOUTF || defined SUPPORT_PCRE16
3807          if (use_pcre16 || use_utf)
3808            {
3809            pcre_uint8 buff8[8];
3810            int ii, utn;
3811            utn = ord2utf8(c, buff8);
3812            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3813            }
3814          else
3815    #endif
3816            {
3817            if (c > 255)
3818              {
3819              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3820                "and UTF-8 mode is not enabled.\n", c);
3821              fprintf(outfile, "** Truncation will probably give the wrong "
3822                "result.\n");
3823              }
3824            *q++ = c;
3825            }
3826        }        }
3827    
3828        /* Reached end of subject string */
3829    
3830      *q = 0;      *q = 0;
3831      len = q - dbuffer;      len = (int)(q - dbuffer);
3832    
3833        /* Move the data to the end of the buffer so that a read over the end of
3834        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3835        we are using the POSIX interface, we must include the terminating zero. */
3836    
3837    #if !defined NOPOSIX
3838        if (posix || do_posix)
3839          {
3840          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3841          bptr += buffer_size - len - 1;
3842          }
3843        else
3844    #endif
3845          {
3846          memmove(bptr + buffer_size - len, bptr, len);
3847          bptr += buffer_size - len;
3848          }
3849    
3850      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
3851        {        {
# Line 2023  while (!done) Line 3866  while (!done)
3866          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3867        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3868        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3869          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3870    
3871        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3872    
# Line 2044  while (!done) Line 3888  while (!done)
3888            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3889              {              {
3890              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3891              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3892                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3893              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3894              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3895                {                {
3896                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3897                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3898                  outfile);                  outfile);
3899                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3900                }                }
# Line 2058  while (!done) Line 3902  while (!done)
3902            }            }
3903          }          }
3904        free(pmatch);        free(pmatch);
3905          goto NEXT_DATA;
3906        }        }
3907    
3908    #endif  /* !defined NOPOSIX */
3909    
3910      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3911    
3912      else  #ifdef SUPPORT_PCRE16
3913  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3914          {
3915          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3916          switch(len)
3917            {
3918            case -1:
3919            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3920              "converted to UTF-16\n");
3921            goto NEXT_DATA;
3922    
3923            case -2:
3924            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3925              "cannot be converted to UTF-16\n");
3926            goto NEXT_DATA;
3927    
3928            case -3:
3929            fprintf(outfile, "**Failed: character value greater than 0xffff "
3930              "cannot be converted to 16-bit in non-UTF mode\n");
3931            goto NEXT_DATA;
3932    
3933            default:
3934            break;
3935            }
3936          bptr = (pcre_uint8 *)buffer16;
3937          }
3938    #endif
3939    
3940        /* Ensure that there is a JIT callback if we want to verify that JIT was
3941        actually used. If jit_stack == NULL, no stack has yet been assigned. */
3942    
3943        if (verify_jit && jit_stack == NULL && extra != NULL)
3944           { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3945    
3946      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3947        {        {
3948          markptr = NULL;
3949          jit_was_used = FALSE;
3950    
3951        if (timeitm > 0)        if (timeitm > 0)
3952          {          {
3953          register int i;          register int i;
# Line 2076  while (!done) Line 3957  while (!done)
3957  #if !defined NODFA  #if !defined NODFA
3958          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
3959            {            {
3960            int workspace[1000];            if ((options & PCRE_DFA_RESTART) != 0)
3961                {
3962                fprintf(outfile, "Timing DFA restarts is not supported\n");
3963                break;
3964                }
3965              if (dfa_workspace == NULL)
3966                dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
3967            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3968              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              {
3969                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3970                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets,
3971                  dfa_workspace, DFA_WS_DIMENSION);
3972                }
3973            }            }
3974          else          else
3975  #endif  #endif
3976    
3977          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3978            count = pcre_exec(re, extra, (char *)bptr, len,            {
3979              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3980                (options | g_notempty), use_offsets, use_size_offsets);
3981              }
3982          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3983          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3984            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2097  while (!done) Line 3987  while (!done)
3987    
3988        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3989        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
3990        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
3991          running of pcre_exec(), so disable the JIT optimization. This makes it
3992          possible to run the same set of tests with and without JIT externally
3993          requested. */
3994    
3995        if (find_match_limit)        if (find_match_limit)
3996          {          {
3997          if (extra == NULL)          if (extra != NULL) { PCRE_FREE_STUDY(extra); }
3998            {          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3999            extra = (pcre_extra *)malloc(sizeof(pcre_extra));          extra->flags = 0;
           extra->flags = 0;  
           }  
4000    
4001          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
4002            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2129  while (!done) Line 4020  while (!done)
4020            }            }
4021          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4022          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
4023          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4024            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
4025          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4026          }          }
# Line 2140  while (!done) Line 4031  while (!done)
4031  #if !defined NODFA  #if !defined NODFA
4032        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
4033          {          {
4034          int workspace[1000];          if (dfa_workspace == NULL)
4035          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,            dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4036            options | g_notempty, use_offsets, use_size_offsets, workspace,          if (dfa_matched++ == 0)
4037            sizeof(workspace)/sizeof(int));            dfa_workspace[0] = -1;  /* To catch bad restart */
4038            PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4039              (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4040              DFA_WS_DIMENSION);
4041          if (count == 0)          if (count == 0)
4042            {            {
4043            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2154  while (!done) Line 4048  while (!done)
4048    
4049        else        else
4050          {          {
4051          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4052            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
4053          if (count == 0)          if (count == 0)
4054            {            {
4055            fprintf(outfile, "Matched, but too many substrings\n");