/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 645 by ph10, Sun Jul 31 17:02:18 2011 UTC revision 1027 by ph10, Mon Sep 3 14:01:38 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 58  POSSIBILITY OF SUCH DAMAGE.
58  #include <locale.h>  #include <locale.h>
59  #include <errno.h>  #include <errno.h>
60    
61  #ifdef SUPPORT_LIBREADLINE  /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
68  #include <unistd.h>  #include <unistd.h>
69  #endif  #endif
70    #if defined(SUPPORT_LIBREADLINE)
71  #include <readline/readline.h>  #include <readline/readline.h>
72  #include <readline/history.h>  #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80  #endif  #endif
   
81    
82  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
83  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 90  input mode under Windows. */ Line 112  input mode under Windows. */
112  #else  #else
113  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
114  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119  #define INPUT_MODE   "rb"  #define INPUT_MODE   "rb"
120  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
121  #endif  #endif
122    #endif
123    
124    #define PRIV(name) name
125    
126  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
127  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 105  here before pcre_internal.h so that the Line 133  here before pcre_internal.h so that the
133  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
134    
135  #include "pcre.h"  #include "pcre.h"
136    
137    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
138    /* Configure internal macros to 16 bit mode. */
139    #define COMPILE_PCRE16
140    #endif
141    
142  #include "pcre_internal.h"  #include "pcre_internal.h"
143    
144    /* The pcre_printint() function, which prints the internal form of a compiled
145    regex, is held in a separate file so that (a) it can be compiled in either
146    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
147    when that is compiled in debug mode. */
148    
149    #ifdef SUPPORT_PCRE8
150    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151    #endif
152    #ifdef SUPPORT_PCRE16
153    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154    #endif
155    
156  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
157  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
158  external symbols to prevent clashes. */  external symbols to prevent clashes. */
159    
160  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
161    
162  #include "pcre_tables.c"  #include "pcre_tables.c"
163    
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
   
164  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
165  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
166  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
167  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
168  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
169    
170    #ifdef EBCDIC
171    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
172    #else
173    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
174    #endif
175    
176  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
177    
178    /* Posix support is disabled in 16 bit only mode. */
179    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
180    #define NOPOSIX
181    #endif
182    
183  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
184  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 148  Makefile. */ Line 188  Makefile. */
188  #include "pcreposix.h"  #include "pcreposix.h"
189  #endif  #endif
190    
191  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
192  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
193  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), and without the interface to the DFA matcher (NODFA). In fact, we
194  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
195  UTF8 support if PCRE is built without it. */  
196    #ifndef SUPPORT_UTF
197  #ifndef SUPPORT_UTF8  #ifndef NOUTF
198  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
199  #endif  #endif
200  #endif  #endif
201    
202    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
203    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
204    only from one place and is handled differently). I couldn't dream up any way of
205    using a single macro to do this in a generic way, because of the many different
206    argument requirements. We know that at least one of SUPPORT_PCRE8 and
207    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
208    use these in the definitions of generic macros.
209    
210    **** Special note about the PCHARSxxx macros: the address of the string to be
211    printed is always given as two arguments: a base address followed by an offset.
212    The base address is cast to the correct data size for 8 or 16 bit data; the
213    offset is in units of this size. If the string were given as base+offset in one
214    argument, the casting might be incorrectly applied. */
215    
216    #ifdef SUPPORT_PCRE8
217    
218    #define PCHARS8(lv, p, offset, len, f) \
219      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
220    
221    #define PCHARSV8(p, offset, len, f) \
222      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
223    
224    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
225      p = read_capture_name8(p, cn8, re)
226    
227    #define STRLEN8(p) ((int)strlen((char *)p))
228    
229    #define SET_PCRE_CALLOUT8(callout) \
230      pcre_callout = callout
231    
232    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
233       pcre_assign_jit_stack(extra, callback, userdata)
234    
235    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
236      re = pcre_compile((char *)pat, options, error, erroffset, tables)
237    
238    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
239        namesptr, cbuffer, size) \
240      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
241        (char *)namesptr, cbuffer, size)
242    
243    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
244      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
245    
246    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
247        offsets, size_offsets, workspace, size_workspace) \
248      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
249        offsets, size_offsets, workspace, size_workspace)
250    
251    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
252        offsets, size_offsets) \
253      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
254        offsets, size_offsets)
255    
256    #define PCRE_FREE_STUDY8(extra) \
257      pcre_free_study(extra)
258    
259    #define PCRE_FREE_SUBSTRING8(substring) \
260      pcre_free_substring(substring)
261    
262    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
263      pcre_free_substring_list(listptr)
264    
265    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
266        getnamesptr, subsptr) \
267      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
268        (char *)getnamesptr, subsptr)
269    
270    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
271      n = pcre_get_stringnumber(re, (char *)ptr)
272    
273    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
274      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
275    
276    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
277      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
278    
279    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
280      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
281    
282    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
283      pcre_printint(re, outfile, debug_lengths)
284    
285    #define PCRE_STUDY8(extra, re, options, error) \
286      extra = pcre_study(re, options, error)
287    
288    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
289      pcre_jit_stack_alloc(startsize, maxsize)
290    
291    #define PCRE_JIT_STACK_FREE8(stack) \
292      pcre_jit_stack_free(stack)
293    
294    #endif /* SUPPORT_PCRE8 */
295    
296    /* -----------------------------------------------------------*/
297    
298    #ifdef SUPPORT_PCRE16
299    
300    #define PCHARS16(lv, p, offset, len, f) \
301      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
302    
303    #define PCHARSV16(p, offset, len, f) \
304      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
305    
306    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
307      p = read_capture_name16(p, cn16, re)
308    
309    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
310    
311    #define SET_PCRE_CALLOUT16(callout) \
312      pcre16_callout = (int (*)(pcre16_callout_block *))callout
313    
314    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
315      pcre16_assign_jit_stack((pcre16_extra *)extra, \
316        (pcre16_jit_callback)callback, userdata)
317    
318    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
319      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
320        tables)
321    
322    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
323        namesptr, cbuffer, size) \
324      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
325        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
326    
327    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
328      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
329        (PCRE_UCHAR16 *)cbuffer, size/2)
330    
331    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
332        offsets, size_offsets, workspace, size_workspace) \
333      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
334        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
335        workspace, size_workspace)
336    
337    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338        offsets, size_offsets) \
339      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
340        len, start_offset, options, offsets, size_offsets)
341    
342    #define PCRE_FREE_STUDY16(extra) \
343      pcre16_free_study((pcre16_extra *)extra)
344    
345    #define PCRE_FREE_SUBSTRING16(substring) \
346      pcre16_free_substring((PCRE_SPTR16)substring)
347    
348    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
349      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
350    
351    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
352        getnamesptr, subsptr) \
353      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
354        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
355    
356    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
357      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
358    
359    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
360      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
361        (PCRE_SPTR16 *)(void*)subsptr)
362    
363    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
364      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
365        (PCRE_SPTR16 **)(void*)listptr)
366    
367    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
368      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
369        tables)
370    
371    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
372      pcre16_printint(re, outfile, debug_lengths)
373    
374    #define PCRE_STUDY16(extra, re, options, error) \
375      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
376    
377    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
378      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
379    
380    #define PCRE_JIT_STACK_FREE16(stack) \
381      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
382    
383    #endif /* SUPPORT_PCRE16 */
384    
385    
386    /* ----- Both modes are supported; a runtime test is needed, except for
387    pcre_config(), where it doesn't matter which version is called (the JIT
388    stack macros below do use a runtime test). ----- */
389    
390    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
391    
392    #define CHAR_SIZE (use_pcre16? 2:1)
393    
394    #define PCHARS(lv, p, offset, len, f) \
395      if (use_pcre16) \
396        PCHARS16(lv, p, offset, len, f); \
397      else \
398        PCHARS8(lv, p, offset, len, f)
399    
400    #define PCHARSV(p, offset, len, f) \
401      if (use_pcre16) \
402        PCHARSV16(p, offset, len, f); \
403      else \
404        PCHARSV8(p, offset, len, f)
405    
406    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
407      if (use_pcre16) \
408        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
409      else \
410        READ_CAPTURE_NAME8(p, cn8, cn16, re)
411    
412    #define SET_PCRE_CALLOUT(callout) \
413      if (use_pcre16) \
414        SET_PCRE_CALLOUT16(callout); \
415      else \
416        SET_PCRE_CALLOUT8(callout)
417    
418    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
419    
420    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
421      if (use_pcre16) \
422        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
423      else \
424        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
425    
426    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
427      if (use_pcre16) \
428        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
429      else \
430        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
431    
432    #define PCRE_CONFIG pcre_config
433    
434    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
435        namesptr, cbuffer, size) \
436      if (use_pcre16) \
437        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
438          namesptr, cbuffer, size); \
439      else \
440        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
441          namesptr, cbuffer, size)
442    
443    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
444      if (use_pcre16) \
445        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
446      else \
447        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
448    
449    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
450        offsets, size_offsets, workspace, size_workspace) \
451      if (use_pcre16) \
452        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
453          offsets, size_offsets, workspace, size_workspace); \
454      else \
455        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
456          offsets, size_offsets, workspace, size_workspace)
457    
458    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
459        offsets, size_offsets) \
460      if (use_pcre16) \
461        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
462          offsets, size_offsets); \
463      else \
464        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
465          offsets, size_offsets)
466    
467    #define PCRE_FREE_STUDY(extra) \
468      if (use_pcre16) \
469        PCRE_FREE_STUDY16(extra); \
470      else \
471        PCRE_FREE_STUDY8(extra)
472    
473    #define PCRE_FREE_SUBSTRING(substring) \
474      if (use_pcre16) \
475        PCRE_FREE_SUBSTRING16(substring); \
476      else \
477        PCRE_FREE_SUBSTRING8(substring)
478    
479    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
480      if (use_pcre16) \
481        PCRE_FREE_SUBSTRING_LIST16(listptr); \
482      else \
483        PCRE_FREE_SUBSTRING_LIST8(listptr)
484    
485    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
486        getnamesptr, subsptr) \
487      if (use_pcre16) \
488        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
489          getnamesptr, subsptr); \
490      else \
491        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
492          getnamesptr, subsptr)
493    
494    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
495      if (use_pcre16) \
496        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
497      else \
498        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
499    
500    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
501      if (use_pcre16) \
502        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
503      else \
504        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
505    
506    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
507      if (use_pcre16) \
508        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
509      else \
510        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
511    
512    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
513      (use_pcre16 ? \
514         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
515        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
516    
517    #define PCRE_JIT_STACK_FREE(stack) \
518      if (use_pcre16) \
519        PCRE_JIT_STACK_FREE16(stack); \
520      else \
521        PCRE_JIT_STACK_FREE8(stack)
522    
523    #define PCRE_MAKETABLES \
524      (use_pcre16? pcre16_maketables() : pcre_maketables())
525    
526    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
527      if (use_pcre16) \
528        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
529      else \
530        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
531    
532    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
533      if (use_pcre16) \
534        PCRE_PRINTINT16(re, outfile, debug_lengths); \
535      else \
536        PCRE_PRINTINT8(re, outfile, debug_lengths)
537    
538    #define PCRE_STUDY(extra, re, options, error) \
539      if (use_pcre16) \
540        PCRE_STUDY16(extra, re, options, error); \
541      else \
542        PCRE_STUDY8(extra, re, options, error)
543    
544    /* ----- Only 8-bit mode is supported ----- */
545    
546    #elif defined SUPPORT_PCRE8
547    #define CHAR_SIZE                 1
548    #define PCHARS                    PCHARS8
549    #define PCHARSV                   PCHARSV8
550    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
551    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
552    #define STRLEN                    STRLEN8
553    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
554    #define PCRE_COMPILE              PCRE_COMPILE8
555    #define PCRE_CONFIG               pcre_config
556    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
557    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
558    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
559    #define PCRE_EXEC                 PCRE_EXEC8
560    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
561    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
562    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
563    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
564    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
565    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
566    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
567    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
568    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
569    #define PCRE_MAKETABLES           pcre_maketables()
570    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
571    #define PCRE_PRINTINT             PCRE_PRINTINT8
572    #define PCRE_STUDY                PCRE_STUDY8
573    
574    /* ----- Only 16-bit mode is supported ----- */
575    
576    #else
577    #define CHAR_SIZE                 2
578    #define PCHARS                    PCHARS16
579    #define PCHARSV                   PCHARSV16
580    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
581    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
582    #define STRLEN                    STRLEN16
583    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
584    #define PCRE_COMPILE              PCRE_COMPILE16
585    #define PCRE_CONFIG               pcre16_config
586    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
587    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
588    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
589    #define PCRE_EXEC                 PCRE_EXEC16
590    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
591    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
592    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
593    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
594    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
595    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
596    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
597    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
598    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
599    #define PCRE_MAKETABLES           pcre16_maketables()
600    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
601    #define PCRE_PRINTINT             PCRE_PRINTINT16
602    #define PCRE_STUDY                PCRE_STUDY16
603    #endif
604    
605    /* ----- End of mode-specific function call macros ----- */
606    
607    
608  /* Other parameters */  /* Other parameters */
609    
# Line 171  UTF8 support if PCRE is built without it Line 615  UTF8 support if PCRE is built without it
615  #endif  #endif
616  #endif  #endif
617    
618    #if !defined NODFA
619    #define DFA_WS_DIMENSION 1000
620    #endif
621    
622  /* This is the default loop count for timing. */  /* This is the default loop count for timing. */
623    
624  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
# Line 185  static int callout_fail_count; Line 633  static int callout_fail_count;
633  static int callout_fail_id;  static int callout_fail_id;
634  static int debug_lengths;  static int debug_lengths;
635  static int first_callout;  static int first_callout;
636    static int jit_was_used;
637  static int locale_set = 0;  static int locale_set = 0;
638  static int show_malloc;  static int show_malloc;
639  static int use_utf8;  static int use_utf;
640  static size_t gotten_store;  static size_t gotten_store;
641    static size_t first_gotten_store = 0;
642  static const unsigned char *last_callout_mark = NULL;  static const unsigned char *last_callout_mark = NULL;
643    
644  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
645    
646  static int buffer_size = 50000;  static int buffer_size = 50000;
647  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
648  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
649  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
650    
651    /* Another buffer is needed for translation to 16-bit character strings. It
652    will be obtained and extended as required. */
653    
654    #ifdef SUPPORT_PCRE16
655    static int buffer16_size = 0;
656    static pcre_uint16 *buffer16 = NULL;
657    
658    #ifdef SUPPORT_PCRE8
659    
660    /* We need the table of operator lengths that is used for 16-bit compiling, in
661    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
662    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
663    appropriately for the 16-bit world. Just as a safety check, make sure that
664    COMPILE_PCRE16 is *not* set. */
665    
666    #ifdef COMPILE_PCRE16
667    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
668    #endif
669    
670    #if LINK_SIZE == 2
671    #undef LINK_SIZE
672    #define LINK_SIZE 1
673    #elif LINK_SIZE == 3 || LINK_SIZE == 4
674    #undef LINK_SIZE
675    #define LINK_SIZE 2
676    #else
677    #error LINK_SIZE must be either 2, 3, or 4
678    #endif
679    
680    #undef IMM2_SIZE
681    #define IMM2_SIZE 1
682    
683    #endif /* SUPPORT_PCRE8 */
684    
685    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
686    #endif  /* SUPPORT_PCRE16 */
687    
688    /* If we have 8-bit support, default use_pcre16 to false; if there is also
689    16-bit support, it can be changed by an option. If there is no 8-bit support,
690    there must be 16-bit support, so default it to 1. */
691    
692    #ifdef SUPPORT_PCRE8
693    static int use_pcre16 = 0;
694    #else
695    static int use_pcre16 = 1;
696    #endif
697    
698    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
699    
700    static int jit_study_bits[] =
701      {
702      PCRE_STUDY_JIT_COMPILE,
703      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
704      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
705      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
706      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
707      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
708      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
709        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
710    };
711    
712    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
713      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
714    
715  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
716    
# Line 208  static const char *errtexts[] = { Line 722  static const char *errtexts[] = {
722    "magic number missing",    "magic number missing",
723    "unknown opcode - pattern overwritten?",    "unknown opcode - pattern overwritten?",
724    "no more memory",    "no more memory",
725    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
726    "match limit exceeded",    "match limit exceeded",
727    "callout error code",    "callout error code",
728    NULL,  /* BADUTF8 is handled specially */    NULL,  /* BADUTF8/16 is handled specially */
729    "bad UTF-8 offset",    NULL,  /* BADUTF8/16 offset is handled specially */
730    NULL,  /* PARTIAL is handled specially */    NULL,  /* PARTIAL is handled specially */
731    "not used - internal error",    "not used - internal error",
732    "internal error - pattern overwritten?",    "internal error - pattern overwritten?",
# Line 221  static const char *errtexts[] = { Line 735  static const char *errtexts[] = {
735    "backreference condition or recursion test not supported for DFA matching",    "backreference condition or recursion test not supported for DFA matching",
736    "match limit not supported for DFA matching",    "match limit not supported for DFA matching",
737    "workspace size exceeded in DFA matching",    "workspace size exceeded in DFA matching",
738    "too much recursion for DFA matching",    "too much recursion for DFA matching",
739    "recursion limit exceeded",    "recursion limit exceeded",
740    "not used - internal error",    "not used - internal error",
741    "invalid combination of newline options",    "invalid combination of newline options",
742    "bad offset value",    "bad offset value",
743    NULL,  /* SHORTUTF8 is handled specially */    NULL,  /* SHORTUTF8/16 is handled specially */
744    "nested recursion at the same subject position"    "nested recursion at the same subject position",
745      "JIT stack limit reached",
746      "pattern compiled in wrong mode: 8-bit/16-bit error",
747      "pattern compiled with other endianness",
748      "invalid data in workspace for DFA restart"
749  };  };
750    
751    
752  /*************************************************  /*************************************************
753  *         Alternate character tables             *  *         Alternate character tables             *
# Line 243  the L (locale) option also adjusts the t Line 761  the L (locale) option also adjusts the t
761  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
762  only ASCII characters. */  only ASCII characters. */
763    
764  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
765    
766  /* This table is a lower casing table. */  /* This table is a lower casing table. */
767    
# Line 416  graph, print, punct, and cntrl. Other cl Line 934  graph, print, punct, and cntrl. Other cl
934  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
935  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
936    
937  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
938  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
939  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
940  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 579  return sys_errlist[n]; Line 1097  return sys_errlist[n];
1097  #endif /* HAVE_STRERROR */  #endif /* HAVE_STRERROR */
1098    
1099    
1100    /*************************************************
1101    *         JIT memory callback                    *
1102    *************************************************/
1103    
1104    static pcre_jit_stack* jit_callback(void *arg)
1105    {
1106    jit_was_used = TRUE;
1107    return (pcre_jit_stack *)arg;
1108    }
1109    
1110    
1111    #if !defined NOUTF || defined SUPPORT_PCRE16
1112    /*************************************************
1113    *            Convert UTF-8 string to value       *
1114    *************************************************/
1115    
1116    /* This function takes one or more bytes that represents a UTF-8 character,
1117    and returns the value of the character.
1118    
1119    Argument:
1120      utf8bytes   a pointer to the byte vector
1121      vptr        a pointer to an int to receive the value
1122    
1123    Returns:      >  0 => the number of bytes consumed
1124                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1125    */
1126    
1127    static int
1128    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1129    {
1130    int c = *utf8bytes++;
1131    int d = c;
1132    int i, j, s;
1133    
1134    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1135      {
1136      if ((d & 0x80) == 0) break;
1137      d <<= 1;
1138      }
1139    
1140    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1141    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1142    
1143    /* i now has a value in the range 1-5 */
1144    
1145    s = 6*i;
1146    d = (c & utf8_table3[i]) << s;
1147    
1148    for (j = 0; j < i; j++)
1149      {
1150      c = *utf8bytes++;
1151      if ((c & 0xc0) != 0x80) return -(j+1);
1152      s -= 6;
1153      d |= (c & 0x3f) << s;
1154      }
1155    
1156    /* Check that encoding was the correct unique one */
1157    
1158    for (j = 0; j < utf8_table1_size; j++)
1159      if (d <= utf8_table1[j]) break;
1160    if (j != i) return -(i+1);
1161    
1162    /* Valid value */
1163    
1164    *vptr = d;
1165    return i+1;
1166    }
1167    #endif /* NOUTF || SUPPORT_PCRE16 */
1168    
1169    
1170    
1171    #if !defined NOUTF || defined SUPPORT_PCRE16
1172    /*************************************************
1173    *       Convert character value to UTF-8         *
1174    *************************************************/
1175    
1176    /* This function takes an integer value in the range 0 - 0x7fffffff
1177    and encodes it as a UTF-8 character in 0 to 6 bytes.
1178    
1179    Arguments:
1180      cvalue     the character value
1181      utf8bytes  pointer to buffer for result - at least 6 bytes long
1182    
1183    Returns:     number of characters placed in the buffer
1184    */
1185    
1186    static int
1187    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1188    {
1189    register int i, j;
1190    for (i = 0; i < utf8_table1_size; i++)
1191      if (cvalue <= utf8_table1[i]) break;
1192    utf8bytes += i;
1193    for (j = i; j > 0; j--)
1194     {
1195     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1196     cvalue >>= 6;
1197     }
1198    *utf8bytes = utf8_table2[i] | cvalue;
1199    return i + 1;
1200    }
1201    #endif
1202    
1203    
1204    #ifdef SUPPORT_PCRE16
1205    /*************************************************
1206    *         Convert a string to 16-bit             *
1207    *************************************************/
1208    
1209    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1210    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1211    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1212    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1213    result is always left in buffer16.
1214    
1215    Note that this function does not object to surrogate values. This is
1216    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1217    for the purpose of testing that they are correctly faulted.
1218    
1219    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1220    in UTF-8 so that values greater than 255 can be handled.
1221    
1222    Arguments:
1223      data       TRUE if converting a data line; FALSE for a regex
1224      p          points to a byte string
1225      utf        true if UTF-8 (to be converted to UTF-16)
1226      len        number of bytes in the string (excluding trailing zero)
1227    
1228    Returns:     number of 16-bit data items used (excluding trailing zero)
1229                 OR -1 if a UTF-8 string is malformed
1230                 OR -2 if a value > 0x10ffff is encountered
1231                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1232    */
1233    
1234    static int
1235    to16(int data, pcre_uint8 *p, int utf, int len)
1236    {
1237    pcre_uint16 *pp;
1238    
1239    if (buffer16_size < 2*len + 2)
1240      {
1241      if (buffer16 != NULL) free(buffer16);
1242      buffer16_size = 2*len + 2;
1243      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1244      if (buffer16 == NULL)
1245        {
1246        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1247        exit(1);
1248        }
1249      }
1250    
1251    pp = buffer16;
1252    
1253    if (!utf && !data)
1254      {
1255      while (len-- > 0) *pp++ = *p++;
1256      }
1257    
1258    else
1259      {
1260      int c = 0;
1261      while (len > 0)
1262        {
1263        int chlen = utf82ord(p, &c);
1264        if (chlen <= 0) return -1;
1265        if (c > 0x10ffff) return -2;
1266        p += chlen;
1267        len -= chlen;
1268        if (c < 0x10000) *pp++ = c; else
1269          {
1270          if (!utf) return -3;
1271          c -= 0x10000;
1272          *pp++ = 0xD800 | (c >> 10);
1273          *pp++ = 0xDC00 | (c & 0x3ff);
1274          }
1275        }
1276      }
1277    
1278    *pp = 0;
1279    return pp - buffer16;
1280    }
1281    #endif
1282    
1283    
1284  /*************************************************  /*************************************************
# Line 604  Returns:       pointer to the start of n Line 1304  Returns:       pointer to the start of n
1304                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1305  */  */
1306    
1307  static uschar *  static pcre_uint8 *
1308  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1309  {  {
1310  uschar *here = start;  pcre_uint8 *here = start;
1311    
1312  for (;;)  for (;;)
1313    {    {
1314    int rlen = (int)(buffer_size - (here - buffer));    size_t rlen = (size_t)(buffer_size - (here - buffer));
1315    
1316    if (rlen > 1000)    if (rlen > 1000)
1317      {      {
1318      int dlen;      int dlen;
1319    
1320      /* If libreadline support is required, use readline() to read a line if the      /* If libreadline or libedit support is required, use readline() to read a
1321      input is a terminal. Note that readline() removes the trailing newline, so      line if the input is a terminal. Note that readline() removes the trailing
1322      we must put it back again, to be compatible with fgets(). */      newline, so we must put it back again, to be compatible with fgets(). */
1323    
1324  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1325      if (isatty(fileno(f)))      if (isatty(fileno(f)))
1326        {        {
1327        size_t len;        size_t len;
# Line 654  for (;;) Line 1354  for (;;)
1354    else    else
1355      {      {
1356      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1357      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1358      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1359      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1360    
1361      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1362        {        {
# Line 687  return NULL;  /* Control never gets here Line 1387  return NULL;  /* Control never gets here
1387    
1388    
1389    
   
   
   
   
1390  /*************************************************  /*************************************************
1391  *          Read number from string               *  *          Read number from string               *
1392  *************************************************/  *************************************************/
# Line 707  Returns:        the unsigned long Line 1403  Returns:        the unsigned long
1403  */  */
1404    
1405  static int  static int
1406  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1407  {  {
1408  int result = 0;  int result = 0;
1409  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 718  return(result); Line 1414  return(result);
1414    
1415    
1416    
   
1417  /*************************************************  /*************************************************
1418  *            Convert UTF-8 string to value       *  *             Print one character                *
1419  *************************************************/  *************************************************/
1420    
1421  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
1422    
1423  Argument:  static int pchar(int c, FILE *f)
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1424  {  {
1425  int c = *utf8bytes++;  if (PRINTOK(c))
1426  int d = c;    {
1427  int i, j, s;    if (f != NULL) fprintf(f, "%c", c);
1428      return 1;
1429      }
1430    
1431  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1432    {    {
1433    if ((d & 0x80) == 0) break;    if (use_utf)
1434    d <<= 1;      {
1435        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1436        return 6;
1437        }
1438      else
1439        {
1440        if (f != NULL) fprintf(f, "\\x%02x", c);
1441        return 4;
1442        }
1443    }    }
1444    
1445  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1446  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return (c <= 0x000000ff)? 6 :
1447           (c <= 0x00000fff)? 7 :
1448           (c <= 0x0000ffff)? 8 :
1449           (c <= 0x000fffff)? 9 : 10;
1450    }
1451    
 /* i now has a value in the range 1-5 */  
1452    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1453    
1454  for (j = 0; j < i; j++)  #ifdef SUPPORT_PCRE8
1455    {  /*************************************************
1456    c = *utf8bytes++;  *         Print 8-bit character string           *
1457    if ((c & 0xc0) != 0x80) return -(j+1);  *************************************************/
   s -= 6;  
   d |= (c & 0x3f) << s;  
   }  
1458    
1459  /* Check that encoding was the correct unique one */  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1460    If handed a NULL file, just counts chars without printing. */
1461    
1462  for (j = 0; j < utf8_table1_size; j++)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1463    if (d <= utf8_table1[j]) break;  {
1464  if (j != i) return -(i+1);  int c = 0;
1465    int yield = 0;
1466    
1467  /* Valid value */  if (length < 0)
1468      length = strlen((char *)p);
1469    
1470  *vptr = d;  while (length-- > 0)
1471  return i+1;    {
1472  }  #if !defined NOUTF
1473      if (use_utf)
1474        {
1475        int rc = utf82ord(p, &c);
1476        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1477          {
1478          length -= rc - 1;
1479          p += rc;
1480          yield += pchar(c, f);
1481          continue;
1482          }
1483        }
1484    #endif
1485      c = *p++;
1486      yield += pchar(c, f);
1487      }
1488    
1489    return yield;
1490    }
1491  #endif  #endif
1492    
1493    
1494    
1495    #ifdef SUPPORT_PCRE16
1496  /*************************************************  /*************************************************
1497  *       Convert character value to UTF-8         *  *    Find length of 0-terminated 16-bit string   *
1498  *************************************************/  *************************************************/
1499    
1500  /* This function takes an integer value in the range 0 - 0x7fffffff  static int strlen16(PCRE_SPTR16 p)
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 ord2utf8(int cvalue, uschar *utf8bytes)  
1501  {  {
1502  register int i, j;  int len = 0;
1503  for (i = 0; i < utf8_table1_size; i++)  while (*p++ != 0) len++;
1504    if (cvalue <= utf8_table1[i]) break;  return len;
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1505  }  }
1506    #endif  /* SUPPORT_PCRE16 */
 #endif  
   
1507    
1508    
1509    #ifdef SUPPORT_PCRE16
1510  /*************************************************  /*************************************************
1511  *             Print character string             *  *           Print 16-bit character string        *
1512  *************************************************/  *************************************************/
1513    
1514  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1515  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1516    
1517  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1518  {  {
 int c = 0;  
1519  int yield = 0;  int yield = 0;
1520    
1521    if (length < 0)
1522      length = strlen16(p);
1523    
1524  while (length-- > 0)  while (length-- > 0)
1525    {    {
1526  #if !defined NOUTF8    int c = *p++ & 0xffff;
1527    if (use_utf8)  #if !defined NOUTF
1528      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1529      {      {
1530      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
1531        if (d >= 0xDC00 && d < 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
1532        {        {
1533        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1534        p += rc;        length--;
1535        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
1536        }        }
1537      }      }
1538  #endif  #endif
1539      yield += pchar(c, f);
1540      }
1541    
1542     /* Not UTF-8, or malformed UTF-8  */  return yield;
1543    }
1544    #endif  /* SUPPORT_PCRE16 */
1545    
1546    c = *p++;  
1547    if (PRINTHEX(c))  
1548      {  #ifdef SUPPORT_PCRE8
1549      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1550      yield++;  *     Read a capture name (8-bit) and check it   *
1551      }  *************************************************/
1552    else  
1553      {  static pcre_uint8 *
1554      if (f != NULL) fprintf(f, "\\x%02x", c);  read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1555      yield += 4;  {
1556      }  pcre_uint8 *npp = *pp;
1557    while (isalnum(*p)) *npp++ = *p++;
1558    *npp++ = 0;
1559    *npp = 0;
1560    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1561      {
1562      fprintf(outfile, "no parentheses with name \"");
1563      PCHARSV(*pp, 0, -1, outfile);
1564      fprintf(outfile, "\"\n");
1565    }    }
1566    
1567  return yield;  *pp = npp;
1568    return p;
1569  }  }
1570    #endif  /* SUPPORT_PCRE8 */
1571    
1572    
1573    
1574    #ifdef SUPPORT_PCRE16
1575    /*************************************************
1576    *     Read a capture name (16-bit) and check it  *
1577    *************************************************/
1578    
1579    /* Note that the text being read is 8-bit. */
1580    
1581    static pcre_uint8 *
1582    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1583    {
1584    pcre_uint16 *npp = *pp;
1585    while (isalnum(*p)) *npp++ = *p++;
1586    *npp++ = 0;
1587    *npp = 0;
1588    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1589      {
1590      fprintf(outfile, "no parentheses with name \"");
1591      PCHARSV(*pp, 0, -1, outfile);
1592      fprintf(outfile, "\"\n");
1593      }
1594    *pp = npp;
1595    return p;
1596    }
1597    #endif  /* SUPPORT_PCRE16 */
1598    
1599    
1600    
# Line 905  if (callout_extra) Line 1623  if (callout_extra)
1623      else      else
1624        {        {
1625        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1626        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1627          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1628        fprintf(f, "\n");        fprintf(f, "\n");
1629        }        }
# Line 918  printed lengths of the substrings. */ Line 1636  printed lengths of the substrings. */
1636    
1637  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1638    
1639  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1640  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1641    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1642    
1643  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1644    
1645  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1646    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1647    
1648  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 961  fprintf(outfile, "%.*s", (cb->next_item_ Line 1679  fprintf(outfile, "%.*s", (cb->next_item_
1679  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1680  first_callout = 0;  first_callout = 0;
1681    
1682  if (cb->mark != last_callout_mark)  if (cb->mark != last_callout_mark)
1683    {    {
1684    fprintf(outfile, "Latest Mark: %s\n",    if (cb->mark == NULL)
1685      (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));      fprintf(outfile, "Latest Mark: <unset>\n");
1686    last_callout_mark = cb->mark;    else
1687    }      {
1688        fprintf(outfile, "Latest Mark: ");
1689        PCHARSV(cb->mark, 0, -1, outfile);
1690        putc('\n', outfile);
1691        }
1692      last_callout_mark = cb->mark;
1693      }
1694    
1695  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1696    {    {
# Line 987  return (cb->callout_number != callout_fa Line 1711  return (cb->callout_number != callout_fa
1711  *            Local malloc functions              *  *            Local malloc functions              *
1712  *************************************************/  *************************************************/
1713    
1714  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1715  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1716    show_malloc variable is set only during matching. */
1717    
1718  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1719  {  {
1720  void *block = malloc(size);  void *block = malloc(size);
1721  gotten_store = size;  gotten_store = size;
1722    if (first_gotten_store == 0) first_gotten_store = size;
1723  if (show_malloc)  if (show_malloc)
1724    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1725  return block;  return block;
# Line 1006  if (show_malloc) Line 1732  if (show_malloc)
1732  free(block);  free(block);
1733  }  }
1734    
   
1735  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1736    
1737  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 1029  free(block); Line 1754  free(block);
1754  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1755  *************************************************/  *************************************************/
1756    
1757  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1758    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1759    value, but the code is defensive.
1760    
1761    Arguments:
1762      re        compiled regex
1763      study     study data
1764      option    PCRE_INFO_xxx option
1765      ptr       where to put the data
1766    
1767    Returns:    0 when OK, < 0 on error
1768    */
1769    
1770  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
1771    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1772  {  {
1773  int rc;  int rc;
1774  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1775    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1776    #ifdef SUPPORT_PCRE16
1777      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1778    #else
1779      rc = PCRE_ERROR_BADMODE;
1780    #endif
1781    else
1782    #ifdef SUPPORT_PCRE8
1783      rc = pcre_fullinfo(re, study, option, ptr);
1784    #else
1785      rc = PCRE_ERROR_BADMODE;
1786    #endif
1787    
1788    if (rc < 0)
1789      {
1790      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1791        use_pcre16? "16" : "", option);
1792      if (rc == PCRE_ERROR_BADMODE)
1793        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1794          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1795      }
1796    
1797    return rc;
1798  }  }
1799    
1800    
1801    
1802  /*************************************************  /*************************************************
1803  *         Byte flipping function                 *  *             Swap byte functions                *
1804  *************************************************/  *************************************************/
1805    
1806  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1807  byteflip(unsigned long int value, int n)  value, respectively.
1808    
1809    Arguments:
1810      value        any number
1811    
1812    Returns:       the byte swapped value
1813    */
1814    
1815    static pcre_uint32
1816    swap_uint32(pcre_uint32 value)
1817  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1818  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1819         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1820         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1821         ((value & 0xff000000) >> 24);         (value >> 24);
1822  }  }
1823    
1824    static pcre_uint16
1825    swap_uint16(pcre_uint16 value)
1826    {
1827    return (value >> 8) | (value << 8);
1828    }
1829    
1830    
1831    
1832    /*************************************************
1833    *        Flip bytes in a compiled pattern        *
1834    *************************************************/
1835    
1836    /* This function is called if the 'F' option was present on a pattern that is
1837    to be written to a file. We flip the bytes of all the integer fields in the
1838    regex data block and the study block. In 16-bit mode this also flips relevant
1839    bytes in the pattern itself. This is to make it possible to test PCRE's
1840    ability to reload byte-flipped patterns, e.g. those compiled on a different
1841    architecture. */
1842    
1843    static void
1844    regexflip(pcre *ere, pcre_extra *extra)
1845    {
1846    REAL_PCRE *re = (REAL_PCRE *)ere;
1847    #ifdef SUPPORT_PCRE16
1848    int op;
1849    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1850    int length = re->name_count * re->name_entry_size;
1851    #ifdef SUPPORT_UTF
1852    BOOL utf = (re->options & PCRE_UTF16) != 0;
1853    BOOL utf16_char = FALSE;
1854    #endif /* SUPPORT_UTF */
1855    #endif /* SUPPORT_PCRE16 */
1856    
1857    /* Always flip the bytes in the main data block and study blocks. */
1858    
1859    re->magic_number = REVERSED_MAGIC_NUMBER;
1860    re->size = swap_uint32(re->size);
1861    re->options = swap_uint32(re->options);
1862    re->flags = swap_uint16(re->flags);
1863    re->top_bracket = swap_uint16(re->top_bracket);
1864    re->top_backref = swap_uint16(re->top_backref);
1865    re->first_char = swap_uint16(re->first_char);
1866    re->req_char = swap_uint16(re->req_char);
1867    re->name_table_offset = swap_uint16(re->name_table_offset);
1868    re->name_entry_size = swap_uint16(re->name_entry_size);
1869    re->name_count = swap_uint16(re->name_count);
1870    
1871    if (extra != NULL)
1872      {
1873      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1874      rsd->size = swap_uint32(rsd->size);
1875      rsd->flags = swap_uint32(rsd->flags);
1876      rsd->minlength = swap_uint32(rsd->minlength);
1877      }
1878    
1879    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1880    in the name table, if present, and then in the pattern itself. */
1881    
1882    #ifdef SUPPORT_PCRE16
1883    if (!use_pcre16) return;
1884    
1885    while(TRUE)
1886      {
1887      /* Swap previous characters. */
1888      while (length-- > 0)
1889        {
1890        *ptr = swap_uint16(*ptr);
1891        ptr++;
1892        }
1893    #ifdef SUPPORT_UTF
1894      if (utf16_char)
1895        {
1896        if ((ptr[-1] & 0xfc00) == 0xd800)
1897          {
1898          /* We know that there is only one extra character in UTF-16. */
1899          *ptr = swap_uint16(*ptr);
1900          ptr++;
1901          }
1902        }
1903      utf16_char = FALSE;
1904    #endif /* SUPPORT_UTF */
1905    
1906      /* Get next opcode. */
1907    
1908      length = 0;
1909      op = *ptr;
1910      *ptr++ = swap_uint16(op);
1911    
1912      switch (op)
1913        {
1914        case OP_END:
1915        return;
1916    
1917    #ifdef SUPPORT_UTF
1918        case OP_CHAR:
1919        case OP_CHARI:
1920        case OP_NOT:
1921        case OP_NOTI:
1922        case OP_STAR:
1923        case OP_MINSTAR:
1924        case OP_PLUS:
1925        case OP_MINPLUS:
1926        case OP_QUERY:
1927        case OP_MINQUERY:
1928        case OP_UPTO:
1929        case OP_MINUPTO:
1930        case OP_EXACT:
1931        case OP_POSSTAR:
1932        case OP_POSPLUS:
1933        case OP_POSQUERY:
1934        case OP_POSUPTO:
1935        case OP_STARI:
1936        case OP_MINSTARI:
1937        case OP_PLUSI:
1938        case OP_MINPLUSI:
1939        case OP_QUERYI:
1940        case OP_MINQUERYI:
1941        case OP_UPTOI:
1942        case OP_MINUPTOI:
1943        case OP_EXACTI:
1944        case OP_POSSTARI:
1945        case OP_POSPLUSI:
1946        case OP_POSQUERYI:
1947        case OP_POSUPTOI:
1948        case OP_NOTSTAR:
1949        case OP_NOTMINSTAR:
1950        case OP_NOTPLUS:
1951        case OP_NOTMINPLUS:
1952        case OP_NOTQUERY:
1953        case OP_NOTMINQUERY:
1954        case OP_NOTUPTO:
1955        case OP_NOTMINUPTO:
1956        case OP_NOTEXACT:
1957        case OP_NOTPOSSTAR:
1958        case OP_NOTPOSPLUS:
1959        case OP_NOTPOSQUERY:
1960        case OP_NOTPOSUPTO:
1961        case OP_NOTSTARI:
1962        case OP_NOTMINSTARI:
1963        case OP_NOTPLUSI:
1964        case OP_NOTMINPLUSI:
1965        case OP_NOTQUERYI:
1966        case OP_NOTMINQUERYI:
1967        case OP_NOTUPTOI:
1968        case OP_NOTMINUPTOI:
1969        case OP_NOTEXACTI:
1970        case OP_NOTPOSSTARI:
1971        case OP_NOTPOSPLUSI:
1972        case OP_NOTPOSQUERYI:
1973        case OP_NOTPOSUPTOI:
1974        if (utf) utf16_char = TRUE;
1975    #endif
1976        /* Fall through. */
1977    
1978        default:
1979        length = OP_lengths16[op] - 1;
1980        break;
1981    
1982        case OP_CLASS:
1983        case OP_NCLASS:
1984        /* Skip the character bit map. */
1985        ptr += 32/sizeof(pcre_uint16);
1986        length = 0;
1987        break;
1988    
1989        case OP_XCLASS:
1990        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1991        if (LINK_SIZE > 1)
1992          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1993            - (1 + LINK_SIZE + 1));
1994        else
1995          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1996    
1997        /* Reverse the size of the XCLASS instance. */
1998        *ptr = swap_uint16(*ptr);
1999        ptr++;
2000        if (LINK_SIZE > 1)
2001          {
2002          *ptr = swap_uint16(*ptr);
2003          ptr++;
2004          }
2005    
2006        op = *ptr;
2007        *ptr = swap_uint16(op);
2008        ptr++;
2009        if ((op & XCL_MAP) != 0)
2010          {
2011          /* Skip the character bit map. */
2012          ptr += 32/sizeof(pcre_uint16);
2013          length -= 32/sizeof(pcre_uint16);
2014          }
2015        break;
2016        }
2017      }
2018    /* Control should never reach here in 16 bit mode. */
2019    #endif /* SUPPORT_PCRE16 */
2020    }
2021    
2022    
2023    
# Line 1062  return ((value & 0x000000ff) << 24) | Line 2026  return ((value & 0x000000ff) << 24) |
2026  *************************************************/  *************************************************/
2027    
2028  static int  static int
2029  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2030    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2031    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2032  {  {
# Line 1077  for (;;) Line 2041  for (;;)
2041    {    {
2042    *limit = mid;    *limit = mid;
2043    
2044    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2045      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2046    
2047    if (count == errnumber)    if (count == errnumber)
# Line 1122  Returns:    < 0, = 0, or > 0, according Line 2086  Returns:    < 0, = 0, or > 0, according
2086  */  */
2087    
2088  static int  static int
2089  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2090  {  {
2091  while (n--)  while (n--)
2092    {    {
# Line 1149  Returns:      appropriate PCRE_NEWLINE_x Line 2113  Returns:      appropriate PCRE_NEWLINE_x
2113  */  */
2114    
2115  static int  static int
2116  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2117  {  {
2118  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2119  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2120  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2121  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2122  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2123  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2124  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2125  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2126  return 0;  return 0;
2127  }  }
# Line 1173  usage(void) Line 2137  usage(void)
2137  {  {
2138  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2139  printf("Input and output default to stdin and stdout.\n");  printf("Input and output default to stdin and stdout.\n");
2140  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2141  printf("If input is a terminal, readline() is used to read from it.\n");  printf("If input is a terminal, readline() is used to read from it.\n");
2142  #else  #else
2143  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2144  #endif  #endif
2145  printf("\nOptions:\n");  printf("\nOptions:\n");
2146  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2147    printf("  -16      use the 16-bit library\n");
2148    #endif
2149    printf("  -b       show compiled code\n");
2150  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2151    printf("  -C arg   show a specific compile-time option\n");
2152    printf("           and exit with its value. The arg can be:\n");
2153    printf("     linksize     internal link size [2, 3, 4]\n");
2154    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2155    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2156    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2157    printf("     ucp          Unicode Properties supported [0, 1]\n");
2158    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2159    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2160  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2161  #if !defined NODFA  #if !defined NODFA
2162  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 1195  printf("  -p       use POSIX interface\n Line 2171  printf("  -p       use POSIX interface\n
2171  #endif  #endif
2172  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2173  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2174  printf("  -s       force each pattern to be studied\n"  printf("  -s       force each pattern to be studied at basic level\n"
2175           "  -s+      force each pattern to be studied, using JIT if available\n"
2176           "  -s++     ditto, verifying when JIT was actually used\n"
2177           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2178           "             where 1 <= n <= 7 selects JIT options\n"
2179           "  -s++n    ditto, verifying when JIT was actually used\n"
2180         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2181  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2182  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 1215  options, followed by a set of test data, Line 2196  options, followed by a set of test data,
2196  int main(int argc, char **argv)  int main(int argc, char **argv)
2197  {  {
2198  FILE *infile = stdin;  FILE *infile = stdin;
2199    const char *version;
2200  int options = 0;  int options = 0;
2201  int study_options = 0;  int study_options = 0;
2202  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 1223  int timeit = 0; Line 2205  int timeit = 0;
2205  int timeitm = 0;  int timeitm = 0;
2206  int showinfo = 0;  int showinfo = 0;
2207  int showstore = 0;  int showstore = 0;
2208  int force_study = 0;  int force_study = -1;
2209    int force_study_options = 0;
2210  int quiet = 0;  int quiet = 0;
2211  int size_offsets = 45;  int size_offsets = 45;
2212  int size_offsets_max;  int size_offsets_max;
2213  int *offsets = NULL;  int *offsets = NULL;
 #if !defined NOPOSIX  
 int posix = 0;  
 #endif  
2214  int debug = 0;  int debug = 0;
2215  int done = 0;  int done = 0;
2216  int all_use_dfa = 0;  int all_use_dfa = 0;
2217    int verify_jit = 0;
2218  int yield = 0;  int yield = 0;
2219  int stack_size;  int stack_size;
2220    
2221  /* These vectors store, end-to-end, a list of captured substring names. Assume  #if !defined NOPOSIX
2222  that 1024 is plenty long enough for the few names we'll be testing. */  int posix = 0;
2223    #endif
2224    #if !defined NODFA
2225    int *dfa_workspace = NULL;
2226    #endif
2227    
2228  uschar copynames[1024];  pcre_jit_stack *jit_stack = NULL;
 uschar getnames[1024];  
2229    
2230  uschar *copynamesptr;  /* These vectors store, end-to-end, a list of zero-terminated captured
2231  uschar *getnamesptr;  substring names, each list itself being terminated by an empty name. Assume
2232    that 1024 is plenty long enough for the few names we'll be testing. It is
2233    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2234    for the actual memory, to ensure alignment. */
2235    
2236    pcre_uint16 copynames[1024];
2237    pcre_uint16 getnames[1024];
2238    
2239    #ifdef SUPPORT_PCRE16
2240    pcre_uint16 *cn16ptr;
2241    pcre_uint16 *gn16ptr;
2242    #endif
2243    
2244  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2245  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2246    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2247    pcre_uint8 *cn8ptr;
2248    pcre_uint8 *gn8ptr;
2249    #endif
2250    
2251  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2252  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2253  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2254    
2255    buffer = (pcre_uint8 *)malloc(buffer_size);
2256    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2257    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2258    
2259  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2260    
# Line 1266  it set 0x8000, but then I was advised th Line 2269  it set 0x8000, but then I was advised th
2269  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2270  #endif  #endif
2271    
2272    /* Get the version number: both pcre_version() and pcre16_version() give the
2273    same answer. We just need to ensure that we call one that is available. */
2274    
2275    #ifdef SUPPORT_PCRE8
2276    version = pcre_version();
2277    #else
2278    version = pcre16_version();
2279    #endif
2280    
2281  /* Scan options */  /* Scan options */
2282    
2283  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2284    {    {
2285    unsigned char *endptr;    pcre_uint8 *endptr;
2286      char *arg = argv[op];
2287    
2288    if (strcmp(argv[op], "-m") == 0) showstore = 1;    if (strcmp(arg, "-m") == 0) showstore = 1;
2289    else if (strcmp(argv[op], "-s") == 0) force_study = 1;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2290    else if (strcmp(argv[op], "-q") == 0) quiet = 1;  
2291    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strncmp(arg, "-s+", 3) == 0)
2292    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      {
2293    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      arg += 3;
2294    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;      if (*arg == '+') { arg++; verify_jit = TRUE; }
2295        force_study = 1;
2296        if (*arg == 0)
2297          force_study_options = jit_study_bits[6];
2298        else if (*arg >= '1' && *arg <= '7')
2299          force_study_options = jit_study_bits[*arg - '1'];
2300        else goto BAD_ARG;
2301        }
2302      else if (strcmp(arg, "-16") == 0)
2303        {
2304    #ifdef SUPPORT_PCRE16
2305        use_pcre16 = 1;
2306    #else
2307        printf("** This version of PCRE was built without 16-bit support\n");
2308        exit(1);
2309    #endif
2310        }
2311      else if (strcmp(arg, "-q") == 0) quiet = 1;
2312      else if (strcmp(arg, "-b") == 0) debug = 1;
2313      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2314      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2315      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2316  #if !defined NODFA  #if !defined NODFA
2317    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2318  #endif  #endif
2319    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2320        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2321          *endptr == 0))          *endptr == 0))
2322      {      {
2323      op++;      op++;
2324      argc--;      argc--;
2325      }      }
2326    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2327      {      {
2328      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2329      int temp;      int temp;
2330      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2331                       *endptr == 0))                       *endptr == 0))
2332        {        {
2333        timeitm = temp;        timeitm = temp;
# Line 1303  while (argc > 1 && argv[op][0] == '-') Line 2337  while (argc > 1 && argv[op][0] == '-')
2337      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2338      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2339      }      }
2340    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2341        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2342          *endptr == 0))          *endptr == 0))
2343      {      {
2344  #if defined(_WIN32) || defined(WIN32) || defined(__minix)  #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2345      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2346      exit(1);      exit(1);
2347  #else  #else
# Line 1326  while (argc > 1 && argv[op][0] == '-') Line 2360  while (argc > 1 && argv[op][0] == '-')
2360  #endif  #endif
2361      }      }
2362  #if !defined NOPOSIX  #if !defined NOPOSIX
2363    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2364  #endif  #endif
2365    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2366      {      {
2367      int rc;      int rc;
2368      unsigned long int lrc;      unsigned long int lrc;
2369      printf("PCRE version %s\n", pcre_version());  
2370        if (argc > 2)
2371          {
2372          if (strcmp(argv[op + 1], "linksize") == 0)
2373            {
2374            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2375            printf("%d\n", rc);
2376            yield = rc;
2377            goto EXIT;
2378            }
2379          if (strcmp(argv[op + 1], "pcre8") == 0)
2380            {
2381    #ifdef SUPPORT_PCRE8
2382            printf("1\n");
2383            yield = 1;
2384    #else
2385            printf("0\n");
2386            yield = 0;
2387    #endif
2388            goto EXIT;
2389            }
2390          if (strcmp(argv[op + 1], "pcre16") == 0)
2391            {
2392    #ifdef SUPPORT_PCRE16
2393            printf("1\n");
2394            yield = 1;
2395    #else
2396            printf("0\n");
2397            yield = 0;
2398    #endif
2399            goto EXIT;
2400            }
2401          if (strcmp(argv[op + 1], "utf") == 0)
2402            {
2403    #ifdef SUPPORT_PCRE8
2404            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2405            printf("%d\n", rc);
2406            yield = rc;
2407    #else
2408            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2409            printf("%d\n", rc);
2410            yield = rc;
2411    #endif
2412            goto EXIT;
2413            }
2414          if (strcmp(argv[op + 1], "ucp") == 0)
2415            {
2416            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2417            printf("%d\n", rc);
2418            yield = rc;
2419            goto EXIT;
2420            }
2421          if (strcmp(argv[op + 1], "jit") == 0)
2422            {
2423            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2424            printf("%d\n", rc);
2425            yield = rc;
2426            goto EXIT;
2427            }
2428          if (strcmp(argv[op + 1], "newline") == 0)
2429            {
2430            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2431            /* Note that these values are always the ASCII values, even
2432            in EBCDIC environments. CR is 13 and NL is 10. */
2433            printf("%s\n", (rc == 13)? "CR" :
2434              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2435              (rc == -2)? "ANYCRLF" :
2436              (rc == -1)? "ANY" : "???");
2437            goto EXIT;
2438            }
2439          printf("Unknown -C option: %s\n", argv[op + 1]);
2440          goto EXIT;
2441          }
2442    
2443        printf("PCRE version %s\n", version);
2444      printf("Compiled with\n");      printf("Compiled with\n");
2445    
2446    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2447    are set, either both UTFs are supported or both are not supported. */
2448    
2449    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2450        printf("  8-bit and 16-bit support\n");
2451        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2452        if (rc)
2453          printf("  UTF-8 and UTF-16 support\n");
2454        else
2455          printf("  No UTF-8 or UTF-16 support\n");
2456    #elif defined SUPPORT_PCRE8
2457        printf("  8-bit support only\n");
2458      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2459      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2460      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2461        printf("  16-bit support only\n");
2462        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2463        printf("  %sUTF-16 support\n", rc? "" : "No ");
2464    #endif
2465    
2466        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2467      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2468      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2469        if (rc)
2470          {
2471          const char *arch;
2472          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2473          printf("  Just-in-time compiler support: %s\n", arch);
2474          }
2475        else
2476          printf("  No just-in-time compiler support\n");
2477        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2478      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
2479      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
2480      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2481        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2482        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2483        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2484      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2485      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2486                                       "all Unicode newlines");                                       "all Unicode newlines");
2487      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2488      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2489      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2490      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2491      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2492      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
2493      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2494      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
2495      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2496      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2497        if (showstore)
2498          {
2499          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2500          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2501          }
2502        printf("\n");
2503      goto EXIT;      goto EXIT;
2504      }      }
2505    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
2506             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
2507      {      {
2508      usage();      usage();
2509      goto EXIT;      goto EXIT;
2510      }      }
2511    else    else
2512      {      {
2513      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
2514        printf("** Unknown or malformed option %s\n", arg);
2515      usage();      usage();
2516      yield = 1;      yield = 1;
2517      goto EXIT;      goto EXIT;
# Line 1415  if (argc > 2) Line 2558  if (argc > 2)
2558    
2559  /* Set alternative malloc function */  /* Set alternative malloc function */
2560    
2561    #ifdef SUPPORT_PCRE8
2562  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2563  pcre_free = new_free;  pcre_free = new_free;
2564  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2565  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2566    #endif
2567    
2568    #ifdef SUPPORT_PCRE16
2569    pcre16_malloc = new_malloc;
2570    pcre16_free = new_free;
2571    pcre16_stack_malloc = stack_malloc;
2572    pcre16_stack_free = stack_free;
2573    #endif
2574    
2575  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2576    
2577  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2578    
2579  /* Main loop */  /* Main loop */
2580    
# Line 1437  while (!done) Line 2589  while (!done)
2589  #endif  #endif
2590    
2591    const char *error;    const char *error;
2592    unsigned char *markptr;    pcre_uint8 *markptr;
2593    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
2594    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
2595    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
2596      unsigned long int get_options;
2597    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2598    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2599    int do_allcaps = 0;    int do_allcaps = 0;
2600    int do_mark = 0;    int do_mark = 0;
2601    int do_study = 0;    int do_study = 0;
2602    int no_force_study = 0;    int no_force_study = 0;
2603    int do_debug = debug;    int do_debug = debug;
2604    int do_G = 0;    int do_G = 0;
2605    int do_g = 0;    int do_g = 0;
# Line 1456  while (!done) Line 2609  while (!done)
2609    int do_flip = 0;    int do_flip = 0;
2610    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2611    
2612    use_utf8 = 0;  #if !defined NODFA
2613      int dfa_matched = 0;
2614    #endif
2615    
2616      use_utf = 0;
2617    debug_lengths = 1;    debug_lengths = 1;
2618    
2619    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1471  while (!done) Line 2628  while (!done)
2628    
2629    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2630      {      {
2631      unsigned long int magic, get_options;      pcre_uint32 magic;
2632      uschar sbuf[8];      pcre_uint8 sbuf[8];
2633      FILE *f;      FILE *f;
2634    
2635      p++;      p++;
2636        if (*p == '!')
2637          {
2638          do_debug = TRUE;
2639          do_showinfo = TRUE;
2640          p++;
2641          }
2642    
2643      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2644      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2645      *pp = 0;      *pp = 0;
# Line 1487  while (!done) Line 2651  while (!done)
2651        continue;        continue;
2652        }        }
2653    
2654        first_gotten_store = 0;
2655      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2656    
2657      true_size =      true_size =
# Line 1494  while (!done) Line 2659  while (!done)
2659      true_study_size =      true_study_size =
2660        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2661    
2662      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2663      regex_gotten_store = gotten_store;      if (re == NULL)
2664          {
2665          printf("** Failed to get %d bytes of memory for pcre object\n",
2666            (int)true_size);
2667          yield = 1;
2668          goto EXIT;
2669          }
2670        regex_gotten_store = first_gotten_store;
2671    
2672      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2673    
2674      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2675      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2676        {        {
2677        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2678          {          {
2679          do_flip = 1;          do_flip = 1;
2680          }          }
2681        else        else
2682          {          {
2683          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2684            new_free(re);
2685          fclose(f);          fclose(f);
2686          continue;          continue;
2687          }          }
2688        }        }
2689    
2690        /* We hide the byte-invert info for little and big endian tests. */
2691      fprintf(outfile, "Compiled pattern%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2692        do_flip? " (byte-inverted)" : "", p);        do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
   
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
2693    
2694      /* Now see if there is any following study data. */      /* Now see if there is any following study data. */
2695    
# Line 1538  while (!done) Line 2707  while (!done)
2707          {          {
2708          FAIL_READ:          FAIL_READ:
2709          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2710          if (extra != NULL) new_free(extra);          if (extra != NULL)
2711          if (re != NULL) new_free(re);            {
2712              PCRE_FREE_STUDY(extra);
2713              }
2714            new_free(re);
2715          fclose(f);          fclose(f);
2716          continue;          continue;
2717          }          }
# Line 1548  while (!done) Line 2720  while (!done)
2720        }        }
2721      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2722    
2723        /* Flip the necessary bytes. */
2724        if (do_flip)
2725          {
2726          int rc;
2727          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2728          if (rc == PCRE_ERROR_BADMODE)
2729            {
2730            /* Simulate the result of the function call below. */
2731            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2732              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2733            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2734              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2735            new_free(re);
2736            fclose(f);
2737            continue;
2738            }
2739          }
2740    
2741        /* Need to know if UTF-8 for printing data strings. */
2742    
2743        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2744          {
2745          new_free(re);
2746          fclose(f);
2747          continue;
2748          }
2749        use_utf = (get_options & PCRE_UTF8) != 0;
2750    
2751      fclose(f);      fclose(f);
2752      goto SHOW_INFO;      goto SHOW_INFO;
2753      }      }
2754    
2755    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2756    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2757    
2758    delimiter = *p++;    delimiter = *p++;
2759    
# Line 1604  while (!done) Line 2804  while (!done)
2804    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2805    
2806    options = 0;    options = 0;
2807    study_options = 0;    study_options = force_study_options;
2808    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
2809    
2810    while (*pp != 0)    while (*pp != 0)
# Line 1619  while (!done) Line 2819  while (!done)
2819        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2820    
2821        case '+':        case '+':
2822        if (do_showrest) do_showcaprest = 1; else do_showrest = 1;        if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2823        break;        break;
2824    
2825        case '=': do_allcaps = 1; break;        case '=': do_allcaps = 1; break;
2826        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2827        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2828        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1640  while (!done) Line 2840  while (!done)
2840        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2841  #endif  #endif
2842    
2843        case 'S':        case 'S':
2844        if (do_study == 0) do_study = 1; else        do_study = 1;
2845          {        for (;;)
2846          do_study = 0;          {
2847          no_force_study = 1;          switch (*pp++)
2848          }            {
2849              case 'S':
2850              do_study = 0;
2851              no_force_study = 1;
2852              break;
2853    
2854              case '!':
2855              study_options |= PCRE_STUDY_EXTRA_NEEDED;
2856              break;
2857    
2858              case '+':
2859              if (*pp == '+')
2860                {
2861                verify_jit = TRUE;
2862                pp++;
2863                }
2864              if (*pp >= '1' && *pp <= '7')
2865                study_options |= jit_study_bits[*pp++ - '1'];
2866              else
2867                study_options |= jit_study_bits[6];
2868              break;
2869    
2870              case '-':
2871              study_options &= ~PCRE_STUDY_ALLJIT;
2872              break;
2873    
2874              default:
2875              pp--;
2876              goto ENDLOOP;
2877              }
2878            }
2879          ENDLOOP:
2880        break;        break;
2881    
2882        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
# Line 1653  while (!done) Line 2884  while (!done)
2884        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2885        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2886        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2887        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2888        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2889    
2890        case 'T':        case 'T':
# Line 1687  while (!done) Line 2918  while (!done)
2918          goto SKIP_DATA;          goto SKIP_DATA;
2919          }          }
2920        locale_set = 1;        locale_set = 1;
2921        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2922        pp = ppp;        pp = ppp;
2923        break;        break;
2924    
# Line 1700  while (!done) Line 2931  while (!done)
2931    
2932        case '<':        case '<':
2933          {          {
2934          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2935            {            {
2936            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2937            pp += 3;            pp += 3;
# Line 1728  while (!done) Line 2959  while (!done)
2959    
2960    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2961    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2962    local character tables. */    local character tables. Neither does it have 16-bit support. */
2963    
2964  #if !defined NOPOSIX  #if !defined NOPOSIX
2965    if (posix || do_posix)    if (posix || do_posix)
# Line 1744  while (!done) Line 2975  while (!done)
2975      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2976      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2977    
2978        first_gotten_store = 0;
2979      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2980    
2981      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1763  while (!done) Line 2995  while (!done)
2995  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2996    
2997      {      {
2998      unsigned long int get_options;      /* In 16-bit mode, convert the input. */
2999    
3000    #ifdef SUPPORT_PCRE16
3001        if (use_pcre16)
3002          {
3003          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3004            {
3005            case -1:
3006            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3007              "converted to UTF-16\n");
3008            goto SKIP_DATA;
3009    
3010            case -2:
3011            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3012              "cannot be converted to UTF-16\n");
3013            goto SKIP_DATA;
3014    
3015            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3016            fprintf(outfile, "**Failed: character value greater than 0xffff "
3017              "cannot be converted to 16-bit in non-UTF mode\n");
3018            goto SKIP_DATA;
3019    
3020            default:
3021            break;
3022            }
3023          p = (pcre_uint8 *)buffer16;
3024          }
3025    #endif
3026    
3027        /* Compile many times when timing */
3028    
3029      if (timeit > 0)      if (timeit > 0)
3030        {        {
# Line 1772  while (!done) Line 3033  while (!done)
3033        clock_t start_time = clock();        clock_t start_time = clock();
3034        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
3035          {          {
3036          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3037          if (re != NULL) free(re);          if (re != NULL) free(re);
3038          }          }
3039        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1781  while (!done) Line 3042  while (!done)
3042            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
3043        }        }
3044    
3045      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
3046        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3047    
3048      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3049      if non-interactive. */      if non-interactive. */
# Line 1812  while (!done) Line 3074  while (!done)
3074      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
3075      lines. */      lines. */
3076    
3077      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3078      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;        goto SKIP_DATA;
3079        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
     /* Print information if required. There are now two info-returning  
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
   
     if (log_store)  
       fprintf(outfile, "Memory allocation (code space): %d\n",  
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3080    
3081      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3082      and remember the store that was got. */      and remember the store that was got. */
3083    
3084      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
3085      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3086    
3087        /* Output code size information if requested */
3088    
3089        if (log_store)
3090          fprintf(outfile, "Memory allocation (code space): %d\n",
3091            (int)(first_gotten_store -
3092                  sizeof(REAL_PCRE) -
3093                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3094    
3095      /* If -s or /S was present, study the regex to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
3096      help with the matching, unless the pattern has the SS option, which      help with the matching, unless the pattern has the SS option, which
3097      suppresses the effect of /S (used for a few test patterns where studying is      suppresses the effect of /S (used for a few test patterns where studying is
3098      never sensible). */      never sensible). */
3099    
3100      if (do_study || (force_study && !no_force_study))      if (do_study || (force_study >= 0 && !no_force_study))
3101        {        {
3102        if (timeit > 0)        if (timeit > 0)
3103          {          {
# Line 1844  while (!done) Line 3105  while (!done)
3105          clock_t time_taken;          clock_t time_taken;
3106          clock_t start_time = clock();          clock_t start_time = clock();
3107          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3108            extra = pcre_study(re, study_options, &error);            {
3109              PCRE_STUDY(extra, re, study_options, &error);
3110              }
3111          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3112          if (extra != NULL) free(extra);          if (extra != NULL)
3113              {
3114              PCRE_FREE_STUDY(extra);
3115              }
3116          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3117            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3118              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3119          }          }
3120        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options, &error);
3121        if (error != NULL)        if (error != NULL)
3122          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3123        else if (extra != NULL)        else if (extra != NULL)
3124            {
3125          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3126            if (log_store)
3127              {
3128              size_t jitsize;
3129              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3130                  jitsize != 0)
3131                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3132              }
3133            }
3134        }        }
3135    
3136      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1871  while (!done) Line 3146  while (!done)
3146        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
3147        }        }
3148    
3149      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
3150    
3151      SHOW_INFO:      SHOW_INFO:
3152    
3153      if (do_debug)      if (do_debug)
3154        {        {
3155        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3156        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3157        }        }
3158    
3159      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1923  while (!done) Line 3161  while (!done)
3161      if (do_showinfo)      if (do_showinfo)
3162        {        {
3163        unsigned long int all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3164        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3165          hascrorlf;          hascrorlf, maxlookbehind;
3166        int nameentrysize, namecount;        int nameentrysize, namecount;
3167        const uschar *nametable;        const pcre_uint8 *nametable;
3168    
3169        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3170        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3171        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3172        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3173        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3174        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3175        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3176        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3177        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3178        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3179        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3180              new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3181  #if !defined NOINFOCHECK            != 0)
3182        old_count = pcre_info(re, &old_options, &old_first_char);          goto SKIP_DATA;
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3183    
3184        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3185          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1976  while (!done) Line 3194  while (!done)
3194          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3195          while (namecount-- > 0)          while (namecount-- > 0)
3196            {            {
3197            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3198              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3199              GET2(nametable, 0));  #else
3200              int imm2_size = IMM2_SIZE;
3201    #endif
3202              int length = (int)STRLEN(nametable + imm2_size);
3203              fprintf(outfile, "  ");
3204              PCHARSV(nametable, imm2_size, length, outfile);
3205              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3206    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3207              fprintf(outfile, "%3d\n", use_pcre16?
3208                 (int)(((PCRE_SPTR16)nametable)[0])
3209                :((int)nametable[0] << 8) | (int)nametable[1]);
3210              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3211    #else
3212              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3213    #ifdef SUPPORT_PCRE8
3214            nametable += nameentrysize;            nametable += nameentrysize;
3215    #else
3216              nametable += nameentrysize * 2;
3217    #endif
3218    #endif
3219            }            }
3220          }          }
3221    
3222        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3223        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3224    
3225        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3226        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3227    
3228        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3229          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 2003  while (!done) Line 3239  while (!done)
3239            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3240            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3241            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3242            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3243            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3244            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3245            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3246            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3247    
# Line 2047  while (!done) Line 3283  while (!done)
3283          }          }
3284        else        else
3285          {          {
3286          int ch = first_char & 255;          const char *caseless =
3287          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3288            "" : " (caseless)";            "" : " (caseless)";
3289          if (PRINTHEX(ch))  
3290            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3291              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3292          else          else
3293            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3294              fprintf(outfile, "First char = ");
3295              pchar(first_char, outfile);
3296              fprintf(outfile, "%s\n", caseless);
3297              }
3298          }          }
3299    
3300        if (need_char < 0)        if (need_char < 0)
# Line 2062  while (!done) Line 3303  while (!done)
3303          }          }
3304        else        else
3305          {          {
3306          int ch = need_char & 255;          const char *caseless =
3307          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3308            "" : " (caseless)";            "" : " (caseless)";
3309          if (PRINTHEX(ch))  
3310            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3311              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3312          else          else
3313            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3314              fprintf(outfile, "Need char = ");
3315              pchar(need_char, outfile);
3316              fprintf(outfile, "%s\n", caseless);
3317              }
3318          }          }
3319    
3320          if (maxlookbehind > 0)
3321            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3322    
3323        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3324        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3325        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3326        flipped.) If study was forced by an external -s, don't show this        flipped.) If study was forced by an external -s, don't show this
3327        information unless -i or -d was also present. This means that, except        information unless -i or -d was also present. This means that, except
3328        when auto-callouts are involved, the output from runs with and without        when auto-callouts are involved, the output from runs with and without
3329        -s should be identical. */        -s should be identical. */
3330    
3331        if (do_study || (force_study && showinfo && !no_force_study))        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3332          {          {
3333          if (extra == NULL)          if (extra == NULL)
3334            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3335          else          else
3336            {            {
3337            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3338            int minlength;            int minlength;
3339    
3340            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3341            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3342    
3343            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
3344              {              {
3345              int i;              if (start_bits == NULL)
3346              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3347              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3348                {                {
3349                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3350                  int c = 24;
3351                  fprintf(outfile, "Starting byte set: ");
3352                  for (i = 0; i < 256; i++)
3353                  {                  {
3354                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
3355                    {                    {
3356                    fprintf(outfile, "%c ", i);                    if (c > 75)
3357                    c += 2;                      {
3358                    }                      fprintf(outfile, "\n  ");
3359                  else                      c = 2;
3360                    {                      }
3361                    fprintf(outfile, "\\x%02x ", i);                    if (PRINTOK(i) && i != ' ')
3362                    c += 5;                      {
3363                        fprintf(outfile, "%c ", i);
3364                        c += 2;
3365                        }
3366                      else
3367                        {
3368                        fprintf(outfile, "\\x%02x ", i);
3369                        c += 5;
3370                        }
3371                    }                    }
3372                  }                  }
3373                  fprintf(outfile, "\n");
3374                }                }
3375              fprintf(outfile, "\n");              }
3376              }
3377    
3378            /* Show this only if the JIT was set by /S, not by -s. */
3379    
3380            if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3381                (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3382              {
3383              int jit;
3384              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3385                {
3386                if (jit)
3387                  fprintf(outfile, "JIT study was successful\n");
3388                else
3389    #ifdef SUPPORT_JIT
3390                  fprintf(outfile, "JIT study was not successful\n");
3391    #else
3392                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3393    #endif
3394              }              }
3395            }            }
3396          }          }
# Line 2139  while (!done) Line 3409  while (!done)
3409          }          }
3410        else        else
3411          {          {
3412          uschar sbuf[8];          pcre_uint8 sbuf[8];
3413          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3414          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3415          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3416          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3417            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3418          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3419          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3420          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3421          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3422            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3423    
3424          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3425              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 2158  while (!done) Line 3429  while (!done)
3429          else          else
3430            {            {
3431            fprintf(outfile, "Compiled pattern written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3432    
3433            /* If there is study data, write it, but verify the writing only            /* If there is study data, write it. */
3434            if the studying was requested by /S, not just by -s. */  
   
3435            if (extra != NULL)            if (extra != NULL)
3436              {              {
3437              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 2177  while (!done) Line 3447  while (!done)
3447          }          }
3448    
3449        new_free(re);        new_free(re);
3450        if (extra != NULL) new_free(extra);        if (extra != NULL)
3451            {
3452            PCRE_FREE_STUDY(extra);
3453            }
3454        if (locale_set)        if (locale_set)
3455          {          {
3456          new_free((void *)tables);          new_free((void *)tables);
# Line 2192  while (!done) Line 3465  while (!done)
3465    
3466    for (;;)    for (;;)
3467      {      {
3468      uschar *q;      pcre_uint8 *q;
3469      uschar *bptr;      pcre_uint8 *bptr;
3470      int *use_offsets = offsets;      int *use_offsets = offsets;
3471      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3472      int callout_data = 0;      int callout_data = 0;
# Line 2209  while (!done) Line 3482  while (!done)
3482      int g_notempty = 0;      int g_notempty = 0;
3483      int use_dfa = 0;      int use_dfa = 0;
3484    
     options = 0;  
   
3485      *copynames = 0;      *copynames = 0;
3486      *getnames = 0;      *getnames = 0;
3487    
3488      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3489      getnamesptr = getnames;      cn16ptr = copynames;
3490        gn16ptr = getnames;
3491    #endif
3492    #ifdef SUPPORT_PCRE8
3493        cn8ptr = copynames8;
3494        gn8ptr = getnames8;
3495    #endif
3496    
3497      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3498      first_callout = 1;      first_callout = 1;
3499      last_callout_mark = NULL;      last_callout_mark = NULL;
3500      callout_extra = 0;      callout_extra = 0;
3501      callout_count = 0;      callout_count = 0;
3502      callout_fail_count = 999999;      callout_fail_count = 999999;
3503      callout_fail_id = -1;      callout_fail_id = -1;
3504      show_malloc = 0;      show_malloc = 0;
3505        options = 0;
3506    
3507      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3508        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 2260  while (!done) Line 3538  while (!done)
3538        int i = 0;        int i = 0;
3539        int n = 0;        int n = 0;
3540    
3541        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3542          In non-UTF mode, allow the value of the byte to fall through to later,
3543          where values greater than 127 are turned into UTF-8 when running in
3544          16-bit mode. */
3545    
3546          if (c != '\\')
3547            {
3548            if (use_utf)
3549              {
3550              *q++ = c;
3551              continue;
3552              }
3553            }
3554    
3555          /* Handle backslash escapes */
3556    
3557          else switch ((c = *p++))
3558          {          {
3559          case 'a': c =    7; break;          case 'a': c =    7; break;
3560          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 2276  while (!done) Line 3570  while (!done)
3570          c -= '0';          c -= '0';
3571          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3572            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3573          break;          break;
3574    
3575          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3576          if (*p == '{')          if (*p == '{')
3577            {            {
3578            unsigned char *pt = p;            pcre_uint8 *pt = p;
3579            c = 0;            c = 0;
3580            while (isxdigit(*(++pt)))  
3581              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3582              when isxdigit() is a macro that refers to its argument more than
3583              once. This is banned by the C Standard, but apparently happens in at
3584              least one MacOS environment. */
3585    
3586              for (pt++; isxdigit(*pt); pt++)
3587                {
3588                if (++i == 9)
3589                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3590                                   "using only the first eight.\n");
3591                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3592                }
3593            if (*pt == '}')            if (*pt == '}')
3594              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             if (use_utf8)  
               {  
               utn = ord2utf8(c, buff8);  
               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
               c = buff8[ii];   /* Last byte */  
               }  
             else  
              {  
              if (c > 255)  
                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "  
                  "UTF-8 mode is not enabled.\n"  
                  "** Truncation will probably give the wrong result.\n", c);  
              }  
3595              p = pt + 1;              p = pt + 1;
3596              break;              break;
3597              }              }
3598            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3599            }            }
 #endif  
3600    
3601          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3602            allows UTF-8 characters to be constructed byte by byte, and also allows
3603            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3604            Otherwise, pass it down to later code so that it can be turned into
3605            UTF-8 when running in 16-bit mode. */
3606    
3607          c = 0;          c = 0;
3608          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3609            {            {
3610            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3611            p++;            p++;
3612            }            }
3613            if (use_utf)
3614              {
3615              *q++ = c;
3616              continue;
3617              }
3618          break;          break;
3619    
3620          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 2364  while (!done) Line 3647  while (!done)
3647            }            }
3648          else if (isalnum(*p))          else if (isalnum(*p))
3649            {            {
3650            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3651            }            }
3652          else if (*p == '+')          else if (*p == '+')
3653            {            {
# Line 2380  while (!done) Line 3656  while (!done)
3656            }            }
3657          else if (*p == '-')          else if (*p == '-')
3658            {            {
3659            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3660            p++;            p++;
3661            }            }
3662          else if (*p == '!')          else if (*p == '!')
# Line 2434  while (!done) Line 3710  while (!done)
3710            }            }
3711          else if (isalnum(*p))          else if (isalnum(*p))
3712            {            {
3713            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3714            while (isalnum(*p)) *npp++ = *p++;            }
3715            *npp++ = 0;          continue;
3716            *npp = 0;  
3717            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3718            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3719              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3720            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3721                && extra->executable_jit != NULL)
3722              {
3723              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3724              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3725              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3726            }            }
3727          continue;          continue;
3728    
# Line 2477  while (!done) Line 3758  while (!done)
3758            }            }
3759          use_size_offsets = n;          use_size_offsets = n;
3760          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
3761              else use_offsets = offsets + size_offsets_max - n;  /* To catch overruns */
3762          continue;          continue;
3763    
3764          case 'P':          case 'P':
# Line 2537  while (!done) Line 3819  while (!done)
3819            }            }
3820          continue;          continue;
3821          }          }
3822        *q++ = c;  
3823          /* We now have a character value in c that may be greater than 255. In
3824          16-bit mode, we always convert characters to UTF-8 so that values greater
3825          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3826          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3827          mode must have come from \x{...} or octal constructs because values from
3828          \x.. get this far only in non-UTF mode. */
3829    
3830    #if !defined NOUTF || defined SUPPORT_PCRE16
3831          if (use_pcre16 || use_utf)
3832            {
3833            pcre_uint8 buff8[8];
3834            int ii, utn;
3835            utn = ord2utf8(c, buff8);
3836            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3837            }
3838          else
3839    #endif
3840            {
3841            if (c > 255)
3842              {
3843              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3844                "and UTF-8 mode is not enabled.\n", c);
3845              fprintf(outfile, "** Truncation will probably give the wrong "
3846                "result.\n");
3847              }
3848            *q++ = c;
3849            }
3850        }        }
3851    
3852        /* Reached end of subject string */
3853    
3854      *q = 0;      *q = 0;
3855      len = (int)(q - dbuffer);      len = (int)(q - dbuffer);
3856    
# Line 2600  while (!done) Line 3912  while (!done)
3912            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3913              {              {
3914              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3915              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3916                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3917              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3918              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
3919                {                {
3920                fprintf(outfile, "%2d+ ", (int)i);                fprintf(outfile, "%2d+ ", (int)i);
3921                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3922                  outfile);                  outfile);
3923                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3924                }                }
# Line 2614  while (!done) Line 3926  while (!done)
3926            }            }
3927          }          }
3928        free(pmatch);        free(pmatch);
3929          goto NEXT_DATA;
3930        }        }
3931    
3932    #endif  /* !defined NOPOSIX */
3933    
3934      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3935    
3936      else  #ifdef SUPPORT_PCRE16
3937  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3938          {
3939          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3940          switch(len)
3941            {
3942            case -1:
3943            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3944              "converted to UTF-16\n");
3945            goto NEXT_DATA;
3946    
3947            case -2:
3948            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3949              "cannot be converted to UTF-16\n");
3950            goto NEXT_DATA;
3951    
3952            case -3:
3953            fprintf(outfile, "**Failed: character value greater than 0xffff "
3954              "cannot be converted to 16-bit in non-UTF mode\n");
3955            goto NEXT_DATA;
3956    
3957            default:
3958            break;
3959            }
3960          bptr = (pcre_uint8 *)buffer16;
3961          }
3962    #endif
3963    
3964        /* Ensure that there is a JIT callback if we want to verify that JIT was
3965        actually used. If jit_stack == NULL, no stack has yet been assigned. */
3966    
3967        if (verify_jit && jit_stack == NULL && extra != NULL)
3968           { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3969    
3970      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3971        {        {
3972        markptr = NULL;        markptr = NULL;
3973          jit_was_used = FALSE;
3974    
3975        if (timeitm > 0)        if (timeitm > 0)
3976          {          {
# Line 2634  while (!done) Line 3981  while (!done)
3981  #if !defined NODFA  #if !defined NODFA
3982          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
3983            {            {
3984            int workspace[1000];            if ((options & PCRE_DFA_RESTART) != 0)
3985                {
3986                fprintf(outfile, "Timing DFA restarts is not supported\n");
3987                break;
3988                }
3989              if (dfa_workspace == NULL)
3990                dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
3991            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3992              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,              {
3993                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3994                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets,
3995                  dfa_workspace, DFA_WS_DIMENSION);
3996                }
3997            }            }
3998          else          else
3999  #endif  #endif
4000    
4001          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
4002            count = pcre_exec(re, extra, (char *)bptr, len,            {
4003              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4004                (options | g_notempty), use_offsets, use_size_offsets);
4005              }
4006          time_taken = clock() - start_time;          time_taken = clock() - start_time;
4007          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
4008            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2655  while (!done) Line 4011  while (!done)
4011    
4012        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
4013        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
4014        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
4015          running of pcre_exec(), so disable the JIT optimization. This makes it
4016          possible to run the same set of tests with and without JIT externally
4017          requested. */
4018    
4019        if (find_match_limit)        if (find_match_limit)
4020          {          {
4021          if (extra == NULL)          if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4022            {          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4023            extra = (pcre_extra *)malloc(sizeof(pcre_extra));          extra->flags = 0;
           extra->flags = 0;  
           }  
4024    
4025          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
4026            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2687  while (!done) Line 4044  while (!done)
4044            }            }
4045          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4046          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
4047          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4048            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
4049          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4050          }          }
# Line 2698  while (!done) Line 4055  while (!done)
4055  #if !defined NODFA  #if !defined NODFA
4056        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
4057          {          {
4058          int workspace[1000];          if (dfa_workspace == NULL)
4059          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,            dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4060            options | g_notempty, use_offsets, use_size_offsets, workspace,          if (dfa_matched++ == 0)
4061            sizeof(workspace)/sizeof(int));            dfa_workspace[0] = -1;  /* To catch bad restart */
4062            PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4063              (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4064              DFA_WS_DIMENSION);
4065          if (count == 0)          if (count == 0)
4066            {            {
4067            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2712  while (!done) Line 4072  while (!done)
4072    
4073        else        else
4074          {          {
4075          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4076            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
4077          if (count == 0)          if (count == 0)
4078            {            {
4079            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 2726  while (!done) Line 4086  while (!done)
4086        if (count >= 0)        if (count >= 0)
4087          {          {
4088          int i, maxcount;          int i, maxcount;
4089            void *cnptr, *gnptr;
4090    
4091  #if !defined NODFA  #if !defined NODFA
4092          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2746  while (!done) Line 4107  while (!done)
4107              do_g = do_G = FALSE;        /* Break g/G loop */              do_g = do_G = FALSE;        /* Break g/G loop */
4108              }              }
4109            }            }
4110    
4111          /* do_allcaps requests showing of all captures in the pattern, to check          /* do_allcaps requests showing of all captures in the pattern, to check
4112          unset ones at the end. */          unset ones at the end. */
4113    
4114          if (do_allcaps)          if (do_allcaps)
4115            {            {
4116            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4117            count++;   /* Allow for full match */              goto SKIP_DATA;
4118            if (count * 2 > use_size_offsets) count = use_size_offsets/2;            count++;   /* Allow for full match */
4119            }            if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4120              }
4121    
4122          /* Output the captured substrings */          /* Output the captured substrings */
4123    
4124          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
4125            {            {
4126            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
4127              {              {
4128              if (use_offsets[i] != -1)              if (use_offsets[i] != -1)
4129                fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",                fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4130                  use_offsets[i], i);                  use_offsets[i], i);
4131              if (use_offsets[i+1] != -1)              if (use_offsets[i+1] != -1)
4132                fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",                fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4133                  use_offsets[i+1], i+1);                  use_offsets[i+1], i+1);
4134              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
4135              }              }
4136            else            else
4137              {              {
4138              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
4139              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr, use_offsets[i],
4140                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
4141                if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4142              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4143              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
4144                {                {
4145                fprintf(outfile, "%2d+ ", i/2);                fprintf(outfile, "%2d+ ", i/2);
4146                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4147                  outfile);                  outfile);
4148                fprintf(outfile, "\n");                fprintf(outfile, "\n");
4149                }                }
4150              }              }
4151            }            }
4152    
4153          if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);          if (markptr != NULL)
4154              {
4155              fprintf(outfile, "MK: ");
4156              PCHARSV(markptr, 0, -1, outfile);
4157              fprintf(outfile, "\n");
4158              }
4159    
4160          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4161            {            {
4162            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
4163              {              {
4164                int rc;
4165              char copybuffer[256];              char copybuffer[256];
4166              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4167                i, copybuffer, sizeof(copybuffer));                copybuffer, sizeof(copybuffer));
4168              if (rc < 0)              if (rc < 0)
4169                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4170              else              else
4171                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);                {
4172                  fprintf(outfile, "%2dC ", i);
4173                  PCHARSV(copybuffer, 0, rc, outfile);
4174                  fprintf(outfile, " (%d)\n", rc);
4175                  }
4176              }              }
4177            }            }
4178    
4179          for (copynamesptr = copynames;          cnptr = copynames;
4180               *copynamesptr != 0;          for (;;)
              copynamesptr += (int)strlen((char*)copynamesptr) + 1)  
4181            {            {
4182              int rc;
4183            char copybuffer[256];            char copybuffer[256];
4184            int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,  
4185              count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));            if (use_pcre16)
4186                {
4187                if (*(pcre_uint16 *)cnptr == 0) break;
4188                }
4189              else
4190                {
4191                if (*(pcre_uint8 *)cnptr == 0) break;
4192                }
4193    
4194              PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4195                cnptr, copybuffer, sizeof(copybuffer));
4196    
4197            if (rc < 0)            if (rc < 0)
4198              fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);              {
4199                fprintf(outfile, "copy substring ");
4200                PCHARSV(cnptr, 0, -1, outfile);
4201                fprintf(outfile, " failed %d\n", rc);
4202                }
4203            else            else
4204              fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);              {
4205                fprintf(outfile, "  C ");
4206                PCHARSV(copybuffer, 0, rc, outfile);
4207                fprintf(outfile, " (%d) ", rc);
4208                PCHARSV(cnptr, 0, -1, outfile);
4209                putc('\n', outfile);
4210                }
4211    
4212              cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4213            }            }
4214    
4215          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4216            {            {
4217            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
4218              {              {
4219                int rc;
4220              const char *substring;              const char *substring;
4221              int rc = pcre_get_substring((char *)bptr, use_offsets, count,              PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
               i, &substring);  
4222              if (rc < 0)              if (rc < 0)
4223                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
4224              else              else
4225                {                {
4226                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG ", i);
4227                pcre_free_substring(substring);                PCHARSV(substring, 0, rc, outfile);
4228                  fprintf(outfile, " (%d)\n", rc);
4229                  PCRE_FREE_SUBSTRING(substring);
4230                }                }
4231              }              }
4232            }            }
4233    
4234          for (getnamesptr = getnames;          gnptr = getnames;
4235               *getnamesptr != 0;          for (;;)
              getnamesptr += (int)strlen((char*)getnamesptr) + 1)  
4236            {            {
4237              int rc;
4238            const char *substring;            const char *substring;
4239            int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,  
4240              count, (char *)getnamesptr, &substring);            if (use_pcre16)
4241                {
4242                if (*(pcre_uint16 *)gnptr == 0) break;
4243                }
4244              else
4245                {
4246                if (*(pcre_uint8 *)gnptr == 0) break;
4247                }
4248    
4249              PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4250                gnptr, &substring);
4251            if (rc < 0)            if (rc < 0)
4252              fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);              {
4253                fprintf(outfile, "get substring ");
4254                PCHARSV(gnptr, 0, -1, outfile);
4255                fprintf(outfile, " failed %d\n", rc);
4256                }
4257            else            else
4258              {              {
4259              fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);              fprintf(outfile, "  G ");
4260              pcre_free_substring(substring);              PCHARSV(substring, 0, rc, outfile);
4261                fprintf(outfile, " (%d) ", rc);
4262                PCHARSV(gnptr, 0, -1, outfile);
4263                PCRE_FREE_SUBSTRING(substring);
4264                putc('\n', outfile);
4265              }              }
4266    
4267              gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4268            }            }
4269    
4270          if (getlist)          if (getlist)
4271            {            {
4272              int rc;
4273            const char **stringlist;            const char **stringlist;
4274            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,            PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
             &stringlist);  
4275            if (rc < 0)            if (rc < 0)
4276              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
4277            else            else
4278              {              {
4279              for (i = 0; i < count; i++)              for (i = 0; i < count; i++)
4280                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                {
4281                  fprintf(outfile, "%2dL ", i);
4282                  PCHARSV(stringlist[i], 0, -1, outfile);
4283                  putc('\n', outfile);
4284                  }
4285              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
4286                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
4287              /* free((void *)stringlist); */              PCRE_FREE_SUBSTRING_LIST(stringlist);
             pcre_free_substring_list(stringlist);  
4288              }              }
4289            }            }
4290          }          }
# Line 2873  while (!done) Line 4294  while (!done)
4294        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
4295          {          {
4296          if (markptr == NULL) fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
4297            else fprintf(outfile, "Partial match, mark=%s", markptr);          else
4298              {
4299              fprintf(outfile, "Partial match, mark=");
4300              PCHARSV(markptr, 0, -1, outfile);
4301              }
4302          if (use_size_offsets > 1)          if (use_size_offsets > 1)
4303            {            {
4304            fprintf(outfile, ": ");            fprintf(outfile, ": ");
4305            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4306              outfile);              outfile);
4307            }            }
4308            if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4309          fprintf(outfile, "\n");          fprintf(outfile, "\n");
4310          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
4311          }          }
# Line 2894  while (!done) Line 4320  while (!done)
4320        terminated by CRLF, an advance of one character just passes the \r,        terminated by CRLF, an advance of one character just passes the \r,
4321        whereas we should prefer the longer newline sequence, as does the code in        whereas we should prefer the longer newline sequence, as does the code in
4322        pcre_exec(). Fudge the offset value to achieve this. We check for a        pcre_exec(). Fudge the offset value to achieve this. We check for a
4323        newline setting in the pattern; if none was set, use pcre_config() to        newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4324        find the default.        find the default.
4325    
4326        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
# Line 2905  while (!done) Line 4331  while (!done)
4331          if (g_notempty != 0)          if (g_notempty != 0)
4332            {            {
4333            int onechar = 1;            int onechar = 1;
4334            unsigned int obits = ((real_pcre *)re)->options;            unsigned int obits = ((REAL_PCRE *)re)->options;
4335            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
4336            if ((obits & PCRE_NEWLINE_BITS) == 0)            if ((obits & PCRE_NEWLINE_BITS) == 0)
4337              {              {
4338              int d;              int d;
4339              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4340              /* Note that these values are always the ASCII ones, even in              /* Note that these values are always the ASCII ones, even in
4341              EBCDIC environments. CR = 13, NL = 10. */              EBCDIC environments. CR = 13, NL = 10. */
4342              obits = (d == 13)? PCRE_NEWLINE_CR :              obits = (d == 13)? PCRE_NEWLINE_CR :
# Line 2924  while (!done) Line 4350  while (!done)
4350                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4351                &&                &&
4352                start_offset < len - 1 &&                start_offset < len - 1 &&
4353                bptr[start_offset] == '\r' &&  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4354                bptr[start_offset+1] == '\n')                (use_pcre16?
4355                       ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4356                    && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4357                  :
4358                       bptr[start_offset] == '\r'
4359                    && bptr[start_offset + 1] == '\n')
4360    #elif defined SUPPORT_PCRE16
4361                     ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4362                  && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4363    #else
4364                     bptr[start_offset] == '\r'
4365                  && bptr[start_offset + 1] == '\n'
4366    #endif
4367                  )
4368              onechar++;              onechar++;
4369            else if (use_utf8)            else if (use_utf)
4370              {              {
4371              while (start_offset + onechar < len)              while (start_offset + onechar < len)
4372                {                {
# Line 2940  while (!done) Line 4379  while (!done)
4379          else          else
4380            {            {
4381            switch(count)            switch(count)
4382              {              {
4383              case PCRE_ERROR_NOMATCH:              case PCRE_ERROR_NOMATCH:
4384              if (gmatched == 0)              if (gmatched == 0)
4385                {                {
4386                if (markptr == NULL) fprintf(outfile, "No match\n");                if (markptr == NULL)
4387                  else fprintf(outfile, "No match, mark = %s\n", markptr);                  {
4388                    fprintf(outfile, "No match");
4389                    }
4390                  else
4391                    {
4392                    fprintf(outfile, "No match, mark = ");
4393                    PCHARSV(markptr, 0, -1, outfile);
4394                    }
4395                  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4396                  putc('\n', outfile);
4397                }                }
4398              break;              break;
4399    
4400              case PCRE_ERROR_BADUTF8:              case PCRE_ERROR_BADUTF8:
4401              case PCRE_ERROR_SHORTUTF8:              case PCRE_ERROR_SHORTUTF8:
4402              fprintf(outfile, "Error %d (%s UTF-8 string)", count,              fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4403                (count == PCRE_ERROR_BADUTF8)? "bad" : "short");                (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4404                  use_pcre16? "16" : "8");
4405              if (use_size_offsets >= 2)              if (use_size_offsets >= 2)
4406                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4407                  use_offsets[1]);                  use_offsets[1]);
4408              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4409              break;              break;
4410    
4411                case PCRE_ERROR_BADUTF8_OFFSET:
4412                fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4413                  use_pcre16? "16" : "8");
4414                break;
4415    
4416              default:              default:
4417              if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))              if (count < 0 &&
4418                    (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4419                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4420              else              else
4421                fprintf(outfile, "Error %d (Unexpected value)\n", count);                fprintf(outfile, "Error %d (Unexpected value)\n", count);
4422              break;              break;
4423              }              }
4424    
4425            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
4426            }            }
4427          }          }
# Line 2998  while (!done) Line 4453  while (!done)
4453    
4454        else        else
4455          {          {
4456          bptr += use_offsets[1];          bptr += use_offsets[1] * CHAR_SIZE;
4457          len -= use_offsets[1];          len -= use_offsets[1];
4458          }          }
4459        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
# Line 3013  while (!done) Line 4468  while (!done)
4468  #endif  #endif
4469    
4470    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
4471    if (extra != NULL) new_free(extra);    if (extra != NULL)
4472        {
4473        PCRE_FREE_STUDY(extra);
4474        }
4475    if (locale_set)    if (locale_set)
4476      {      {
4477      new_free((void *)tables);      new_free((void *)tables);
4478      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
4479      locale_set = 0;      locale_set = 0;
4480      }      }
4481      if (jit_stack != NULL)
4482        {
4483        PCRE_JIT_STACK_FREE(jit_stack);
4484        jit_stack = NULL;
4485        }
4486    }    }
4487    
4488  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
# Line 3034  free(dbuffer); Line 4497  free(dbuffer);
4497  free(pbuffer);  free(pbuffer);
4498  free(offsets);  free(offsets);
4499    
4500    #ifdef SUPPORT_PCRE16
4501    if (buffer16 != NULL) free(buffer16);
4502    #endif
4503    
4504    #if !defined NODFA
4505    if (dfa_workspace != NULL)
4506      free(dfa_workspace);
4507    #endif
4508    
4509  return yield;  return yield;
4510  }  }
4511    

Legend:
Removed from v.645  
changed lines
  Added in v.1027

  ViewVC Help
Powered by ViewVC 1.1.5