/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 255 by ph10, Wed Sep 19 08:50:04 2007 UTC revision 922 by ph10, Mon Feb 20 18:44:42 2012 UTC
# Line 1  Line 1 
1  /*************************************************  /*************************************************
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
51  #include "config.h"  #include "config.h"
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 59  POSSIBILITY OF SUCH DAMAGE.
59  #include <locale.h>  #include <locale.h>
60  #include <errno.h>  #include <errno.h>
61    
62    #ifdef SUPPORT_LIBREADLINE
63    #ifdef HAVE_UNISTD_H
64    #include <unistd.h>
65    #endif
66    #include <readline/readline.h>
67    #include <readline/history.h>
68    #endif
69    
70    
71  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
72  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 63  input mode under Windows. */ Line 82  input mode under Windows. */
82  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
83  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
84    
85    #ifndef isatty
86    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
87    #endif                         /* though in some environments they seem to   */
88                                   /* be already defined, hence the #ifndefs.    */
89    #ifndef fileno
90    #define fileno _fileno
91    #endif
92    
93    /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95    #ifdef __BORLANDC__
96    #define _setmode(handle, mode) setmode(handle, mode)
97    #endif
98    
99    /* Not Windows */
100    
101  #else  #else
102  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
103  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 70  input mode under Windows. */ Line 105  input mode under Windows. */
105  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
106  #endif  #endif
107    
108    #define PRIV(name) name
109    
110  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
111  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 81  here before pcre_internal.h so that the Line 117  here before pcre_internal.h so that the
117  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
118    
119  #include "pcre.h"  #include "pcre.h"
120    
121    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
122    /* Configure internal macros to 16 bit mode. */
123    #define COMPILE_PCRE16
124    #endif
125    
126  #include "pcre_internal.h"  #include "pcre_internal.h"
127    
128  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* The pcre_printint() function, which prints the internal form of a compiled
129  two copies, we include the source file here, changing the names of the external  regex, is held in a separate file so that (a) it can be compiled in either
130  symbols to prevent clashes. */  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
131    when that is compiled in debug mode. */
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
132    
133  #include "pcre_tables.c"  #ifdef SUPPORT_PCRE8
134    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
135    #endif
136    #ifdef SUPPORT_PCRE16
137    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
138    #endif
139    
140    /* We need access to some of the data tables that PCRE uses. So as not to have
141    to keep two copies, we include the source file here, changing the names of the
142    external symbols to prevent clashes. */
143    
144  /* We also need the pcre_printint() function for printing out compiled  #define PCRE_INCLUDED
145  patterns. This function is in a separate file so that it can be included in  
146  pcre_compile.c when that module is compiled with debugging enabled.  #include "pcre_tables.c"
147    
148  The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
149  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
150  contained in this file. We use it here also, in cases when the locale has not  the same as in the printint.src file. We use it here in cases when the locale
151  been explicitly changed, so as to get consistent output from systems that  has not been explicitly changed, so as to get consistent output from systems
152  differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
153    
154  #include "pcre_printint.src"  #ifdef EBCDIC
155    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
156    #else
157    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
158    #endif
159    
160  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
161    
162    /* Posix support is disabled in 16 bit only mode. */
163    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
164    #define NOPOSIX
165    #endif
166    
167  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
168  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 122  Makefile. */ Line 172  Makefile. */
172  #include "pcreposix.h"  #include "pcreposix.h"
173  #endif  #endif
174    
175  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
176  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
177  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), and without the interface to the DFA matcher (NODFA). In fact, we
178  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
179  UTF8 support if PCRE is built without it. */  
180    #ifndef SUPPORT_UTF
181  #ifndef SUPPORT_UTF8  #ifndef NOUTF
182  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
183  #endif  #endif
184  #endif  #endif
185    
186    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
187    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
188    only from one place and is handled differently). I couldn't dream up any way of
189    using a single macro to do this in a generic way, because of the many different
190    argument requirements. We know that at least one of SUPPORT_PCRE8 and
191    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
192    use these in the definitions of generic macros.
193    
194    **** Special note about the PCHARSxxx macros: the address of the string to be
195    printed is always given as two arguments: a base address followed by an offset.
196    The base address is cast to the correct data size for 8 or 16 bit data; the
197    offset is in units of this size. If the string were given as base+offset in one
198    argument, the casting might be incorrectly applied. */
199    
200    #ifdef SUPPORT_PCRE8
201    
202    #define PCHARS8(lv, p, offset, len, f) \
203      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
204    
205    #define PCHARSV8(p, offset, len, f) \
206      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
207    
208    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
209      p = read_capture_name8(p, cn8, re)
210    
211    #define STRLEN8(p) ((int)strlen((char *)p))
212    
213    #define SET_PCRE_CALLOUT8(callout) \
214      pcre_callout = callout
215    
216    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
217       pcre_assign_jit_stack(extra, callback, userdata)
218    
219    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
220      re = pcre_compile((char *)pat, options, error, erroffset, tables)
221    
222    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
223        namesptr, cbuffer, size) \
224      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
225        (char *)namesptr, cbuffer, size)
226    
227    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
228      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
229    
230    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
231        offsets, size_offsets, workspace, size_workspace) \
232      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
233        offsets, size_offsets, workspace, size_workspace)
234    
235    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
236        offsets, size_offsets) \
237      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
238        offsets, size_offsets)
239    
240    #define PCRE_FREE_STUDY8(extra) \
241      pcre_free_study(extra)
242    
243    #define PCRE_FREE_SUBSTRING8(substring) \
244      pcre_free_substring(substring)
245    
246    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
247      pcre_free_substring_list(listptr)
248    
249    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
250        getnamesptr, subsptr) \
251      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
252        (char *)getnamesptr, subsptr)
253    
254    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
255      n = pcre_get_stringnumber(re, (char *)ptr)
256    
257    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
258      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
259    
260    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
261      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
262    
263    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
264      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
265    
266    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
267      pcre_printint(re, outfile, debug_lengths)
268    
269    #define PCRE_STUDY8(extra, re, options, error) \
270      extra = pcre_study(re, options, error)
271    
272    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
273      pcre_jit_stack_alloc(startsize, maxsize)
274    
275    #define PCRE_JIT_STACK_FREE8(stack) \
276      pcre_jit_stack_free(stack)
277    
278    #endif /* SUPPORT_PCRE8 */
279    
280    /* -----------------------------------------------------------*/
281    
282    #ifdef SUPPORT_PCRE16
283    
284    #define PCHARS16(lv, p, offset, len, f) \
285      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
286    
287    #define PCHARSV16(p, offset, len, f) \
288      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
289    
290    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
291      p = read_capture_name16(p, cn16, re)
292    
293    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
294    
295    #define SET_PCRE_CALLOUT16(callout) \
296      pcre16_callout = (int (*)(pcre16_callout_block *))callout
297    
298    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
299      pcre16_assign_jit_stack((pcre16_extra *)extra, \
300        (pcre16_jit_callback)callback, userdata)
301    
302    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
303      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
304        tables)
305    
306    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
307        namesptr, cbuffer, size) \
308      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
309        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
310    
311    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
312      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
313        (PCRE_UCHAR16 *)cbuffer, size/2)
314    
315    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
316        offsets, size_offsets, workspace, size_workspace) \
317      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
318        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
319        workspace, size_workspace)
320    
321    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
322        offsets, size_offsets) \
323      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
324        len, start_offset, options, offsets, size_offsets)
325    
326    #define PCRE_FREE_STUDY16(extra) \
327      pcre16_free_study((pcre16_extra *)extra)
328    
329    #define PCRE_FREE_SUBSTRING16(substring) \
330      pcre16_free_substring((PCRE_SPTR16)substring)
331    
332    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
333      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
334    
335    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
336        getnamesptr, subsptr) \
337      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
338        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
339    
340    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
341      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
342    
343    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
344      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
345        (PCRE_SPTR16 *)(void*)subsptr)
346    
347    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
348      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
349        (PCRE_SPTR16 **)(void*)listptr)
350    
351    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
352      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
353        tables)
354    
355    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
356      pcre16_printint(re, outfile, debug_lengths)
357    
358    #define PCRE_STUDY16(extra, re, options, error) \
359      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
360    
361    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
362      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
363    
364    #define PCRE_JIT_STACK_FREE16(stack) \
365      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
366    
367    #endif /* SUPPORT_PCRE16 */
368    
369    
370    /* ----- Both modes are supported; a runtime test is needed, except for
371    pcre_config(), and the JIT stack functions, when it doesn't matter which
372    version is called. ----- */
373    
374    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
375    
376    #define CHAR_SIZE (use_pcre16? 2:1)
377    
378    #define PCHARS(lv, p, offset, len, f) \
379      if (use_pcre16) \
380        PCHARS16(lv, p, offset, len, f); \
381      else \
382        PCHARS8(lv, p, offset, len, f)
383    
384    #define PCHARSV(p, offset, len, f) \
385      if (use_pcre16) \
386        PCHARSV16(p, offset, len, f); \
387      else \
388        PCHARSV8(p, offset, len, f)
389    
390    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
391      if (use_pcre16) \
392        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
393      else \
394        READ_CAPTURE_NAME8(p, cn8, cn16, re)
395    
396    #define SET_PCRE_CALLOUT(callout) \
397      if (use_pcre16) \
398        SET_PCRE_CALLOUT16(callout); \
399      else \
400        SET_PCRE_CALLOUT8(callout)
401    
402    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
403    
404    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
405      if (use_pcre16) \
406        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
407      else \
408        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
409    
410    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
411      if (use_pcre16) \
412        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
413      else \
414        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
415    
416    #define PCRE_CONFIG pcre_config
417    
418    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
419        namesptr, cbuffer, size) \
420      if (use_pcre16) \
421        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
422          namesptr, cbuffer, size); \
423      else \
424        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
425          namesptr, cbuffer, size)
426    
427    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
428      if (use_pcre16) \
429        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
430      else \
431        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
432    
433    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
434        offsets, size_offsets, workspace, size_workspace) \
435      if (use_pcre16) \
436        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
437          offsets, size_offsets, workspace, size_workspace); \
438      else \
439        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
440          offsets, size_offsets, workspace, size_workspace)
441    
442    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
443        offsets, size_offsets) \
444      if (use_pcre16) \
445        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
446          offsets, size_offsets); \
447      else \
448        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
449          offsets, size_offsets)
450    
451    #define PCRE_FREE_STUDY(extra) \
452      if (use_pcre16) \
453        PCRE_FREE_STUDY16(extra); \
454      else \
455        PCRE_FREE_STUDY8(extra)
456    
457    #define PCRE_FREE_SUBSTRING(substring) \
458      if (use_pcre16) \
459        PCRE_FREE_SUBSTRING16(substring); \
460      else \
461        PCRE_FREE_SUBSTRING8(substring)
462    
463    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
464      if (use_pcre16) \
465        PCRE_FREE_SUBSTRING_LIST16(listptr); \
466      else \
467        PCRE_FREE_SUBSTRING_LIST8(listptr)
468    
469    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
470        getnamesptr, subsptr) \
471      if (use_pcre16) \
472        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
473          getnamesptr, subsptr); \
474      else \
475        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
476          getnamesptr, subsptr)
477    
478    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
479      if (use_pcre16) \
480        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
481      else \
482        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
483    
484    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
485      if (use_pcre16) \
486        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
487      else \
488        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
489    
490    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
491      if (use_pcre16) \
492        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
493      else \
494        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
495    
496    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
497      (use_pcre16 ? \
498         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
499        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
500    
501    #define PCRE_JIT_STACK_FREE(stack) \
502      if (use_pcre16) \
503        PCRE_JIT_STACK_FREE16(stack); \
504      else \
505        PCRE_JIT_STACK_FREE8(stack)
506    
507    #define PCRE_MAKETABLES \
508      (use_pcre16? pcre16_maketables() : pcre_maketables())
509    
510    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
511      if (use_pcre16) \
512        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
513      else \
514        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
515    
516    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
517      if (use_pcre16) \
518        PCRE_PRINTINT16(re, outfile, debug_lengths); \
519      else \
520        PCRE_PRINTINT8(re, outfile, debug_lengths)
521    
522    #define PCRE_STUDY(extra, re, options, error) \
523      if (use_pcre16) \
524        PCRE_STUDY16(extra, re, options, error); \
525      else \
526        PCRE_STUDY8(extra, re, options, error)
527    
528    /* ----- Only 8-bit mode is supported ----- */
529    
530    #elif defined SUPPORT_PCRE8
531    #define CHAR_SIZE                 1
532    #define PCHARS                    PCHARS8
533    #define PCHARSV                   PCHARSV8
534    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
535    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
536    #define STRLEN                    STRLEN8
537    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
538    #define PCRE_COMPILE              PCRE_COMPILE8
539    #define PCRE_CONFIG               pcre_config
540    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
541    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
542    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
543    #define PCRE_EXEC                 PCRE_EXEC8
544    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
545    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
546    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
547    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
548    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
549    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
550    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
551    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
552    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
553    #define PCRE_MAKETABLES           pcre_maketables()
554    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
555    #define PCRE_PRINTINT             PCRE_PRINTINT8
556    #define PCRE_STUDY                PCRE_STUDY8
557    
558    /* ----- Only 16-bit mode is supported ----- */
559    
560    #else
561    #define CHAR_SIZE                 2
562    #define PCHARS                    PCHARS16
563    #define PCHARSV                   PCHARSV16
564    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
565    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
566    #define STRLEN                    STRLEN16
567    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
568    #define PCRE_COMPILE              PCRE_COMPILE16
569    #define PCRE_CONFIG               pcre16_config
570    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
571    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
572    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
573    #define PCRE_EXEC                 PCRE_EXEC16
574    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
575    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
576    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
577    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
578    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
579    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
580    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
581    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
582    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
583    #define PCRE_MAKETABLES           pcre16_maketables()
584    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
585    #define PCRE_PRINTINT             PCRE_PRINTINT16
586    #define PCRE_STUDY                PCRE_STUDY16
587    #endif
588    
589    /* ----- End of mode-specific function call macros ----- */
590    
591    
592  /* Other parameters */  /* Other parameters */
593    
# Line 161  static int debug_lengths; Line 615  static int debug_lengths;
615  static int first_callout;  static int first_callout;
616  static int locale_set = 0;  static int locale_set = 0;
617  static int show_malloc;  static int show_malloc;
618  static int use_utf8;  static int use_utf;
619  static size_t gotten_store;  static size_t gotten_store;
620    static size_t first_gotten_store = 0;
621    static const unsigned char *last_callout_mark = NULL;
622    
623  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
624    
625  static int buffer_size = 50000;  static int buffer_size = 50000;
626  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
627  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
628  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
629    
630    /* Another buffer is needed for translation to 16-bit character strings. It will
631    be obtained and extended as required. */
632    
633    #ifdef SUPPORT_PCRE16
634    static int buffer16_size = 0;
635    static pcre_uint16 *buffer16 = NULL;
636    
637    #ifdef SUPPORT_PCRE8
638    
639    /* We need the table of operator lengths that is used for 16-bit compiling, in
640    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
641    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
642    appropriately for the 16-bit world. Just as a safety check, make sure that
643    COMPILE_PCRE16 is *not* set. */
644    
645  /*************************************************  #ifdef COMPILE_PCRE16
646  *        Read or extend an input line            *  #error COMPILE_PCRE16 must not be set when compiling pcretest.c
647  *************************************************/  #endif
648    
649  /* Input lines are read into buffer, but both patterns and data lines can be  #if LINK_SIZE == 2
650  continued over multiple input lines. In addition, if the buffer fills up, we  #undef LINK_SIZE
651  want to automatically expand it so as to be able to handle extremely large  #define LINK_SIZE 1
652  lines that are needed for certain stress tests. When the input buffer is  #elif LINK_SIZE == 3 || LINK_SIZE == 4
653  expanded, the other two buffers must also be expanded likewise, and the  #undef LINK_SIZE
654  contents of pbuffer, which are a copy of the input for callouts, must be  #define LINK_SIZE 2
655  preserved (for when expansion happens for a data line). This is not the most  #else
656  optimal way of handling this, but hey, this is just a test program!  #error LINK_SIZE must be either 2, 3, or 4
657    #endif
658    
659  Arguments:  #undef IMM2_SIZE
660    f            the file to read  #define IMM2_SIZE 1
   start        where in buffer to start (this *must* be within buffer)  
661    
662  Returns:       pointer to the start of new data  #endif /* SUPPORT_PCRE8 */
                could be a copy of start, or could be moved  
                NULL if no data read and EOF reached  
 */  
663    
664  static uschar *  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
665  extend_inputline(FILE *f, uschar *start)  #endif  /* SUPPORT_PCRE16 */
 {  
 uschar *here = start;  
666    
667  for (;;)  /* If we have 8-bit support, default use_pcre16 to false; if there is also
668    {  16-bit support, it can be changed by an option. If there is no 8-bit support,
669    int rlen = buffer_size - (here - buffer);  there must be 16-bit support, so default it to 1. */
670    
671    if (rlen > 1000)  #ifdef SUPPORT_PCRE8
672      {  static int use_pcre16 = 0;
673      int dlen;  #else
674      if (fgets((char *)here, rlen,  f) == NULL)  static int use_pcre16 = 1;
675        return (here == start)? NULL : start;  #endif
     dlen = (int)strlen((char *)here);  
     if (dlen > 0 && here[dlen - 1] == '\n') return start;  
     here += dlen;  
     }  
676    
677    else  /* Textual explanations for runtime error codes */
     {  
     int new_buffer_size = 2*buffer_size;  
     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);  
     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);  
     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);  
678    
679      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)  static const char *errtexts[] = {
680        {    NULL,  /* 0 is no error */
681        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);    NULL,  /* NOMATCH is handled specially */
682        exit(1);    "NULL argument passed",
683        }    "bad option value",
684      "magic number missing",
685      "unknown opcode - pattern overwritten?",
686      "no more memory",
687      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
688      "match limit exceeded",
689      "callout error code",
690      NULL,  /* BADUTF8/16 is handled specially */
691      NULL,  /* BADUTF8/16 offset is handled specially */
692      NULL,  /* PARTIAL is handled specially */
693      "not used - internal error",
694      "internal error - pattern overwritten?",
695      "bad count value",
696      "item unsupported for DFA matching",
697      "backreference condition or recursion test not supported for DFA matching",
698      "match limit not supported for DFA matching",
699      "workspace size exceeded in DFA matching",
700      "too much recursion for DFA matching",
701      "recursion limit exceeded",
702      "not used - internal error",
703      "invalid combination of newline options",
704      "bad offset value",
705      NULL,  /* SHORTUTF8/16 is handled specially */
706      "nested recursion at the same subject position",
707      "JIT stack limit reached",
708      "pattern compiled in wrong mode: 8-bit/16-bit error"
709    };
710    
     memcpy(new_buffer, buffer, buffer_size);  
     memcpy(new_pbuffer, pbuffer, buffer_size);  
711    
712      buffer_size = new_buffer_size;  /*************************************************
713    *         Alternate character tables             *
714    *************************************************/
715    
716      start = new_buffer + (start - buffer);  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
717      here = new_buffer + (here - buffer);  using the default tables of the library. However, the T option can be used to
718    select alternate sets of tables, for different kinds of testing. Note also that
719    the L (locale) option also adjusts the tables. */
720    
721    /* This is the set of tables distributed as default with PCRE. It recognizes
722    only ASCII characters. */
723    
724    static const pcre_uint8 tables0[] = {
725    
726    /* This table is a lower casing table. */
727    
728        0,  1,  2,  3,  4,  5,  6,  7,
729        8,  9, 10, 11, 12, 13, 14, 15,
730       16, 17, 18, 19, 20, 21, 22, 23,
731       24, 25, 26, 27, 28, 29, 30, 31,
732       32, 33, 34, 35, 36, 37, 38, 39,
733       40, 41, 42, 43, 44, 45, 46, 47,
734       48, 49, 50, 51, 52, 53, 54, 55,
735       56, 57, 58, 59, 60, 61, 62, 63,
736       64, 97, 98, 99,100,101,102,103,
737      104,105,106,107,108,109,110,111,
738      112,113,114,115,116,117,118,119,
739      120,121,122, 91, 92, 93, 94, 95,
740       96, 97, 98, 99,100,101,102,103,
741      104,105,106,107,108,109,110,111,
742      112,113,114,115,116,117,118,119,
743      120,121,122,123,124,125,126,127,
744      128,129,130,131,132,133,134,135,
745      136,137,138,139,140,141,142,143,
746      144,145,146,147,148,149,150,151,
747      152,153,154,155,156,157,158,159,
748      160,161,162,163,164,165,166,167,
749      168,169,170,171,172,173,174,175,
750      176,177,178,179,180,181,182,183,
751      184,185,186,187,188,189,190,191,
752      192,193,194,195,196,197,198,199,
753      200,201,202,203,204,205,206,207,
754      208,209,210,211,212,213,214,215,
755      216,217,218,219,220,221,222,223,
756      224,225,226,227,228,229,230,231,
757      232,233,234,235,236,237,238,239,
758      240,241,242,243,244,245,246,247,
759      248,249,250,251,252,253,254,255,
760    
761    /* This table is a case flipping table. */
762    
763        0,  1,  2,  3,  4,  5,  6,  7,
764        8,  9, 10, 11, 12, 13, 14, 15,
765       16, 17, 18, 19, 20, 21, 22, 23,
766       24, 25, 26, 27, 28, 29, 30, 31,
767       32, 33, 34, 35, 36, 37, 38, 39,
768       40, 41, 42, 43, 44, 45, 46, 47,
769       48, 49, 50, 51, 52, 53, 54, 55,
770       56, 57, 58, 59, 60, 61, 62, 63,
771       64, 97, 98, 99,100,101,102,103,
772      104,105,106,107,108,109,110,111,
773      112,113,114,115,116,117,118,119,
774      120,121,122, 91, 92, 93, 94, 95,
775       96, 65, 66, 67, 68, 69, 70, 71,
776       72, 73, 74, 75, 76, 77, 78, 79,
777       80, 81, 82, 83, 84, 85, 86, 87,
778       88, 89, 90,123,124,125,126,127,
779      128,129,130,131,132,133,134,135,
780      136,137,138,139,140,141,142,143,
781      144,145,146,147,148,149,150,151,
782      152,153,154,155,156,157,158,159,
783      160,161,162,163,164,165,166,167,
784      168,169,170,171,172,173,174,175,
785      176,177,178,179,180,181,182,183,
786      184,185,186,187,188,189,190,191,
787      192,193,194,195,196,197,198,199,
788      200,201,202,203,204,205,206,207,
789      208,209,210,211,212,213,214,215,
790      216,217,218,219,220,221,222,223,
791      224,225,226,227,228,229,230,231,
792      232,233,234,235,236,237,238,239,
793      240,241,242,243,244,245,246,247,
794      248,249,250,251,252,253,254,255,
795    
796    /* This table contains bit maps for various character classes. Each map is 32
797    bytes long and the bits run from the least significant end of each byte. The
798    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
799    graph, print, punct, and cntrl. Other classes are built from combinations. */
800    
801      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
802      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
803      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805    
806      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
807      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
808      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
809      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810    
811      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
812      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815    
816      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
817      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
818      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820    
821      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
822      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
823      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
824      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825    
826      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
827      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
828      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
829      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830    
831      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
832      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
833      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
834      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835    
836      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
837      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
838      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
839      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840    
841      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
842      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
843      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845    
846      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
847      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850    
851    /* This table identifies various classes of character by individual bits:
852      0x01   white space character
853      0x02   letter
854      0x04   decimal digit
855      0x08   hexadecimal digit
856      0x10   alphanumeric or '_'
857      0x80   regular expression metacharacter or binary zero
858    */
859    
860      free(buffer);    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
861      free(dbuffer);    0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
862      free(pbuffer);    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
863      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
864      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
865      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
866      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
867      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
868      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
869      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
870      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
871      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
872      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
873      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
874      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
875      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
876      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
877      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
878      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
879      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
880      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
881      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
882      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
883      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
884      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
885      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
886      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
887      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
888      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
889      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
890      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
891      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
892    
893    /* This is a set of tables that came originally from a Windows user. It seems to
894    be at least an approximation of ISO 8859. In particular, there are characters
895    greater than 128 that are marked as spaces, letters, etc. */
896    
897    static const pcre_uint8 tables1[] = {
898    0,1,2,3,4,5,6,7,
899    8,9,10,11,12,13,14,15,
900    16,17,18,19,20,21,22,23,
901    24,25,26,27,28,29,30,31,
902    32,33,34,35,36,37,38,39,
903    40,41,42,43,44,45,46,47,
904    48,49,50,51,52,53,54,55,
905    56,57,58,59,60,61,62,63,
906    64,97,98,99,100,101,102,103,
907    104,105,106,107,108,109,110,111,
908    112,113,114,115,116,117,118,119,
909    120,121,122,91,92,93,94,95,
910    96,97,98,99,100,101,102,103,
911    104,105,106,107,108,109,110,111,
912    112,113,114,115,116,117,118,119,
913    120,121,122,123,124,125,126,127,
914    128,129,130,131,132,133,134,135,
915    136,137,138,139,140,141,142,143,
916    144,145,146,147,148,149,150,151,
917    152,153,154,155,156,157,158,159,
918    160,161,162,163,164,165,166,167,
919    168,169,170,171,172,173,174,175,
920    176,177,178,179,180,181,182,183,
921    184,185,186,187,188,189,190,191,
922    224,225,226,227,228,229,230,231,
923    232,233,234,235,236,237,238,239,
924    240,241,242,243,244,245,246,215,
925    248,249,250,251,252,253,254,223,
926    224,225,226,227,228,229,230,231,
927    232,233,234,235,236,237,238,239,
928    240,241,242,243,244,245,246,247,
929    248,249,250,251,252,253,254,255,
930    0,1,2,3,4,5,6,7,
931    8,9,10,11,12,13,14,15,
932    16,17,18,19,20,21,22,23,
933    24,25,26,27,28,29,30,31,
934    32,33,34,35,36,37,38,39,
935    40,41,42,43,44,45,46,47,
936    48,49,50,51,52,53,54,55,
937    56,57,58,59,60,61,62,63,
938    64,97,98,99,100,101,102,103,
939    104,105,106,107,108,109,110,111,
940    112,113,114,115,116,117,118,119,
941    120,121,122,91,92,93,94,95,
942    96,65,66,67,68,69,70,71,
943    72,73,74,75,76,77,78,79,
944    80,81,82,83,84,85,86,87,
945    88,89,90,123,124,125,126,127,
946    128,129,130,131,132,133,134,135,
947    136,137,138,139,140,141,142,143,
948    144,145,146,147,148,149,150,151,
949    152,153,154,155,156,157,158,159,
950    160,161,162,163,164,165,166,167,
951    168,169,170,171,172,173,174,175,
952    176,177,178,179,180,181,182,183,
953    184,185,186,187,188,189,190,191,
954    224,225,226,227,228,229,230,231,
955    232,233,234,235,236,237,238,239,
956    240,241,242,243,244,245,246,215,
957    248,249,250,251,252,253,254,223,
958    192,193,194,195,196,197,198,199,
959    200,201,202,203,204,205,206,207,
960    208,209,210,211,212,213,214,247,
961    216,217,218,219,220,221,222,255,
962    0,62,0,0,1,0,0,0,
963    0,0,0,0,0,0,0,0,
964    32,0,0,0,1,0,0,0,
965    0,0,0,0,0,0,0,0,
966    0,0,0,0,0,0,255,3,
967    126,0,0,0,126,0,0,0,
968    0,0,0,0,0,0,0,0,
969    0,0,0,0,0,0,0,0,
970    0,0,0,0,0,0,255,3,
971    0,0,0,0,0,0,0,0,
972    0,0,0,0,0,0,12,2,
973    0,0,0,0,0,0,0,0,
974    0,0,0,0,0,0,0,0,
975    254,255,255,7,0,0,0,0,
976    0,0,0,0,0,0,0,0,
977    255,255,127,127,0,0,0,0,
978    0,0,0,0,0,0,0,0,
979    0,0,0,0,254,255,255,7,
980    0,0,0,0,0,4,32,4,
981    0,0,0,128,255,255,127,255,
982    0,0,0,0,0,0,255,3,
983    254,255,255,135,254,255,255,7,
984    0,0,0,0,0,4,44,6,
985    255,255,127,255,255,255,127,255,
986    0,0,0,0,254,255,255,255,
987    255,255,255,255,255,255,255,127,
988    0,0,0,0,254,255,255,255,
989    255,255,255,255,255,255,255,255,
990    0,2,0,0,255,255,255,255,
991    255,255,255,255,255,255,255,127,
992    0,0,0,0,255,255,255,255,
993    255,255,255,255,255,255,255,255,
994    0,0,0,0,254,255,0,252,
995    1,0,0,248,1,0,0,120,
996    0,0,0,0,254,255,255,255,
997    0,0,128,0,0,0,128,0,
998    255,255,255,255,0,0,0,0,
999    0,0,0,0,0,0,0,128,
1000    255,255,255,255,0,0,0,0,
1001    0,0,0,0,0,0,0,0,
1002    128,0,0,0,0,0,0,0,
1003    0,1,1,0,1,1,0,0,
1004    0,0,0,0,0,0,0,0,
1005    0,0,0,0,0,0,0,0,
1006    1,0,0,0,128,0,0,0,
1007    128,128,128,128,0,0,128,0,
1008    28,28,28,28,28,28,28,28,
1009    28,28,0,0,0,0,0,128,
1010    0,26,26,26,26,26,26,18,
1011    18,18,18,18,18,18,18,18,
1012    18,18,18,18,18,18,18,18,
1013    18,18,18,128,128,0,128,16,
1014    0,26,26,26,26,26,26,18,
1015    18,18,18,18,18,18,18,18,
1016    18,18,18,18,18,18,18,18,
1017    18,18,18,128,128,0,0,0,
1018    0,0,0,0,0,1,0,0,
1019    0,0,0,0,0,0,0,0,
1020    0,0,0,0,0,0,0,0,
1021    0,0,0,0,0,0,0,0,
1022    1,0,0,0,0,0,0,0,
1023    0,0,18,0,0,0,0,0,
1024    0,0,20,20,0,18,0,0,
1025    0,20,18,0,0,0,0,0,
1026    18,18,18,18,18,18,18,18,
1027    18,18,18,18,18,18,18,18,
1028    18,18,18,18,18,18,18,0,
1029    18,18,18,18,18,18,18,18,
1030    18,18,18,18,18,18,18,18,
1031    18,18,18,18,18,18,18,18,
1032    18,18,18,18,18,18,18,0,
1033    18,18,18,18,18,18,18,18
1034    };
1035    
     buffer = new_buffer;  
     dbuffer = new_dbuffer;  
     pbuffer = new_pbuffer;  
     }  
   }  
1036    
 return NULL;  /* Control never gets here */  
 }  
1037    
1038    
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for strerror() on old systems (SunOS4 is one example) whose C
library does not supply it. The message is taken from the sys_errlist[] table
that such libraries export; any number outside 0 .. sys_nerr-1 yields a fixed
"unknown" text instead.

Argument:   n     the error number
Returns:    the message text for that number
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1058    
1059    
1060  /*************************************************  /*************************************************
1061  *          Read number from string               *  *         JIT memory callback                    *
1062  *************************************************/  *************************************************/
1063    
1064  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  static pcre_jit_stack* jit_callback(void *arg)
 around with conditional compilation, just do the job by hand. It is only used  
 for unpicking arguments, so just keep it simple.  
   
 Arguments:  
   str           string to be converted  
   endptr        where to put the end pointer  
   
 Returns:        the unsigned long  
 */  
   
 static int  
 get_value(unsigned char *str, unsigned char **endptr)  
1065  {  {
1066  int result = 0;  return (pcre_jit_stack *)arg;
 while(*str != 0 && isspace(*str)) str++;  
 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');  
 *endptr = str;  
 return(result);  
1067  }  }
1068    
1069    
1070    #if !defined NOUTF || defined SUPPORT_PCRE16
   
1071  /*************************************************  /*************************************************
1072  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
1073  *************************************************/  *************************************************/
# Line 297  Returns:      >  0 => the number of byte Line 1083  Returns:      >  0 => the number of byte
1083                -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
1084  */  */
1085    
 #if !defined NOUTF8  
   
1086  static int  static int
1087  utf82ord(unsigned char *utf8bytes, int *vptr)  utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1088  {  {
1089  int c = *utf8bytes++;  int c = *utf8bytes++;
1090  int d = c;  int d = c;
# Line 339  if (j != i) return -(i+1); Line 1123  if (j != i) return -(i+1);
1123  *vptr = d;  *vptr = d;
1124  return i+1;  return i+1;
1125  }  }
1126    #endif /* NOUTF || SUPPORT_PCRE16 */
 #endif  
1127    
1128    
1129    
1130    #if !defined NOUTF || defined SUPPORT_PCRE16
1131  /*************************************************  /*************************************************
1132  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
1133  *************************************************/  *************************************************/
# Line 358  Arguments: Line 1142  Arguments:
1142  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
1143  */  */
1144    
 #if !defined NOUTF8  
   
1145  static int  static int
1146  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1147  {  {
1148  register int i, j;  register int i, j;
1149  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
# Line 375  for (j = i; j > 0; j--) Line 1157  for (j = i; j > 0; j--)
1157  *utf8bytes = utf8_table2[i] | cvalue;  *utf8bytes = utf8_table2[i] | cvalue;
1158  return i + 1;  return i + 1;
1159  }  }
1160    #endif
1161    
1162    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The converted string is always built in buffer16, which is enlarged here if
it is too small. Twice the 8-bit size (plus room for the terminating zero) is
always enough: in non-UTF mode each byte becomes one 16-bit item, and a UTF-8
character never needs more UTF-16 space than twice its UTF-8 length.

Surrogate values are deliberately not rejected, so that invalid UTF-16 strings
can be constructed to test that the library faults them.

A pattern is either plain ASCII or UTF-8. A data line is always treated as
UTF-8, so that values above 255 can be expressed even when not in UTF mode.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int left = len;

/* Replace buffer16 by a larger block if it cannot hold the result. The old
contents are not needed, so nothing is copied. */

if (buffer16_size < 2*len + 2)
  {
  free(buffer16);      /* free(NULL) is a no-op, so no guard is needed */
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* Decode one UTF-8 character at a time. Values that do not fit in one
  16-bit item become a surrogate pair, which is allowed only in UTF mode. */

  int value = 0;
  while (left > 0)
    {
    int used = utf82ord(p, &value);
    if (used <= 0) return -1;
    if (value > 0x10ffff) return -2;
    p += used;
    left -= used;
    if (value >= 0x10000)
      {
      if (!utf) return -3;
      value -= 0x10000;
      *out++ = 0xD800 | (value >> 10);
      *out++ = 0xDC00 | (value & 0x3ff);
      }
    else *out++ = value;
    }
  }

else
  {
  /* A non-UTF pattern: each byte simply widens to one 16-bit item. */

  for (; left > 0; left--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1241    
1242    
1243    /*************************************************
1244    *        Read or extend an input line            *
1245    *************************************************/
1246    
1247    /* Input lines are read into buffer, but both patterns and data lines can be
1248    continued over multiple input lines. In addition, if the buffer fills up, we
1249    want to automatically expand it so as to be able to handle extremely large
1250    lines that are needed for certain stress tests. When the input buffer is
1251    expanded, the other two buffers must also be expanded likewise, and the
1252    contents of pbuffer, which are a copy of the input for callouts, must be
1253    preserved (for when expansion happens for a data line). This is not the most
1254    optimal way of handling this, but hey, this is just a test program!
1255    
1256    Arguments:
1257      f            the file to read
1258      start        where in buffer to start (this *must* be within buffer)
1259      prompt       for stdin or readline()
1260    
1261    Returns:       pointer to the start of new data
1262                   could be a copy of start, or could be moved
1263                   NULL if no data read and EOF reached
1264    */
1265    
1266    static pcre_uint8 *
1267    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1268    {
1269    pcre_uint8 *here = start;
1270    
1271    for (;;)
1272      {
1273      size_t rlen = (size_t)(buffer_size - (here - buffer));
1274    
1275      if (rlen > 1000)
1276        {
1277        int dlen;
1278    
1279        /* If libreadline support is required, use readline() to read a line if the
1280        input is a terminal. Note that readline() removes the trailing newline, so
1281        we must put it back again, to be compatible with fgets(). */
1282    
1283    #ifdef SUPPORT_LIBREADLINE
1284        if (isatty(fileno(f)))
1285          {
1286          size_t len;
1287          char *s = readline(prompt);
1288          if (s == NULL) return (here == start)? NULL : start;
1289          len = strlen(s);
1290          if (len > 0) add_history(s);
1291          if (len > rlen - 1) len = rlen - 1;
1292          memcpy(here, s, len);
1293          here[len] = '\n';
1294          here[len+1] = 0;
1295          free(s);
1296          }
1297        else
1298  #endif  #endif
1299    
1300        /* Read the next line by normal means, prompting if the file is stdin. */
1301    
1302          {
1303          if (f == stdin) printf("%s", prompt);
1304          if (fgets((char *)here, rlen,  f) == NULL)
1305            return (here == start)? NULL : start;
1306          }
1307    
1308        dlen = (int)strlen((char *)here);
1309        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1310        here += dlen;
1311        }
1312    
1313      else
1314        {
1315        int new_buffer_size = 2*buffer_size;
1316        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1317        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1318        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319    
1320        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1321          {
1322          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1323          exit(1);
1324          }
1325    
1326        memcpy(new_buffer, buffer, buffer_size);
1327        memcpy(new_pbuffer, pbuffer, buffer_size);
1328    
1329        buffer_size = new_buffer_size;
1330    
1331        start = new_buffer + (start - buffer);
1332        here = new_buffer + (here - buffer);
1333    
1334        free(buffer);
1335        free(dbuffer);
1336        free(pbuffer);
1337    
1338        buffer = new_buffer;
1339        dbuffer = new_dbuffer;
1340        pbuffer = new_pbuffer;
1341        }
1342      }
1343    
1344    return NULL;  /* Control never gets here */
1345    }
1346    
1347    
1348    
1349    /*************************************************
1350    *          Read number from string               *
1351    *************************************************/
1352    
1353    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1354    around with conditional compilation, just do the job by hand. It is only used
1355    for unpicking arguments, so just keep it simple.
1356    
1357    Arguments:
1358      str           string to be converted
1359      endptr        where to put the end pointer
1360    
1361    Returns:        the unsigned long
1362    */
1363    
1364    static int
1365    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1366    {
1367    int result = 0;
1368    while(*str != 0 && isspace(*str)) str++;
1369    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1370    *endptr = str;
1371    return(result);
1372    }
1373    
1374    
1375    
1376  /*************************************************  /*************************************************
1377  *             Print character string             *  *             Print one character                *
1378  *************************************************/  *************************************************/
1379    
1380  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Print a single character either literally, or as a hex escape. */
 mode. Yields number of characters printed. If handed a NULL file, just counts  
 chars without printing. */  
1381    
1382  static int pchars(unsigned char *p, int length, FILE *f)  static int pchar(int c, FILE *f)
1383    {
1384    if (PRINTOK(c))
1385      {
1386      if (f != NULL) fprintf(f, "%c", c);
1387      return 1;
1388      }
1389    
1390    if (c < 0x100)
1391      {
1392      if (use_utf)
1393        {
1394        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1395        return 6;
1396        }
1397      else
1398        {
1399        if (f != NULL) fprintf(f, "\\x%02x", c);
1400        return 4;
1401        }
1402      }
1403    
1404    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1405    return (c <= 0x000000ff)? 6 :
1406           (c <= 0x00000fff)? 7 :
1407           (c <= 0x0000ffff)? 8 :
1408           (c <= 0x000fffff)? 9 : 10;
1409    }
1410    
1411    
1412    
1413    #ifdef SUPPORT_PCRE8
1414    /*************************************************
1415    *         Print 8-bit character string           *
1416    *************************************************/
1417    
1418    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1419    If handed a NULL file, just counts chars without printing. */
1420    
1421    static int pchars(pcre_uint8 *p, int length, FILE *f)
1422  {  {
1423  int c = 0;  int c = 0;
1424  int yield = 0;  int yield = 0;
1425    
1426    if (length < 0)
1427      length = strlen((char *)p);
1428    
1429  while (length-- > 0)  while (length-- > 0)
1430    {    {
1431  #if !defined NOUTF8  #if !defined NOUTF
1432    if (use_utf8)    if (use_utf)
1433      {      {
1434      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
   
1435      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1436        {        {
1437        length -= rc - 1;        length -= rc - 1;
1438        p += rc;        p += rc;
1439        if (PRINTHEX(c))        yield += pchar(c, f);
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
1440        continue;        continue;
1441        }        }
1442      }      }
1443  #endif  #endif
1444      c = *p++;
1445      yield += pchar(c, f);
1446      }
1447    
1448     /* Not UTF-8, or malformed UTF-8  */  return yield;
1449    }
1450    #endif
1451    
1452    c = *p++;  
1453    if (PRINTHEX(c))  
1454      {  #ifdef SUPPORT_PCRE16
1455      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1456      yield++;  *    Find length of 0-terminated 16-bit string   *
1457      }  *************************************************/
1458    else  
1459    static int strlen16(PCRE_SPTR16 p)
1460    {
1461    int len = 0;
1462    while (*p++ != 0) len++;
1463    return len;
1464    }
1465    #endif  /* SUPPORT_PCRE16 */
1466    
1467    
1468    #ifdef SUPPORT_PCRE16
1469    /*************************************************
1470    *           Print 16-bit character string        *
1471    *************************************************/
1472    
1473    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1474    If handed a NULL file, just counts chars without printing. */
1475    
1476    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1477    {
1478    int yield = 0;
1479    
1480    if (length < 0)
1481      length = strlen16(p);
1482    
1483    while (length-- > 0)
1484      {
1485      int c = *p++ & 0xffff;
1486    #if !defined NOUTF
1487      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1488      {      {
1489      if (f != NULL) fprintf(f, "\\x%02x", c);      int d = *p & 0xffff;
1490      yield += 4;      if (d >= 0xDC00 && d < 0xDFFF)
1491          {
1492          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1493          length--;
1494          p++;
1495          }
1496      }      }
1497    #endif
1498      yield += pchar(c, f);
1499    }    }
1500    
1501  return yield;  return yield;
1502  }  }
1503    #endif  /* SUPPORT_PCRE16 */
1504    
1505    
1506    
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* The run of alphanumeric characters at p is copied to the place that *pp
points to and is followed by two zero bytes. The name is then looked up with
pcre_get_stringnumber(), and a message is written to outfile if it is not
found. Finally *pp is moved on to the second of the zero bytes.

Arguments:
  p          points to the name in the input text
  pp         points to the pointer to where the name is to be put
  re         the compiled pattern

Returns:     pointer to the first input character that follows the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *out = *pp;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(*pp, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1530    
1531    
1532    
1533    #ifdef SUPPORT_PCRE16
1534    /*************************************************
1535    *     Read a capture name (16-bit) and check it  *
1536    *************************************************/
1537    
1538    /* Note that the text being read is 8-bit. */
1539    
1540    static pcre_uint8 *
1541    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1542    {
1543    pcre_uint16 *npp = *pp;
1544    while (isalnum(*p)) *npp++ = *p++;
1545    *npp++ = 0;
1546    *npp = 0;
1547    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1548      {
1549      fprintf(outfile, "no parentheses with name \"");
1550      PCHARSV(*pp, 0, -1, outfile);
1551      fprintf(outfile, "\"\n");
1552      }
1553    *pp = npp;
1554    return p;
1555    }
1556    #endif  /* SUPPORT_PCRE16 */
1557    
1558    
1559    
# Line 468  if (callout_extra) Line 1582  if (callout_extra)
1582      else      else
1583        {        {
1584        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1585        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1586          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1587        fprintf(f, "\n");        fprintf(f, "\n");
1588        }        }
# Line 481  printed lengths of the substrings. */ Line 1595  printed lengths of the substrings. */
1595    
1596  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1597    
1598  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1599  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1600    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1601    
1602  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1603    
1604  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1605    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1606    
1607  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 524  fprintf(outfile, "%.*s", (cb->next_item_ Line 1638  fprintf(outfile, "%.*s", (cb->next_item_
1638  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1639  first_callout = 0;  first_callout = 0;
1640    
1641    if (cb->mark != last_callout_mark)
1642      {
1643      if (cb->mark == NULL)
1644        fprintf(outfile, "Latest Mark: <unset>\n");
1645      else
1646        {
1647        fprintf(outfile, "Latest Mark: ");
1648        PCHARSV(cb->mark, 0, -1, outfile);
1649        putc('\n', outfile);
1650        }
1651      last_callout_mark = cb->mark;
1652      }
1653    
1654  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1655    {    {
1656    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 543  return (cb->callout_number != callout_fa Line 1670  return (cb->callout_number != callout_fa
1670  *            Local malloc functions              *  *            Local malloc functions              *
1671  *************************************************/  *************************************************/
1672    
1673  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1674  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1675    show_malloc variable is set only during matching. */
1676    
1677  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1678  {  {
1679  void *block = malloc(size);  void *block = malloc(size);
1680  gotten_store = size;  gotten_store = size;
1681    if (first_gotten_store == 0) first_gotten_store = size;
1682  if (show_malloc)  if (show_malloc)
1683    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1684  return block;  return block;
# Line 562  if (show_malloc) Line 1691  if (show_malloc)
1691  free(block);  free(block);
1692  }  }
1693    
   
1694  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1695    
1696  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 585  free(block); Line 1713  free(block);
1713  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1714  *************************************************/  *************************************************/
1715    
1716  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1717    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1718    value, but the code is defensive.
1719    
1720    Arguments:
1721      re        compiled regex
1722      study     study data
1723      option    PCRE_INFO_xxx option
1724      ptr       where to put the data
1725    
1726  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  Returns:    0 when OK, < 0 on error
1727    */
1728    
1729    static int
1730    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1731  {  {
1732  int rc;  int rc;
1733  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1734    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1735    #ifdef SUPPORT_PCRE16
1736      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1737    #else
1738      rc = PCRE_ERROR_BADMODE;
1739    #endif
1740    else
1741    #ifdef SUPPORT_PCRE8
1742      rc = pcre_fullinfo(re, study, option, ptr);
1743    #else
1744      rc = PCRE_ERROR_BADMODE;
1745    #endif
1746    
1747    if (rc < 0)
1748      {
1749      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1750        use_pcre16? "16" : "", option);
1751      if (rc == PCRE_ERROR_BADMODE)
1752        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1753          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1754      }
1755    
1756    return rc;
1757  }  }
1758    
1759    
1760    
1761  /*************************************************  /*************************************************
1762  *         Byte flipping function                 *  *             Swap byte functions                *
1763  *************************************************/  *************************************************/
1764    
1765  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1766  byteflip(unsigned long int value, int n)  value, respectively.
1767    
1768    Arguments:
1769      value        any number
1770    
1771    Returns:       the byte swapped value
1772    */
1773    
1774    static pcre_uint32
1775    swap_uint32(pcre_uint32 value)
1776  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1777  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1778         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1779         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1780         ((value & 0xff000000) >> 24);         (value >> 24);
1781  }  }
1782    
1783    static pcre_uint16
1784    swap_uint16(pcre_uint16 value)
1785    {
1786    return (value >> 8) | (value << 8);
1787    }
1788    
1789    
1790    
1791    /*************************************************
1792    *        Flip bytes in a compiled pattern        *
1793    *************************************************/
1794    
1795    /* This function is called if the 'F' option was present on a pattern that is
1796    to be written to a file. We flip the bytes of all the integer fields in the
1797    regex data block and the study block. In 16-bit mode this also flips relevant
1798    bytes in the pattern itself. This is to make it possible to test PCRE's
1799    ability to reload byte-flipped patterns, e.g. those compiled on a different
1800    architecture. */
1801    
1802    static void
1803    regexflip(pcre *ere, pcre_extra *extra)
1804    {
1805    REAL_PCRE *re = (REAL_PCRE *)ere;
1806    #ifdef SUPPORT_PCRE16
1807    int op;
1808    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1809    int length = re->name_count * re->name_entry_size;
1810    #ifdef SUPPORT_UTF
1811    BOOL utf = (re->options & PCRE_UTF16) != 0;
1812    BOOL utf16_char = FALSE;
1813    #endif /* SUPPORT_UTF */
1814    #endif /* SUPPORT_PCRE16 */
1815    
1816    /* Always flip the bytes in the main data block and study blocks. */
1817    
1818    re->magic_number = REVERSED_MAGIC_NUMBER;
1819    re->size = swap_uint32(re->size);
1820    re->options = swap_uint32(re->options);
1821    re->flags = swap_uint16(re->flags);
1822    re->top_bracket = swap_uint16(re->top_bracket);
1823    re->top_backref = swap_uint16(re->top_backref);
1824    re->first_char = swap_uint16(re->first_char);
1825    re->req_char = swap_uint16(re->req_char);
1826    re->name_table_offset = swap_uint16(re->name_table_offset);
1827    re->name_entry_size = swap_uint16(re->name_entry_size);
1828    re->name_count = swap_uint16(re->name_count);
1829    
1830    if (extra != NULL)
1831      {
1832      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1833      rsd->size = swap_uint32(rsd->size);
1834      rsd->flags = swap_uint32(rsd->flags);
1835      rsd->minlength = swap_uint32(rsd->minlength);
1836      }
1837    
1838    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1839    in the name table, if present, and then in the pattern itself. */
1840    
1841    #ifdef SUPPORT_PCRE16
1842    if (!use_pcre16) return;
1843    
1844    while(TRUE)
1845      {
1846      /* Swap previous characters. */
1847      while (length-- > 0)
1848        {
1849        *ptr = swap_uint16(*ptr);
1850        ptr++;
1851        }
1852    #ifdef SUPPORT_UTF
1853      if (utf16_char)
1854        {
1855        if ((ptr[-1] & 0xfc00) == 0xd800)
1856          {
1857          /* We know that there is only one extra character in UTF-16. */
1858          *ptr = swap_uint16(*ptr);
1859          ptr++;
1860          }
1861        }
1862      utf16_char = FALSE;
1863    #endif /* SUPPORT_UTF */
1864    
1865      /* Get next opcode. */
1866    
1867      length = 0;
1868      op = *ptr;
1869      *ptr++ = swap_uint16(op);
1870    
1871      switch (op)
1872        {
1873        case OP_END:
1874        return;
1875    
1876    #ifdef SUPPORT_UTF
1877        case OP_CHAR:
1878        case OP_CHARI:
1879        case OP_NOT:
1880        case OP_NOTI:
1881        case OP_STAR:
1882        case OP_MINSTAR:
1883        case OP_PLUS:
1884        case OP_MINPLUS:
1885        case OP_QUERY:
1886        case OP_MINQUERY:
1887        case OP_UPTO:
1888        case OP_MINUPTO:
1889        case OP_EXACT:
1890        case OP_POSSTAR:
1891        case OP_POSPLUS:
1892        case OP_POSQUERY:
1893        case OP_POSUPTO:
1894        case OP_STARI:
1895        case OP_MINSTARI:
1896        case OP_PLUSI:
1897        case OP_MINPLUSI:
1898        case OP_QUERYI:
1899        case OP_MINQUERYI:
1900        case OP_UPTOI:
1901        case OP_MINUPTOI:
1902        case OP_EXACTI:
1903        case OP_POSSTARI:
1904        case OP_POSPLUSI:
1905        case OP_POSQUERYI:
1906        case OP_POSUPTOI:
1907        case OP_NOTSTAR:
1908        case OP_NOTMINSTAR:
1909        case OP_NOTPLUS:
1910        case OP_NOTMINPLUS:
1911        case OP_NOTQUERY:
1912        case OP_NOTMINQUERY:
1913        case OP_NOTUPTO:
1914        case OP_NOTMINUPTO:
1915        case OP_NOTEXACT:
1916        case OP_NOTPOSSTAR:
1917        case OP_NOTPOSPLUS:
1918        case OP_NOTPOSQUERY:
1919        case OP_NOTPOSUPTO:
1920        case OP_NOTSTARI:
1921        case OP_NOTMINSTARI:
1922        case OP_NOTPLUSI:
1923        case OP_NOTMINPLUSI:
1924        case OP_NOTQUERYI:
1925        case OP_NOTMINQUERYI:
1926        case OP_NOTUPTOI:
1927        case OP_NOTMINUPTOI:
1928        case OP_NOTEXACTI:
1929        case OP_NOTPOSSTARI:
1930        case OP_NOTPOSPLUSI:
1931        case OP_NOTPOSQUERYI:
1932        case OP_NOTPOSUPTOI:
1933        if (utf) utf16_char = TRUE;
1934    #endif
1935        /* Fall through. */
1936    
1937        default:
1938        length = OP_lengths16[op] - 1;
1939        break;
1940    
1941        case OP_CLASS:
1942        case OP_NCLASS:
1943        /* Skip the character bit map. */
1944        ptr += 32/sizeof(pcre_uint16);
1945        length = 0;
1946        break;
1947    
1948        case OP_XCLASS:
1949        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1950        if (LINK_SIZE > 1)
1951          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1952            - (1 + LINK_SIZE + 1));
1953        else
1954          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1955    
1956        /* Reverse the size of the XCLASS instance. */
1957        *ptr = swap_uint16(*ptr);
1958        ptr++;
1959        if (LINK_SIZE > 1)
1960          {
1961          *ptr = swap_uint16(*ptr);
1962          ptr++;
1963          }
1964    
1965        op = *ptr;
1966        *ptr = swap_uint16(op);
1967        ptr++;
1968        if ((op & XCL_MAP) != 0)
1969          {
1970          /* Skip the character bit map. */
1971          ptr += 32/sizeof(pcre_uint16);
1972          length -= 32/sizeof(pcre_uint16);
1973          }
1974        break;
1975        }
1976      }
1977    /* Control should never reach here in 16 bit mode. */
1978    #endif /* SUPPORT_PCRE16 */
1979    }
1980    
1981    
1982    
# Line 618  return ((value & 0x000000ff) << 24) | Line 1985  return ((value & 0x000000ff) << 24) |
1985  *************************************************/  *************************************************/
1986    
1987  static int  static int
1988  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1989    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1990    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1991  {  {
# Line 633  for (;;) Line 2000  for (;;)
2000    {    {
2001    *limit = mid;    *limit = mid;
2002    
2003    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2004      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2005    
2006    if (count == errnumber)    if (count == errnumber)
# Line 678  Returns:    < 0, = 0, or > 0, according Line 2045  Returns:    < 0, = 0, or > 0, according
2045  */  */
2046    
2047  static int  static int
2048  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2049  {  {
2050  while (n--)  while (n--)
2051    {    {
# Line 694  return 0; Line 2061  return 0;
2061  *         Check newline indicator                *  *         Check newline indicator                *
2062  *************************************************/  *************************************************/
2063    
2064  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2065  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
2066    
2067  Arguments:  Arguments:
2068    p           points after the leading '<'    p           points after the leading '<'
# Line 706  Returns:      appropriate PCRE_NEWLINE_x Line 2072  Returns:      appropriate PCRE_NEWLINE_x
2072  */  */
2073    
2074  static int  static int
2075  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2076  {  {
2077  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2078  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2079  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2080  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2081  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2082  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2083  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2084  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2085  return 0;  return 0;
2086  }  }
# Line 728  return 0; Line 2094  return 0;
2094  static void  static void
2095  usage(void)  usage(void)
2096  {  {
2097  printf("Usage:     pcretest [options] [<input> [<output>]]\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2098  printf("  -b       show compiled code (bytecode)\n");  printf("Input and output default to stdin and stdout.\n");
2099    #ifdef SUPPORT_LIBREADLINE
2100    printf("If input is a terminal, readline() is used to read from it.\n");
2101    #else
2102    printf("This version of pcretest is not linked with readline().\n");
2103    #endif
2104    printf("\nOptions:\n");
2105    #ifdef SUPPORT_PCRE16
2106    printf("  -16      use the 16-bit library\n");
2107    #endif
2108    printf("  -b       show compiled code\n");
2109  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2110    printf("  -C arg   show a specific compile-time option\n");
2111    printf("           and exit with its value. The arg can be:\n");
2112    printf("     linksize     internal link size [2, 3, 4]\n");
2113    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2114    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2115    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2116    printf("     ucp          Unicode Properties supported [0, 1]\n");
2117    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2118    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2119  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2120  #if !defined NODFA  #if !defined NODFA
2121  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
2122  #endif  #endif
2123  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
2124  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
2125           "  -M       find MATCH_LIMIT minimum for each subject\n"
2126         "  -m       output memory used information\n"         "  -m       output memory used information\n"
2127         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
2128  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 744  printf("  -p       use POSIX interface\n Line 2130  printf("  -p       use POSIX interface\n
2130  #endif  #endif
2131  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2132  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2133  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
2134           "  -s+      force each pattern to be studied, using JIT if available\n"
2135           "  -s++     ditto, verifying when JIT was actually used\n"
2136         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2137  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2138  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 764  options, followed by a set of test data, Line 2152  options, followed by a set of test data,
2152  int main(int argc, char **argv)  int main(int argc, char **argv)
2153  {  {
2154  FILE *infile = stdin;  FILE *infile = stdin;
2155    const char *version;
2156  int options = 0;  int options = 0;
2157  int study_options = 0;  int study_options = 0;
2158    int default_find_match_limit = FALSE;
2159  int op = 1;  int op = 1;
2160  int timeit = 0;  int timeit = 0;
2161  int timeitm = 0;  int timeitm = 0;
2162  int showinfo = 0;  int showinfo = 0;
2163  int showstore = 0;  int showstore = 0;
2164    int force_study = -1;
2165    int force_study_options = 0;
2166  int quiet = 0;  int quiet = 0;
2167  int size_offsets = 45;  int size_offsets = 45;
2168  int size_offsets_max;  int size_offsets_max;
# Line 781  int posix = 0; Line 2173  int posix = 0;
2173  int debug = 0;  int debug = 0;
2174  int done = 0;  int done = 0;
2175  int all_use_dfa = 0;  int all_use_dfa = 0;
2176    int verify_jit = 0;
2177  int yield = 0;  int yield = 0;
2178  int stack_size;  int stack_size;
2179    
2180  /* These vectors store, end-to-end, a list of captured substring names. Assume  pcre_jit_stack *jit_stack = NULL;
 that 1024 is plenty long enough for the few names we'll be testing. */  
   
 uschar copynames[1024];  
 uschar getnames[1024];  
2181    
2182  uschar *copynamesptr;  /* These vectors store, end-to-end, a list of zero-terminated captured
2183  uschar *getnamesptr;  substring names, each list itself being terminated by an empty name. Assume
2184    that 1024 is plenty long enough for the few names we'll be testing. It is
2185    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2186    for the actual memory, to ensure alignment. */
2187    
2188    pcre_uint16 copynames[1024];
2189    pcre_uint16 getnames[1024];
2190    
2191    #ifdef SUPPORT_PCRE16
2192    pcre_uint16 *cn16ptr;
2193    pcre_uint16 *gn16ptr;
2194    #endif
2195    
2196  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2197  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2198    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2199    pcre_uint8 *cn8ptr;
2200    pcre_uint8 *gn8ptr;
2201    #endif
2202    
2203  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2204  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2205  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2206    
2207    buffer = (pcre_uint8 *)malloc(buffer_size);
2208    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2209    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2210    
2211  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2212    
# Line 813  it set 0x8000, but then I was advised th Line 2221  it set 0x8000, but then I was advised th
2221  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2222  #endif  #endif
2223    
2224    /* Get the version number: both pcre_version() and pcre16_version() give the
2225    same answer. We just need to ensure that we call one that is available. */
2226    
2227    #ifdef SUPPORT_PCRE8
2228    version = pcre_version();
2229    #else
2230    version = pcre16_version();
2231    #endif
2232    
2233  /* Scan options */  /* Scan options */
2234    
2235  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2236    {    {
2237    unsigned char *endptr;    pcre_uint8 *endptr;
2238      char *arg = argv[op];
2239    
2240      if (strcmp(arg, "-m") == 0) showstore = 1;
2241      else if (strcmp(arg, "-s") == 0) force_study = 0;
2242    
2243      else if (strncmp(arg, "-s+", 3) == 0)
2244        {
2245        arg += 3;
2246        if (*arg == '+') { arg++; verify_jit = TRUE; }
2247    
2248    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)      if (*arg != 0) goto BAD_ARG;
2249      showstore = 1;  
2250    else if (strcmp(argv[op], "-q") == 0) quiet = 1;      force_study = 1;
2251    else if (strcmp(argv[op], "-b") == 0) debug = 1;      force_study_options = PCRE_STUDY_JIT_COMPILE
2252    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;                          | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
2253    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;                          | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
2254        }
2255      else if (strcmp(arg, "-16") == 0)
2256        {
2257    #ifdef SUPPORT_PCRE16
2258        use_pcre16 = 1;
2259    #else
2260        printf("** This version of PCRE was built without 16-bit support\n");
2261        exit(1);
2262    #endif
2263        }
2264      else if (strcmp(arg, "-q") == 0) quiet = 1;
2265      else if (strcmp(arg, "-b") == 0) debug = 1;
2266      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2267      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2268      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2269  #if !defined NODFA  #if !defined NODFA
2270    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2271  #endif  #endif
2272    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2273        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2274          *endptr == 0))          *endptr == 0))
2275      {      {
2276      op++;      op++;
2277      argc--;      argc--;
2278      }      }
2279    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2280      {      {
2281      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2282      int temp;      int temp;
2283      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2284                       *endptr == 0))                       *endptr == 0))
2285        {        {
2286        timeitm = temp;        timeitm = temp;
# Line 849  while (argc > 1 && argv[op][0] == '-') Line 2290  while (argc > 1 && argv[op][0] == '-')
2290      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2291      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2292      }      }
2293    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2294        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2295          *endptr == 0))          *endptr == 0))
2296      {      {
2297  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2298      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2299      exit(1);      exit(1);
2300  #else  #else
# Line 872  while (argc > 1 && argv[op][0] == '-') Line 2313  while (argc > 1 && argv[op][0] == '-')
2313  #endif  #endif
2314      }      }
2315  #if !defined NOPOSIX  #if !defined NOPOSIX
2316    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2317  #endif  #endif
2318    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2319      {      {
2320      int rc;      int rc;
2321      printf("PCRE version %s\n", pcre_version());      unsigned long int lrc;
2322    
2323        if (argc > 2)
2324          {
2325          if (strcmp(argv[op + 1], "linksize") == 0)
2326            {
2327            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2328            printf("%d\n", rc);
2329            yield = rc;
2330            goto EXIT;
2331            }
2332          if (strcmp(argv[op + 1], "pcre8") == 0)
2333            {
2334    #ifdef SUPPORT_PCRE8
2335            printf("1\n");
2336            yield = 1;
2337    #else
2338            printf("0\n");
2339            yield = 0;
2340    #endif
2341            goto EXIT;
2342            }
2343          if (strcmp(argv[op + 1], "pcre16") == 0)
2344            {
2345    #ifdef SUPPORT_PCRE16
2346            printf("1\n");
2347            yield = 1;
2348    #else
2349            printf("0\n");
2350            yield = 0;
2351    #endif
2352            goto EXIT;
2353            }
2354          if (strcmp(argv[op + 1], "utf") == 0)
2355            {
2356    #ifdef SUPPORT_PCRE8
2357            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2358            printf("%d\n", rc);
2359            yield = rc;
2360    #else
2361            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2362            printf("%d\n", rc);
2363            yield = rc;
2364    #endif
2365            goto EXIT;
2366            }
2367          if (strcmp(argv[op + 1], "ucp") == 0)
2368            {
2369            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2370            printf("%d\n", rc);
2371            yield = rc;
2372            goto EXIT;
2373            }
2374          if (strcmp(argv[op + 1], "jit") == 0)
2375            {
2376            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2377            printf("%d\n", rc);
2378            yield = rc;
2379            goto EXIT;
2380            }
2381          if (strcmp(argv[op + 1], "newline") == 0)
2382            {
2383            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2384            /* Note that these values are always the ASCII values, even
2385            in EBCDIC environments. CR is 13 and NL is 10. */
2386            printf("%s\n", (rc == 13)? "CR" :
2387              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2388              (rc == -2)? "ANYCRLF" :
2389              (rc == -1)? "ANY" : "???");
2390            goto EXIT;
2391            }
2392          printf("Unknown -C option: %s\n", argv[op + 1]);
2393          goto EXIT;
2394          }
2395    
2396        printf("PCRE version %s\n", version);
2397      printf("Compiled with\n");      printf("Compiled with\n");
2398    
2399    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2400    are set, either both UTFs are supported or both are not supported. */
2401    
2402    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2403        printf("  8-bit and 16-bit support\n");
2404        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2405        if (rc)
2406          printf("  UTF-8 and UTF-16 support\n");
2407        else
2408          printf("  No UTF-8 or UTF-16 support\n");
2409    #elif defined SUPPORT_PCRE8
2410        printf("  8-bit support only\n");
2411      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2412      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2413      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2414        printf("  16-bit support only\n");
2415        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2416        printf("  %sUTF-16 support\n", rc? "" : "No ");
2417    #endif
2418    
2419        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2420      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2421      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2422      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      if (rc)
2423        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        {
2424          const char *arch;
2425          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2426          printf("  Just-in-time compiler support: %s\n", arch);
2427          }
2428        else
2429          printf("  No just-in-time compiler support\n");
2430        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2431        /* Note that these values are always the ASCII values, even
2432        in EBCDIC environments. CR is 13 and NL is 10. */
2433        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2434          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2435        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2436        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2437      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2438      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2439                                       "all Unicode newlines");                                       "all Unicode newlines");
2440      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2441      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2442      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2443      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2444      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2445      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
2446      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2447      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
2448      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2449      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2450        if (showstore)
2451          {
2452          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2453          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2454          }
2455        printf("\n");
2456      goto EXIT;      goto EXIT;
2457      }      }
2458    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
2459             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
2460      {      {
2461      usage();      usage();
2462      goto EXIT;      goto EXIT;
2463      }      }
2464    else    else
2465      {      {
2466      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
2467        printf("** Unknown or malformed option %s\n", arg);
2468      usage();      usage();
2469      yield = 1;      yield = 1;
2470      goto EXIT;      goto EXIT;
# Line 958  if (argc > 2) Line 2511  if (argc > 2)
2511    
2512  /* Set alternative malloc function */  /* Set alternative malloc function */
2513    
2514    #ifdef SUPPORT_PCRE8
2515  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2516  pcre_free = new_free;  pcre_free = new_free;
2517  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2518  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2519    #endif
2520    
2521    #ifdef SUPPORT_PCRE16
2522    pcre16_malloc = new_malloc;
2523    pcre16_free = new_free;
2524    pcre16_stack_malloc = stack_malloc;
2525    pcre16_stack_free = stack_free;
2526    #endif
2527    
2528  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2529    
2530  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2531    
2532  /* Main loop */  /* Main loop */
2533    
# Line 980  while (!done) Line 2542  while (!done)
2542  #endif  #endif
2543    
2544    const char *error;    const char *error;
2545    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2546    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
2547    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
2548      const pcre_uint8 *tables = NULL;
2549      unsigned long int get_options;
2550    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2551    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2552      int do_allcaps = 0;
2553      int do_mark = 0;
2554    int do_study = 0;    int do_study = 0;
2555      int no_force_study = 0;
2556    int do_debug = debug;    int do_debug = debug;
2557    int do_G = 0;    int do_G = 0;
2558    int do_g = 0;    int do_g = 0;
2559    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2560    int do_showrest = 0;    int do_showrest = 0;
2561      int do_showcaprest = 0;
2562    int do_flip = 0;    int do_flip = 0;
2563    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2564    
2565    use_utf8 = 0;    use_utf = 0;
2566    debug_lengths = 1;    debug_lengths = 1;
2567    
2568    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (extend_inputline(infile, buffer) == NULL) break;  
2569    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2570    fflush(outfile);    fflush(outfile);
2571    
# Line 1010  while (!done) Line 2577  while (!done)
2577    
2578    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2579      {      {
2580      unsigned long int magic, get_options;      pcre_uint32 magic;
2581      uschar sbuf[8];      pcre_uint8 sbuf[8];
2582      FILE *f;      FILE *f;
2583    
2584      p++;      p++;
2585        if (*p == '!')
2586          {
2587          do_debug = TRUE;
2588          do_showinfo = TRUE;
2589          p++;
2590          }
2591    
2592      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2593      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2594      *pp = 0;      *pp = 0;
# Line 1026  while (!done) Line 2600  while (!done)
2600        continue;        continue;
2601        }        }
2602    
2603        first_gotten_store = 0;
2604      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2605    
2606      true_size =      true_size =
# Line 1033  while (!done) Line 2608  while (!done)
2608      true_study_size =      true_study_size =
2609        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2610    
2611      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2612      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2613    
2614      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2615    
2616      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2617      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2618        {        {
2619        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2620          {          {
2621          do_flip = 1;          do_flip = 1;
2622          }          }
# Line 1053  while (!done) Line 2628  while (!done)
2628          }          }
2629        }        }
2630    
2631      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
2632        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2633          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
     /* Need to know if UTF-8 for printing data strings */  
2634    
2635      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      /* Now see if there is any following study data. */
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
     /* Now see if there is any following study data */  
2636    
2637      if (true_study_size != 0)      if (true_study_size != 0)
2638        {        {
# Line 1077  while (!done) Line 2648  while (!done)
2648          {          {
2649          FAIL_READ:          FAIL_READ:
2650          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2651          if (extra != NULL) new_free(extra);          if (extra != NULL)
2652              {
2653              PCRE_FREE_STUDY(extra);
2654              }
2655          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2656          fclose(f);          fclose(f);
2657          continue;          continue;
# Line 1087  while (!done) Line 2661  while (!done)
2661        }        }
2662      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2663    
2664        /* Flip the necessary bytes. */
2665        if (do_flip)
2666          {
2667          int rc;
2668          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2669          if (rc == PCRE_ERROR_BADMODE)
2670            {
2671            /* Simulate the result of the function call below. */
2672            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2673              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2674            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2675              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2676            continue;
2677            }
2678          }
2679    
2680        /* Need to know if UTF-8 for printing data strings. */
2681    
2682        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2683        use_utf = (get_options & PCRE_UTF8) != 0;
2684    
2685      fclose(f);      fclose(f);
2686      goto SHOW_INFO;      goto SHOW_INFO;
2687      }      }
2688    
2689    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2690    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2691    
2692    delimiter = *p++;    delimiter = *p++;
2693    
2694    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2695      {      {
2696      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2697      goto SKIP_DATA;      goto SKIP_DATA;
2698      }      }
2699    
2700    pp = p;    pp = p;
2701    poffset = p - buffer;    poffset = (int)(p - buffer);
2702    
2703    for(;;)    for(;;)
2704      {      {
# Line 1114  while (!done) Line 2709  while (!done)
2709        pp++;        pp++;
2710        }        }
2711      if (*pp != 0) break;      if (*pp != 0) break;
2712      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if ((pp = extend_inputline(infile, pp)) == NULL)  
2713        {        {
2714        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2715        done = 1;        done = 1;
# Line 1158  while (!done) Line 2752  while (!done)
2752        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2753        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2754    
2755        case '+': do_showrest = 1; break;        case '+':
2756          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2757          break;
2758    
2759          case '=': do_allcaps = 1; break;
2760        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2761        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2762        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1168  while (!done) Line 2766  while (!done)
2766        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2767        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2768        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
2769          case 'K': do_mark = 1; break;
2770        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2771        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2772    
# Line 1175  while (!done) Line 2774  while (!done)
2774        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2775  #endif  #endif
2776    
2777        case 'S': do_study = 1; break;        case 'S':
2778          if (do_study == 0)
2779            {
2780            do_study = 1;
2781            if (*pp == '+')
2782              {
2783              if (*(++pp) == '+')
2784                {
2785                verify_jit = TRUE;
2786                pp++;
2787                }
2788              study_options |= PCRE_STUDY_JIT_COMPILE
2789                            | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
2790                            | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
2791              }
2792            }
2793          else
2794            {
2795            do_study = 0;
2796            no_force_study = 1;
2797            }
2798          break;
2799    
2800        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2801          case 'W': options |= PCRE_UCP; break;
2802        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2803          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2804        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2805        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2806        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2807    
2808          case 'T':
2809          switch (*pp++)
2810            {
2811            case '0': tables = tables0; break;
2812            case '1': tables = tables1; break;
2813    
2814            case '\r':
2815            case '\n':
2816            case ' ':
2817            case 0:
2818            fprintf(outfile, "** Missing table number after /T\n");
2819            goto SKIP_DATA;
2820    
2821            default:
2822            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2823            goto SKIP_DATA;
2824            }
2825          break;
2826    
2827        case 'L':        case 'L':
2828        ppp = pp;        ppp = pp;
2829        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1194  while (!done) Line 2836  while (!done)
2836          goto SKIP_DATA;          goto SKIP_DATA;
2837          }          }
2838        locale_set = 1;        locale_set = 1;
2839        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2840        pp = ppp;        pp = ppp;
2841        break;        break;
2842    
# Line 1207  while (!done) Line 2849  while (!done)
2849    
2850        case '<':        case '<':
2851          {          {
2852          int x = check_newline(pp, outfile);          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2853          if (x == 0) goto SKIP_DATA;            {
2854          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
2855          while (*pp++ != '>');            pp += 3;
2856              }
2857            else
2858              {
2859              int x = check_newline(pp, outfile);
2860              if (x == 0) goto SKIP_DATA;
2861              options |= x;
2862              while (*pp++ != '>');
2863              }
2864          }          }
2865        break;        break;
2866    
# Line 1227  while (!done) Line 2877  while (!done)
2877    
2878    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2879    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2880    local character tables. */    local character tables. Neither does it have 16-bit support. */
2881    
2882  #if !defined NOPOSIX  #if !defined NOPOSIX
2883    if (posix || do_posix)    if (posix || do_posix)
# Line 1240  while (!done) Line 2890  while (!done)
2890      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2891      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2892      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2893        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2894        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2895    
2896        first_gotten_store = 0;
2897      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2898    
2899      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1260  while (!done) Line 2913  while (!done)
2913  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2914    
2915      {      {
2916        /* In 16-bit mode, convert the input. */
2917    
2918    #ifdef SUPPORT_PCRE16
2919        if (use_pcre16)
2920          {
2921          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2922            {
2923            case -1:
2924            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2925              "converted to UTF-16\n");
2926            goto SKIP_DATA;
2927    
2928            case -2:
2929            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2930              "cannot be converted to UTF-16\n");
2931            goto SKIP_DATA;
2932    
2933            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2934            fprintf(outfile, "**Failed: character value greater than 0xffff "
2935              "cannot be converted to 16-bit in non-UTF mode\n");
2936            goto SKIP_DATA;
2937    
2938            default:
2939            break;
2940            }
2941          p = (pcre_uint8 *)buffer16;
2942          }
2943    #endif
2944    
2945        /* Compile many times when timing */
2946    
2947      if (timeit > 0)      if (timeit > 0)
2948        {        {
2949        register int i;        register int i;
# Line 1267  while (!done) Line 2951  while (!done)
2951        clock_t start_time = clock();        clock_t start_time = clock();
2952        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2953          {          {
2954          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2955          if (re != NULL) free(re);          if (re != NULL) free(re);
2956          }          }
2957        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1276  while (!done) Line 2960  while (!done)
2960            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2961        }        }
2962    
2963      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2964        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2965    
2966      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2967      if non-interactive. */      if non-interactive. */
# Line 1289  while (!done) Line 2974  while (!done)
2974          {          {
2975          for (;;)          for (;;)
2976            {            {
2977            if (extend_inputline(infile, buffer) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2978              {              {
2979              done = 1;              done = 1;
2980              goto CONTINUE;              goto CONTINUE;
# Line 1303  while (!done) Line 2988  while (!done)
2988        goto CONTINUE;        goto CONTINUE;
2989        }        }
2990    
2991      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2992      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
2993      returns only limited data. Check that it agrees with the newer one. */      lines. */
2994    
2995      if (log_store)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2996        fprintf(outfile, "Memory allocation (code space): %d\n",        goto SKIP_DATA;
2997          (int)(gotten_store -      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
2998    
2999      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3000      and remember the store that was got. */      and remember the store that was got. */
3001    
3002      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
3003      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3004    
3005        /* Output code size information if requested */
3006    
3007      /* If /S was present, study the regexp to generate additional info to      if (log_store)
3008      help with the matching. */        fprintf(outfile, "Memory allocation (code space): %d\n",
3009            (int)(first_gotten_store -
3010                  sizeof(REAL_PCRE) -
3011                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3012    
3013        /* If -s or /S was present, study the regex to generate additional info to
3014        help with the matching, unless the pattern has the SS option, which
3015        suppresses the effect of /S (used for a few test patterns where studying is
3016        never sensible). */
3017    
3018      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
3019        {        {
3020        if (timeit > 0)        if (timeit > 0)
3021          {          {
# Line 1330  while (!done) Line 3023  while (!done)
3023          clock_t time_taken;          clock_t time_taken;
3024          clock_t start_time = clock();          clock_t start_time = clock();
3025          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3026            extra = pcre_study(re, study_options, &error);            {
3027              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3028              }
3029          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3030          if (extra != NULL) free(extra);          if (extra != NULL)
3031              {
3032              PCRE_FREE_STUDY(extra);
3033              }
3034          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3035            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3036              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3037          }          }
3038        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3039        if (error != NULL)        if (error != NULL)
3040          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3041        else if (extra != NULL)        else if (extra != NULL)
3042            {
3043          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3044            if (log_store)
3045              {
3046              size_t jitsize;
3047              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3048                  jitsize != 0)
3049                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3050              }
3051            }
3052        }        }
3053    
3054      /* If the 'F' option was present, we flip the bytes of all the integer      /* If /K was present, we set up for handling MARK data. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
3055    
3056      if (do_flip)      if (do_mark)
3057        {        {
3058        real_pcre *rre = (real_pcre *)re;        if (extra == NULL)
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
3059          {          {
3060          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3061          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          extra->flags = 0;
         rsd->options = byteflip(rsd->options, sizeof(rsd->options));  
3062          }          }
3063          extra->mark = &markptr;
3064          extra->flags |= PCRE_EXTRA_MARK;
3065        }        }
3066    
3067      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
3068    
3069      SHOW_INFO:      SHOW_INFO:
3070    
3071      if (do_debug)      if (do_debug)
3072        {        {
3073        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3074        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3075        }        }
3076    
3077        /* We already have the options in get_options (see above) */
3078    
3079      if (do_showinfo)      if (do_showinfo)
3080        {        {
3081        unsigned long int get_options, all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3082        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3083          hascrorlf;          hascrorlf;
3084        int nameentrysize, namecount;        int nameentrysize, namecount;
3085        const uschar *nametable;        const pcre_uint8 *nametable;
3086    
3087        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3088        new_info(re, NULL, PCRE_INFO_SIZE, &size);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3089        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3090        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3091        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3092        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3093        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3094        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3095        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3096        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3097        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3098        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            != 0)
3099            goto SKIP_DATA;
 #if !defined NOINFOCHECK  
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3100    
3101        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3102          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1447  while (!done) Line 3111  while (!done)
3111          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3112          while (namecount-- > 0)          while (namecount-- > 0)
3113            {            {
3114            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3115              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3116              GET2(nametable, 0));  #else
3117              int imm2_size = IMM2_SIZE;
3118    #endif
3119              int length = (int)STRLEN(nametable + imm2_size);
3120              fprintf(outfile, "  ");
3121              PCHARSV(nametable, imm2_size, length, outfile);
3122              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3123    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3124              fprintf(outfile, "%3d\n", use_pcre16?
3125                 (int)(((PCRE_SPTR16)nametable)[0])
3126                :((int)nametable[0] << 8) | (int)nametable[1]);
3127              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3128    #else
3129              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3130    #ifdef SUPPORT_PCRE8
3131            nametable += nameentrysize;            nametable += nameentrysize;
3132    #else
3133              nametable += nameentrysize * 2;
3134    #endif
3135    #endif
3136            }            }
3137          }          }
3138    
3139        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3140        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3141    
3142        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3143        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3144    
3145        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3146          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3147            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3148            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3149            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1474  while (!done) Line 3156  while (!done)
3156            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3157            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3158            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3159            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3160            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3161              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3162              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3163            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3164    
3165        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1516  while (!done) Line 3200  while (!done)
3200          }          }
3201        else        else
3202          {          {
3203          int ch = first_char & 255;          const char *caseless =
3204          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3205            "" : " (caseless)";            "" : " (caseless)";
3206          if (PRINTHEX(ch))  
3207            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3208              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3209          else          else
3210            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3211              fprintf(outfile, "First char = ");
3212              pchar(first_char, outfile);
3213              fprintf(outfile, "%s\n", caseless);
3214              }
3215          }          }
3216    
3217        if (need_char < 0)        if (need_char < 0)
# Line 1531  while (!done) Line 3220  while (!done)
3220          }          }
3221        else        else
3222          {          {
3223          int ch = need_char & 255;          const char *caseless =
3224          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3225            "" : " (caseless)";            "" : " (caseless)";
3226          if (PRINTHEX(ch))  
3227            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3228              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3229          else          else
3230            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3231              fprintf(outfile, "Need char = ");
3232              pchar(need_char, outfile);
3233              fprintf(outfile, "%s\n", caseless);
3234              }
3235          }          }
3236    
3237        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3238        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3239        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3240        flipped.) */        flipped.) If study was forced by an external -s, don't show this
3241          information unless -i or -d was also present. This means that, except
3242          when auto-callouts are involved, the output from runs with and without
3243          -s should be identical. */
3244    
3245        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3246          {          {
3247          if (extra == NULL)          if (extra == NULL)
3248            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3249          else          else
3250            {            {
3251            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3252            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
3253    
3254            if (start_bits == NULL)            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3255              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3256            else  
3257              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3258              {              {
3259              int i;              if (start_bits == NULL)
3260              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3261              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3262                {                {
3263                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3264                  int c = 24;
3265                  fprintf(outfile, "Starting byte set: ");
3266                  for (i = 0; i < 256; i++)
3267                  {                  {
3268                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
                   {  
                   fprintf(outfile, "%c ", i);  
                   c += 2;  
                   }  
                 else  
3269                    {                    {
3270                    fprintf(outfile, "\\x%02x ", i);                    if (c > 75)
3271                    c += 5;                      {
3272                        fprintf(outfile, "\n  ");
3273                        c = 2;
3274                        }
3275                      if (PRINTOK(i) && i != ' ')
3276                        {
3277                        fprintf(outfile, "%c ", i);
3278                        c += 2;
3279                        }
3280                      else
3281                        {
3282                        fprintf(outfile, "\\x%02x ", i);
3283                        c += 5;
3284                        }
3285                    }                    }
3286                  }                  }
3287                  fprintf(outfile, "\n");
3288                }                }
3289              fprintf(outfile, "\n");              }
3290              }
3291    
3292            /* Show this only if the JIT was set by /S, not by -s. */
3293    
3294            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3295              {
3296              int jit;
3297              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3298                {
3299                if (jit)
3300                  fprintf(outfile, "JIT study was successful\n");
3301                else
3302    #ifdef SUPPORT_JIT
3303                  fprintf(outfile, "JIT study was not successful\n");
3304    #else
3305                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3306    #endif
3307              }              }
3308            }            }
3309          }          }
# Line 1601  while (!done) Line 3322  while (!done)
3322          }          }
3323        else        else
3324          {          {
3325          uschar sbuf[8];          pcre_uint8 sbuf[8];
3326          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3327          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3328          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3329          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3330            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3331          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3332          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3333          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3334          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3335            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3336    
3337          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3338              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1619  while (!done) Line 3341  while (!done)
3341            }            }
3342          else          else
3343            {            {
3344            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3345    
3346              /* If there is study data, write it. */
3347    
3348            if (extra != NULL)            if (extra != NULL)
3349              {              {
3350              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1629  while (!done) Line 3354  while (!done)
3354                  strerror(errno));                  strerror(errno));
3355                }                }
3356              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
3357              }              }
3358            }            }
3359          fclose(f);          fclose(f);
3360          }          }
3361    
3362        new_free(re);        new_free(re);
3363        if (extra != NULL) new_free(extra);        if (extra != NULL)
3364        if (tables != NULL) new_free((void *)tables);          {
3365            PCRE_FREE_STUDY(extra);
3366            }
3367          if (locale_set)
3368            {
3369            new_free((void *)tables);
3370            setlocale(LC_CTYPE, "C");
3371            locale_set = 0;
3372            }
3373        continue;  /* With next regex */        continue;  /* With next regex */
3374        }        }
3375      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1646  while (!done) Line 3378  while (!done)
3378    
3379    for (;;)    for (;;)
3380      {      {
3381      uschar *q;      pcre_uint8 *q;
3382      uschar *bptr;      pcre_uint8 *bptr;
3383      int *use_offsets = offsets;      int *use_offsets = offsets;
3384      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3385      int callout_data = 0;      int callout_data = 0;
3386      int callout_data_set = 0;      int callout_data_set = 0;
3387      int count, c;      int count, c;
3388      int copystrings = 0;      int copystrings = 0;
3389      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
3390      int getstrings = 0;      int getstrings = 0;
3391      int getlist = 0;      int getlist = 0;
3392      int gmatched = 0;      int gmatched = 0;
3393      int start_offset = 0;      int start_offset = 0;
3394        int start_offset_sign = 1;
3395      int g_notempty = 0;      int g_notempty = 0;
3396      int use_dfa = 0;      int use_dfa = 0;
3397        int jit_was_used = 0;
     options = 0;  
3398    
3399      *copynames = 0;      *copynames = 0;
3400      *getnames = 0;      *getnames = 0;
3401    
3402      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3403      getnamesptr = getnames;      cn16ptr = copynames;
3404        gn16ptr = getnames;
3405    #endif
3406    #ifdef SUPPORT_PCRE8
3407        cn8ptr = copynames8;
3408        gn8ptr = getnames8;
3409    #endif
3410    
3411      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3412      first_callout = 1;      first_callout = 1;
3413        last_callout_mark = NULL;
3414      callout_extra = 0;      callout_extra = 0;
3415      callout_count = 0;      callout_count = 0;
3416      callout_fail_count = 999999;      callout_fail_count = 999999;
3417      callout_fail_id = -1;      callout_fail_id = -1;
3418      show_malloc = 0;      show_malloc = 0;
3419        options = 0;
3420    
3421      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3422        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 1684  while (!done) Line 3424  while (!done)
3424      len = 0;      len = 0;
3425      for (;;)      for (;;)
3426        {        {
3427        if (infile == stdin) printf("data> ");        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       if (extend_inputline(infile, buffer + len) == NULL)  
3428          {          {
3429          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
3430              {
3431              fprintf(outfile, "\n");
3432              break;
3433              }
3434          done = 1;          done = 1;
3435          goto CONTINUE;          goto CONTINUE;
3436          }          }
# Line 1709  while (!done) Line 3452  while (!done)
3452        int i = 0;        int i = 0;
3453        int n = 0;        int n = 0;
3454    
3455        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3456          In non-UTF mode, allow the value of the byte to fall through to later,
3457          where values greater than 127 are turned into UTF-8 when running in
3458          16-bit mode. */
3459    
3460          if (c != '\\')
3461            {
3462            if (use_utf)
3463              {
3464              *q++ = c;
3465              continue;
3466              }
3467            }
3468    
3469          /* Handle backslash escapes */
3470    
3471          else switch ((c = *p++))
3472          {          {
3473          case 'a': c =    7; break;          case 'a': c =    7; break;
3474          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 1725  while (!done) Line 3484  while (!done)
3484          c -= '0';          c -= '0';
3485          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3486            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3487          break;          break;
3488    
3489          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3490          if (*p == '{')          if (*p == '{')
3491            {            {
3492            unsigned char *pt = p;            pcre_uint8 *pt = p;
3493            c = 0;            c = 0;
3494            while (isxdigit(*(++pt)))  
3495              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3496              when isxdigit() is a macro that refers to its argument more than
3497              once. This is banned by the C Standard, but apparently happens in at
3498              least one MacOS environment. */
3499    
3500              for (pt++; isxdigit(*pt); pt++)
3501                {
3502                if (++i == 9)
3503                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3504                                   "using only the first eight.\n");
3505                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3506                }
3507            if (*pt == '}')            if (*pt == '}')
3508              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             utn = ord2utf8(c, buff8);  
             for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
             c = buff8[ii];   /* Last byte */  
3509              p = pt + 1;              p = pt + 1;
3510              break;              break;
3511              }              }
3512            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3513            }            }
 #endif  
3514    
3515          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3516            allows UTF-8 characters to be constructed byte by byte, and also allows
3517            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3518            Otherwise, pass it down to later code so that it can be turned into
3519            UTF-8 when running in 16-bit mode. */
3520    
3521          c = 0;          c = 0;
3522          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3523            {            {
3524            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3525            p++;            p++;
3526            }            }
3527            if (use_utf)
3528              {
3529              *q++ = c;
3530              continue;
3531              }
3532          break;          break;
3533    
3534          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 1778  while (!done) Line 3536  while (!done)
3536          continue;          continue;
3537    
3538          case '>':          case '>':
3539            if (*p == '-')
3540              {
3541              start_offset_sign = -1;
3542              p++;
3543              }
3544          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3545            start_offset *= start_offset_sign;
3546          continue;          continue;
3547    
3548          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1797  while (!done) Line 3561  while (!done)
3561            }            }
3562          else if (isalnum(*p))          else if (isalnum(*p))
3563            {            {
3564            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3565            }            }
3566          else if (*p == '+')          else if (*p == '+')
3567            {            {
# Line 1813  while (!done) Line 3570  while (!done)
3570            }            }
3571          else if (*p == '-')          else if (*p == '-')
3572            {            {
3573            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3574            p++;            p++;
3575            }            }
3576          else if (*p == '!')          else if (*p == '!')
# Line 1851  while (!done) Line 3608  while (!done)
3608  #endif  #endif
3609            use_dfa = 1;            use_dfa = 1;
3610          continue;          continue;
3611    #endif
3612    
3613    #if !defined NODFA
3614          case 'F':          case 'F':
3615          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
3616          continue;          continue;
# Line 1865  while (!done) Line 3624  while (!done)
3624            }            }
3625          else if (isalnum(*p))          else if (isalnum(*p))
3626            {            {
3627            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3628            while (isalnum(*p)) *npp++ = *p++;            }
3629            *npp++ = 0;          continue;
3630            *npp = 0;  
3631            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3632            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3633              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3634            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3635                && extra->executable_jit != NULL)
3636              {
3637              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3638              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3639              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3640            }            }
3641          continue;          continue;
3642    
# Line 1885  while (!done) Line 3649  while (!done)
3649          continue;          continue;
3650    
3651          case 'N':          case 'N':
3652          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
3653              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3654            else
3655              options |= PCRE_NOTEMPTY;
3656          continue;          continue;
3657    
3658          case 'O':          case 'O':
# Line 1908  while (!done) Line 3675  while (!done)
3675          continue;          continue;
3676    
3677          case 'P':          case 'P':
3678          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3679              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3680          continue;          continue;
3681    
3682          case 'Q':          case 'Q':
# Line 1943  while (!done) Line 3711  while (!done)
3711          show_malloc = 1;          show_malloc = 1;
3712          continue;          continue;
3713    
3714            case 'Y':
3715            options |= PCRE_NO_START_OPTIMIZE;
3716            continue;
3717    
3718          case 'Z':          case 'Z':
3719          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
3720          continue;          continue;
# Line 1960  while (!done) Line 3732  while (!done)
3732            }            }
3733          continue;          continue;
3734          }          }
3735        *q++ = c;  
3736          /* We now have a character value in c that may be greater than 255. In
3737          16-bit mode, we always convert characters to UTF-8 so that values greater
3738          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3739          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3740          mode must have come from \x{...} or octal constructs because values from
3741          \x.. get this far only in non-UTF mode. */
3742    
3743    #if !defined NOUTF || defined SUPPORT_PCRE16
3744          if (use_pcre16 || use_utf)
3745            {
3746            pcre_uint8 buff8[8];
3747            int ii, utn;
3748            utn = ord2utf8(c, buff8);
3749            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3750            }
3751          else
3752    #endif
3753            {
3754            if (c > 255)
3755              {
3756              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3757                "and UTF-8 mode is not enabled.\n", c);
3758              fprintf(outfile, "** Truncation will probably give the wrong "
3759                "result.\n");
3760              }
3761            *q++ = c;
3762            }
3763        }        }
3764    
3765        /* Reached end of subject string */
3766    
3767      *q = 0;      *q = 0;
3768      len = q - dbuffer;      len = (int)(q - dbuffer);
3769    
3770        /* Move the data to the end of the buffer so that a read over the end of
3771        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3772        we are using the POSIX interface, we must include the terminating zero. */
3773    
3774    #if !defined NOPOSIX
3775        if (posix || do_posix)
3776          {
3777          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3778          bptr += buffer_size - len - 1;
3779          }
3780        else
3781    #endif
3782          {
3783          memmove(bptr + buffer_size - len, bptr, len);
3784          bptr += buffer_size - len;
3785          }
3786    
3787      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
3788        {        {
# Line 1984  while (!done) Line 3803  while (!done)
3803          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3804        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3805        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3806          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3807    
3808        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3809    
# Line 2005  while (!done) Line 3825  while (!done)
3825            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3826              {              {
3827              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3828              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3829                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3830              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3831              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3832                {                {
3833                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3834                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3835                  outfile);                  outfile);
3836                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3837                }                }
# Line 2019  while (!done) Line 3839  while (!done)
3839            }            }
3840          }          }
3841        free(pmatch);        free(pmatch);
3842          goto NEXT_DATA;
3843        }        }
3844    
3845    #endif  /* !defined NOPOSIX */
3846    
3847      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3848    
3849      else  #ifdef SUPPORT_PCRE16
3850  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3851          {
3852          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3853          switch(len)
3854            {
3855            case -1:
3856            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3857              "converted to UTF-16\n");
3858            goto NEXT_DATA;
3859    
3860            case -2:
3861            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3862              "cannot be converted to UTF-16\n");
3863            goto NEXT_DATA;
3864    
3865            case -3:
3866            fprintf(outfile, "**Failed: character value greater than 0xffff "
3867              "cannot be converted to 16-bit in non-UTF mode\n");
3868            goto NEXT_DATA;
3869    
3870            default:
3871            break;
3872            }
3873          bptr = (pcre_uint8 *)buffer16;
3874          }
3875    #endif
3876    
3877      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3878        {        {
3879          markptr = NULL;
3880    
3881        if (timeitm > 0)        if (timeitm > 0)
3882          {          {
3883          register int i;          register int i;
# Line 2039  while (!done) Line 3889  while (!done)
3889            {            {
3890            int workspace[1000];            int workspace[1000];
3891            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3892              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              {
3893                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3894                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets, workspace,
3895                  (sizeof(workspace)/sizeof(int)));
3896                }
3897            }            }
3898          else          else
3899  #endif  #endif
3900    
3901          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3902            count = pcre_exec(re, extra, (char *)bptr, len,            {
3903              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3904                (options | g_notempty), use_offsets, use_size_offsets);
3905              }
3906          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3907          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3908            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2058  while (!done) Line 3911  while (!done)
3911    
3912        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3913        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
3914        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
3915          running of pcre_exec(), so disable the JIT optimization. This makes it
3916          possible to run the same set of tests with and without JIT externally
3917          requested. */
3918    
3919        if (find_match_limit)        if (find_match_limit)
3920          {          {
# Line 2067  while (!done) Line 3923  while (!done)
3923            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3924            extra->flags = 0;            extra->flags = 0;
3925            }            }
3926            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3927    
3928          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3929            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2090  while (!done) Line 3947  while (!done)
3947            }            }
3948          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3949          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3950          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3951            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3952          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3953          }          }
# Line 2102  while (!done) Line 3959  while (!done)
3959        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3960          {          {
3961          int workspace[1000];          int workspace[1000];
3962          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3963            options | g_notempty, use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, workspace,
3964            sizeof(workspace)/sizeof(int));            (sizeof(workspace)/sizeof(int)));
3965          if (count == 0)          if (count == 0)
3966            {            {
3967            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2115  while (!done) Line 3972  while (!done)
3972    
3973        else        else
3974          {          {
3975          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3976            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3977          if (count == 0)          if (count == 0)
3978            {            {
3979            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
3980            count = use_size_offsets/3;            count = use_size_offsets/3;
3981            }            }
3982          }          }
3983    
3984          if (extra != NULL && (extra->flags & PCRE_EXTRA_USED_JIT) != 0)
3985            jit_was_used = TRUE;
3986    
3987        /* Matched */        /* Matched */
3988    
3989        if (count >= 0)        if (count >= 0)
3990          {          {
3991          int i, maxcount;          int i, maxcount;
3992            void *cnptr, *gnptr;
3993    
3994  #if !defined NODFA  #if !defined NODFA
3995          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2150  while (!done) Line 4011  while (!done)
4011              }              }
4012            }            }
4013    
4014            /* do_allcaps requests showing of all captures in the pattern, to check
4015            unset ones at the end. */
4016    
4017            if (do_allcaps)
4018              {
4019              if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4020                goto SKIP_DATA;
4021              count++;   /* Allow for full match */
4022              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4023              }
4024    
4025            /* Output the captured substrings */
4026    
4027          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
4028            {            {
4029            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
4030                {
4031                if (use_offsets[i] != -1)
4032                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4033                    use_offsets[i], i);
4034                if (use_offsets[i+1] != -1)
4035                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4036                    use_offsets[i+1], i+1);
4037              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);