/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 675 by ph10, Sat Aug 27 10:18:46 2011 UTC revision 926 by ph10, Wed Feb 22 15:01:32 2012 UTC
# Line 1  Line 1 
1  /*************************************************  /*.************************************************
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
51  #include "config.h"  #include "config.h"
# Line 94  input mode under Windows. */ Line 105  input mode under Windows. */
105  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
106  #endif  #endif
107    
108    #define PRIV(name) name
109    
110  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
111  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 105  here before pcre_internal.h so that the Line 117  here before pcre_internal.h so that the
117  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
118    
119  #include "pcre.h"  #include "pcre.h"
120    
121    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
122    /* Configure internal macros to 16 bit mode. */
123    #define COMPILE_PCRE16
124    #endif
125    
126  #include "pcre_internal.h"  #include "pcre_internal.h"
127    
128    /* The pcre_printint() function, which prints the internal form of a compiled
129    regex, is held in a separate file so that (a) it can be compiled in either
130    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
131    when that is compiled in debug mode. */
132    
133    #ifdef SUPPORT_PCRE8
134    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
135    #endif
136    #ifdef SUPPORT_PCRE16
137    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
138    #endif
139    
140  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
141  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
142  external symbols to prevent clashes. */  external symbols to prevent clashes. */
143    
144  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
 #define _pcre_ucp_typerange    ucp_typerange  
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utf8_char_sizes  utf8_char_sizes  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
145    
146  #include "pcre_tables.c"  #include "pcre_tables.c"
147    
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
   
148  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
149  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
150  contained in the printint.src file. We uses it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
151  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
152  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
153    
154    #ifdef EBCDIC
155    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
156    #else
157    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
158    #endif
159    
160    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
161    
162  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* Posix support is disabled in 16 bit only mode. */
163    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
164    #define NOPOSIX
165    #endif
166    
167  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
168  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 150  Makefile. */ Line 172  Makefile. */
172  #include "pcreposix.h"  #include "pcreposix.h"
173  #endif  #endif
174    
175  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
176  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
177  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
178  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
179  UTF8 support if PCRE is built without it. */  
180    #ifndef SUPPORT_UTF
181  #ifndef SUPPORT_UTF8  #ifndef NOUTF
182  #ifndef NOUTF8  #define NOUTF
183  #define NOUTF8  #endif
184  #endif  #endif
185    
186    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
187    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
188    only from one place and is handled differently). I couldn't dream up any way of
189    using a single macro to do this in a generic way, because of the many different
190    argument requirements. We know that at least one of SUPPORT_PCRE8 and
191    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
192    use these in the definitions of generic macros.
193    
194    **** Special note about the PCHARSxxx macros: the address of the string to be
195    printed is always given as two arguments: a base address followed by an offset.
196    The base address is cast to the correct data size for 8 or 16 bit data; the
197    offset is in units of this size. If the string were given as base+offset in one
198    argument, the casting might be incorrectly applied. */
199    
200    #ifdef SUPPORT_PCRE8
201    
202    #define PCHARS8(lv, p, offset, len, f) \
203      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
204    
205    #define PCHARSV8(p, offset, len, f) \
206      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
207    
208    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
209      p = read_capture_name8(p, cn8, re)
210    
211    #define STRLEN8(p) ((int)strlen((char *)p))
212    
213    #define SET_PCRE_CALLOUT8(callout) \
214      pcre_callout = callout
215    
216    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
217       pcre_assign_jit_stack(extra, callback, userdata)
218    
219    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
220      re = pcre_compile((char *)pat, options, error, erroffset, tables)
221    
222    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
223        namesptr, cbuffer, size) \
224      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
225        (char *)namesptr, cbuffer, size)
226    
227    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
228      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
229    
230    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
231        offsets, size_offsets, workspace, size_workspace) \
232      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
233        offsets, size_offsets, workspace, size_workspace)
234    
235    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
236        offsets, size_offsets) \
237      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
238        offsets, size_offsets)
239    
240    #define PCRE_FREE_STUDY8(extra) \
241      pcre_free_study(extra)
242    
243    #define PCRE_FREE_SUBSTRING8(substring) \
244      pcre_free_substring(substring)
245    
246    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
247      pcre_free_substring_list(listptr)
248    
249    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
250        getnamesptr, subsptr) \
251      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
252        (char *)getnamesptr, subsptr)
253    
254    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
255      n = pcre_get_stringnumber(re, (char *)ptr)
256    
257    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
258      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
259    
260    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
261      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
262    
263    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
264      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
265    
266    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
267      pcre_printint(re, outfile, debug_lengths)
268    
269    #define PCRE_STUDY8(extra, re, options, error) \
270      extra = pcre_study(re, options, error)
271    
272    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
273      pcre_jit_stack_alloc(startsize, maxsize)
274    
275    #define PCRE_JIT_STACK_FREE8(stack) \
276      pcre_jit_stack_free(stack)
277    
278    #endif /* SUPPORT_PCRE8 */
279    
280    /* -----------------------------------------------------------*/
281    
282    #ifdef SUPPORT_PCRE16
283    
284    #define PCHARS16(lv, p, offset, len, f) \
285      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
286    
287    #define PCHARSV16(p, offset, len, f) \
288      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
289    
290    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
291      p = read_capture_name16(p, cn16, re)
292    
293    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
294    
295    #define SET_PCRE_CALLOUT16(callout) \
296      pcre16_callout = (int (*)(pcre16_callout_block *))callout
297    
298    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
299      pcre16_assign_jit_stack((pcre16_extra *)extra, \
300        (pcre16_jit_callback)callback, userdata)
301    
302    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
303      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
304        tables)
305    
306    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
307        namesptr, cbuffer, size) \
308      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
309        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
310    
311    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
312      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
313        (PCRE_UCHAR16 *)cbuffer, size/2)
314    
315    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
316        offsets, size_offsets, workspace, size_workspace) \
317      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
318        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
319        workspace, size_workspace)
320    
321    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
322        offsets, size_offsets) \
323      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
324        len, start_offset, options, offsets, size_offsets)
325    
326    #define PCRE_FREE_STUDY16(extra) \
327      pcre16_free_study((pcre16_extra *)extra)
328    
329    #define PCRE_FREE_SUBSTRING16(substring) \
330      pcre16_free_substring((PCRE_SPTR16)substring)
331    
332    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
333      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
334    
335    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
336        getnamesptr, subsptr) \
337      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
338        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
339    
340    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
341      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
342    
343    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
344      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
345        (PCRE_SPTR16 *)(void*)subsptr)
346    
347    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
348      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
349        (PCRE_SPTR16 **)(void*)listptr)
350    
351    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
352      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
353        tables)
354    
355    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
356      pcre16_printint(re, outfile, debug_lengths)
357    
358    #define PCRE_STUDY16(extra, re, options, error) \
359      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
360    
361    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
362      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
363    
364    #define PCRE_JIT_STACK_FREE16(stack) \
365      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
366    
367    #endif /* SUPPORT_PCRE16 */
368    
369    
370    /* ----- Both modes are supported; a runtime test on use_pcre16 is needed,
371    except for pcre_config(), where it doesn't matter which version is called
372    (the JIT stack macros below are still dispatched at runtime). ----- */
373    
374    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
375    
376    #define CHAR_SIZE (use_pcre16? 2:1)
377    
378    #define PCHARS(lv, p, offset, len, f) \
379      if (use_pcre16) \
380        PCHARS16(lv, p, offset, len, f); \
381      else \
382        PCHARS8(lv, p, offset, len, f)
383    
384    #define PCHARSV(p, offset, len, f) \
385      if (use_pcre16) \
386        PCHARSV16(p, offset, len, f); \
387      else \
388        PCHARSV8(p, offset, len, f)
389    
390    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
391      if (use_pcre16) \
392        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
393      else \
394        READ_CAPTURE_NAME8(p, cn8, cn16, re)
395    
396    #define SET_PCRE_CALLOUT(callout) \
397      if (use_pcre16) \
398        SET_PCRE_CALLOUT16(callout); \
399      else \
400        SET_PCRE_CALLOUT8(callout)
401    
402    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
403    
404    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
405      if (use_pcre16) \
406        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
407      else \
408        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
409    
410    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
411      if (use_pcre16) \
412        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
413      else \
414        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
415    
416    #define PCRE_CONFIG pcre_config
417    
418    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
419        namesptr, cbuffer, size) \
420      if (use_pcre16) \
421        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
422          namesptr, cbuffer, size); \
423      else \
424        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
425          namesptr, cbuffer, size)
426    
427    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
428      if (use_pcre16) \
429        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
430      else \
431        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
432    
433    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
434        offsets, size_offsets, workspace, size_workspace) \
435      if (use_pcre16) \
436        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
437          offsets, size_offsets, workspace, size_workspace); \
438      else \
439        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
440          offsets, size_offsets, workspace, size_workspace)
441    
442    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
443        offsets, size_offsets) \
444      if (use_pcre16) \
445        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
446          offsets, size_offsets); \
447      else \
448        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
449          offsets, size_offsets)
450    
451    #define PCRE_FREE_STUDY(extra) \
452      if (use_pcre16) \
453        PCRE_FREE_STUDY16(extra); \
454      else \
455        PCRE_FREE_STUDY8(extra)
456    
457    #define PCRE_FREE_SUBSTRING(substring) \
458      if (use_pcre16) \
459        PCRE_FREE_SUBSTRING16(substring); \
460      else \
461        PCRE_FREE_SUBSTRING8(substring)
462    
463    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
464      if (use_pcre16) \
465        PCRE_FREE_SUBSTRING_LIST16(listptr); \
466      else \
467        PCRE_FREE_SUBSTRING_LIST8(listptr)
468    
469    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
470        getnamesptr, subsptr) \
471      if (use_pcre16) \
472        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
473          getnamesptr, subsptr); \
474      else \
475        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
476          getnamesptr, subsptr)
477    
478    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
479      if (use_pcre16) \
480        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
481      else \
482        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
483    
484    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
485      if (use_pcre16) \
486        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
487      else \
488        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
489    
490    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
491      if (use_pcre16) \
492        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
493      else \
494        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
495    
496    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
497      (use_pcre16 ? \
498         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
499        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
500    
501    #define PCRE_JIT_STACK_FREE(stack) \
502      if (use_pcre16) \
503        PCRE_JIT_STACK_FREE16(stack); \
504      else \
505        PCRE_JIT_STACK_FREE8(stack)
506    
507    #define PCRE_MAKETABLES \
508      (use_pcre16? pcre16_maketables() : pcre_maketables())
509    
510    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
511      if (use_pcre16) \
512        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
513      else \
514        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
515    
516    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
517      if (use_pcre16) \
518        PCRE_PRINTINT16(re, outfile, debug_lengths); \
519      else \
520        PCRE_PRINTINT8(re, outfile, debug_lengths)
521    
522    #define PCRE_STUDY(extra, re, options, error) \
523      if (use_pcre16) \
524        PCRE_STUDY16(extra, re, options, error); \
525      else \
526        PCRE_STUDY8(extra, re, options, error)
527    
528    /* ----- Only 8-bit mode is supported ----- */
529    
530    #elif defined SUPPORT_PCRE8
531    #define CHAR_SIZE                 1
532    #define PCHARS                    PCHARS8
533    #define PCHARSV                   PCHARSV8
534    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
535    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
536    #define STRLEN                    STRLEN8
537    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
538    #define PCRE_COMPILE              PCRE_COMPILE8
539    #define PCRE_CONFIG               pcre_config
540    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
541    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
542    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
543    #define PCRE_EXEC                 PCRE_EXEC8
544    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
545    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
546    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
547    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
548    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
549    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
550    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
551    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
552    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
553    #define PCRE_MAKETABLES           pcre_maketables()
554    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
555    #define PCRE_PRINTINT             PCRE_PRINTINT8
556    #define PCRE_STUDY                PCRE_STUDY8
557    
558    /* ----- Only 16-bit mode is supported ----- */
559    
560    #else
561    #define CHAR_SIZE                 2
562    #define PCHARS                    PCHARS16
563    #define PCHARSV                   PCHARSV16
564    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
565    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
566    #define STRLEN                    STRLEN16
567    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
568    #define PCRE_COMPILE              PCRE_COMPILE16
569    #define PCRE_CONFIG               pcre16_config
570    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
571    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
572    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
573    #define PCRE_EXEC                 PCRE_EXEC16
574    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
575    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
576    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
577    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
578    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
579    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
580    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
581    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
582    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
583    #define PCRE_MAKETABLES           pcre16_maketables()
584    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
585    #define PCRE_PRINTINT             PCRE_PRINTINT16
586    #define PCRE_STUDY                PCRE_STUDY16
587  #endif  #endif
588    
589    /* ----- End of mode-specific function call macros ----- */
590    
591    
592  /* Other parameters */  /* Other parameters */
593    
# Line 187  static int callout_fail_count; Line 613  static int callout_fail_count;
613  static int callout_fail_id;  static int callout_fail_id;
614  static int debug_lengths;  static int debug_lengths;
615  static int first_callout;  static int first_callout;
616    static int jit_was_used;
617  static int locale_set = 0;  static int locale_set = 0;
618  static int show_malloc;  static int show_malloc;
619  static int use_utf8;  static int use_utf;
620  static size_t gotten_store;  static size_t gotten_store;
621    static size_t first_gotten_store = 0;
622  static const unsigned char *last_callout_mark = NULL;  static const unsigned char *last_callout_mark = NULL;
623    
624  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
625    
626  static int buffer_size = 50000;  static int buffer_size = 50000;
627  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
628  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
629  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
630    
631    /* Another buffer is needed for translation to 16-bit character strings. It
632    will be obtained and extended as required. */
633    
634    #ifdef SUPPORT_PCRE16
635    static int buffer16_size = 0;
636    static pcre_uint16 *buffer16 = NULL;
637    
638    #ifdef SUPPORT_PCRE8
639    
640    /* We need the table of operator lengths that is used for 16-bit compiling, in
641    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
642    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
643    appropriately for the 16-bit world. Just as a safety check, make sure that
644    COMPILE_PCRE16 is *not* set. */
645    
646    #ifdef COMPILE_PCRE16
647    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
648    #endif
649    
650    #if LINK_SIZE == 2
651    #undef LINK_SIZE
652    #define LINK_SIZE 1
653    #elif LINK_SIZE == 3 || LINK_SIZE == 4
654    #undef LINK_SIZE
655    #define LINK_SIZE 2
656    #else
657    #error LINK_SIZE must be either 2, 3, or 4
658    #endif
659    
660    #undef IMM2_SIZE
661    #define IMM2_SIZE 1
662    
663    #endif /* SUPPORT_PCRE8 */
664    
665    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
666    #endif  /* SUPPORT_PCRE16 */
667    
668    /* If we have 8-bit support, default use_pcre16 to false; if there is also
669    16-bit support, it can be changed by an option. If there is no 8-bit support,
670    there must be 16-bit support, so default it to 1. */
671    
672    #ifdef SUPPORT_PCRE8
673    static int use_pcre16 = 0;
674    #else
675    static int use_pcre16 = 1;
676    #endif
677    
678    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
679    
680    static int jit_study_bits[] =
681      {
682      PCRE_STUDY_JIT_COMPILE,
683      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
684      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
685      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
686      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
687      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
688      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
689        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
690    };
691    
692  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
693    
# Line 213  static const char *errtexts[] = { Line 702  static const char *errtexts[] = {
702    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
703    "match limit exceeded",    "match limit exceeded",
704    "callout error code",    "callout error code",
705    NULL,  /* BADUTF8 is handled specially */    NULL,  /* BADUTF8/16 is handled specially */
706    "bad UTF-8 offset",    NULL,  /* BADUTF8/16 offset is handled specially */
707    NULL,  /* PARTIAL is handled specially */    NULL,  /* PARTIAL is handled specially */
708    "not used - internal error",    "not used - internal error",
709    "internal error - pattern overwritten?",    "internal error - pattern overwritten?",
# Line 228  static const char *errtexts[] = { Line 717  static const char *errtexts[] = {
717    "not used - internal error",    "not used - internal error",
718    "invalid combination of newline options",    "invalid combination of newline options",
719    "bad offset value",    "bad offset value",
720    NULL,  /* SHORTUTF8 is handled specially */    NULL,  /* SHORTUTF8/16 is handled specially */
721    "nested recursion at the same subject position"    "nested recursion at the same subject position",
722      "JIT stack limit reached",
723      "pattern compiled in wrong mode: 8-bit/16-bit error"
724  };  };
725    
726    
# Line 245  the L (locale) option also adjusts the t Line 736  the L (locale) option also adjusts the t
736  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
737  only ASCII characters. */  only ASCII characters. */
738    
739  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
740    
741  /* This table is a lower casing table. */  /* This table is a lower casing table. */
742    
# Line 418  graph, print, punct, and cntrl. Other cl Line 909  graph, print, punct, and cntrl. Other cl
909  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
910  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
911    
912  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
913  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
914  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
915  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 587  return sys_errlist[n]; Line 1078  return sys_errlist[n];
1078    
1079  static pcre_jit_stack* jit_callback(void *arg)  static pcre_jit_stack* jit_callback(void *arg)
1080  {  {
1081    jit_was_used = TRUE;
1082  return (pcre_jit_stack *)arg;  return (pcre_jit_stack *)arg;
1083  }  }
1084    
1085    
1086    #if !defined NOUTF || defined SUPPORT_PCRE16
1087    /*************************************************
1088    *            Convert UTF-8 string to value       *
1089    *************************************************/
1090    
1091    /* This function takes one or more bytes that represents a UTF-8 character,
1092    and returns the value of the character.
1093    
1094    Argument:
1095      utf8bytes   a pointer to the byte vector
1096      vptr        a pointer to an int to receive the value
1097    
1098    Returns:      >  0 => the number of bytes consumed
1099                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1100    */
1101    
1102    static int
1103    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1104    {
1105    int c = *utf8bytes++;
1106    int d = c;
1107    int i, j, s;
1108    
1109    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1110      {
1111      if ((d & 0x80) == 0) break;
1112      d <<= 1;
1113      }
1114    
1115    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1116    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1117    
1118    /* i now has a value in the range 1-5 */
1119    
1120    s = 6*i;
1121    d = (c & utf8_table3[i]) << s;
1122    
1123    for (j = 0; j < i; j++)
1124      {
1125      c = *utf8bytes++;
1126      if ((c & 0xc0) != 0x80) return -(j+1);
1127      s -= 6;
1128      d |= (c & 0x3f) << s;
1129      }
1130    
1131    /* Check that encoding was the correct unique one */
1132    
1133    for (j = 0; j < utf8_table1_size; j++)
1134      if (d <= utf8_table1[j]) break;
1135    if (j != i) return -(i+1);
1136    
1137    /* Valid value */
1138    
1139    *vptr = d;
1140    return i+1;
1141    }
1142    #endif /* !NOUTF || SUPPORT_PCRE16 */
1143    
1144    
1145    
1146    #if !defined NOUTF || defined SUPPORT_PCRE16
1147    /*************************************************
1148    *       Convert character value to UTF-8         *
1149    *************************************************/
1150    
1151    /* This function takes an integer value in the range 0 - 0x7fffffff
1152    and encodes it as a UTF-8 character in 1 to 6 bytes.
1153    
1154    Arguments:
1155      cvalue     the character value
1156      utf8bytes  pointer to buffer for result - at least 6 bytes long
1157    
1158    Returns:     number of characters placed in the buffer
1159    */
1160    
1161    static int
1162    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1163    {
1164    register int i, j;
1165    for (i = 0; i < utf8_table1_size; i++)
1166      if (cvalue <= utf8_table1[i]) break;
1167    utf8bytes += i;
1168    for (j = i; j > 0; j--)
1169     {
1170     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1171     cvalue >>= 6;
1172     }
1173    *utf8bytes = utf8_table2[i] | cvalue;
1174    return i + 1;
1175    }
1176    #endif
1177    
1178    
1179    #ifdef SUPPORT_PCRE16
1180    /*************************************************
1181    *         Convert a string to 16-bit             *
1182    *************************************************/
1183    
1184    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1185    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1186    double, because values up to 0xffff use 1 to 3 bytes in UTF-8 and exactly 2
1187    bytes in UTF-16. Higher values use 4 bytes in both UTF-8 and UTF-16. The
1188    result is always left in buffer16.
1189    
1190    Note that this function does not object to surrogate values. This is
1191    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1192    for the purpose of testing that they are correctly faulted.
1193    
1194    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1195    in UTF-8 so that values greater than 255 can be handled.
1196    
1197    Arguments:
1198      data       TRUE if converting a data line; FALSE for a regex
1199      p          points to a byte string
1200      utf        true if UTF-8 (to be converted to UTF-16)
1201      len        number of bytes in the string (excluding trailing zero)
1202    
1203    Returns:     number of 16-bit data items used (excluding trailing zero)
1204                 OR -1 if a UTF-8 string is malformed
1205                 OR -2 if a value > 0x10ffff is encountered
1206                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1207    */
1208    
1209    static int
1210    to16(int data, pcre_uint8 *p, int utf, int len)
1211    {
1212    pcre_uint16 *pp;
1213    
1214    if (buffer16_size < 2*len + 2)
1215      {
1216      if (buffer16 != NULL) free(buffer16);
1217      buffer16_size = 2*len + 2;
1218      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1219      if (buffer16 == NULL)
1220        {
1221        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1222        exit(1);
1223        }
1224      }
1225    
1226    pp = buffer16;
1227    
1228    if (!utf && !data)
1229      {
1230      while (len-- > 0) *pp++ = *p++;
1231      }
1232    
1233    else
1234      {
1235      int c = 0;
1236      while (len > 0)
1237        {
1238        int chlen = utf82ord(p, &c);
1239        if (chlen <= 0) return -1;
1240        if (c > 0x10ffff) return -2;
1241        p += chlen;
1242        len -= chlen;
1243        if (c < 0x10000) *pp++ = c; else
1244          {
1245          if (!utf) return -3;
1246          c -= 0x10000;
1247          *pp++ = 0xD800 | (c >> 10);
1248          *pp++ = 0xDC00 | (c & 0x3ff);
1249          }
1250        }
1251      }
1252    
1253    *pp = 0;
1254    return pp - buffer16;
1255    }
1256    #endif
1257    
1258    
1259  /*************************************************  /*************************************************
1260  *        Read or extend an input line            *  *        Read or extend an input line            *
1261  *************************************************/  *************************************************/
# Line 614  Returns:       pointer to the start of n Line 1279  Returns:       pointer to the start of n
1279                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1280  */  */
1281    
1282  static uschar *  static pcre_uint8 *
1283  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1284  {  {
1285  uschar *here = start;  pcre_uint8 *here = start;
1286    
1287  for (;;)  for (;;)
1288    {    {
1289    int rlen = (int)(buffer_size - (here - buffer));    size_t rlen = (size_t)(buffer_size - (here - buffer));
1290    
1291    if (rlen > 1000)    if (rlen > 1000)
1292      {      {
# Line 664  for (;;) Line 1329  for (;;)
1329    else    else
1330      {      {
1331      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1332      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1333      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1334      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1335    
1336      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1337        {        {
# Line 697  return NULL;  /* Control never gets here Line 1362  return NULL;  /* Control never gets here
1362    
1363    
1364    
   
   
   
   
1365  /*************************************************  /*************************************************
1366  *          Read number from string               *  *          Read number from string               *
1367  *************************************************/  *************************************************/
# Line 717  Returns:        the unsigned long Line 1378  Returns:        the unsigned long
1378  */  */
1379    
1380  static int  static int
1381  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1382  {  {
1383  int result = 0;  int result = 0;
1384  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 728  return(result); Line 1389  return(result);
1389    
1390    
1391    
   
1392  /*************************************************  /*************************************************
1393  *            Convert UTF-8 string to value       *  *             Print one character                *
1394  *************************************************/  *************************************************/
1395    
1396  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
1397    
1398  Returns:      >  0 => the number of bytes consumed  static int pchar(int c, FILE *f)
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1399  {  {
1400  int c = *utf8bytes++;  if (PRINTOK(c))
1401  int d = c;    {
1402  int i, j, s;    if (f != NULL) fprintf(f, "%c", c);
1403      return 1;
1404      }
1405    
1406  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1407    {    {
1408    if ((d & 0x80) == 0) break;    if (use_utf)
1409    d <<= 1;      {
1410        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1411        return 6;
1412        }
1413      else
1414        {
1415        if (f != NULL) fprintf(f, "\\x%02x", c);
1416        return 4;
1417        }
1418    }    }
1419    
1420  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1421  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return (c <= 0x000000ff)? 6 :
1422           (c <= 0x00000fff)? 7 :
1423           (c <= 0x0000ffff)? 8 :
1424           (c <= 0x000fffff)? 9 : 10;
1425    }
1426    
 /* i now has a value in the range 1-5 */  
1427    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1428    
1429  for (j = 0; j < i; j++)  #ifdef SUPPORT_PCRE8
1430    {  /*************************************************
1431    c = *utf8bytes++;  *         Print 8-bit character string           *
1432    if ((c & 0xc0) != 0x80) return -(j+1);  *************************************************/
   s -= 6;  
   d |= (c & 0x3f) << s;  
   }  
1433    
1434  /* Check that encoding was the correct unique one */  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1435    If handed a NULL file, just counts chars without printing. */
1436    
1437  for (j = 0; j < utf8_table1_size; j++)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1438    if (d <= utf8_table1[j]) break;  {
1439  if (j != i) return -(i+1);  int c = 0;
1440    int yield = 0;
1441    
1442  /* Valid value */  if (length < 0)
1443      length = strlen((char *)p);
1444    
1445  *vptr = d;  while (length-- > 0)
1446  return i+1;    {
1447  }  #if !defined NOUTF
1448      if (use_utf)
1449        {
1450        int rc = utf82ord(p, &c);
1451        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1452          {
1453          length -= rc - 1;
1454          p += rc;
1455          yield += pchar(c, f);
1456          continue;
1457          }
1458        }
1459    #endif
1460      c = *p++;
1461      yield += pchar(c, f);
1462      }
1463    
1464    return yield;
1465    }
1466  #endif  #endif
1467    
1468    
1469    
1470    #ifdef SUPPORT_PCRE16
1471  /*************************************************  /*************************************************
1472  *       Convert character value to UTF-8         *  *    Find length of 0-terminated 16-bit string   *
1473  *************************************************/  *************************************************/
1474    
1475  /* This function takes an integer value in the range 0 - 0x7fffffff  static int strlen16(PCRE_SPTR16 p)
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 ord2utf8(int cvalue, uschar *utf8bytes)  
1476  {  {
1477  register int i, j;  int len = 0;
1478  for (i = 0; i < utf8_table1_size; i++)  while (*p++ != 0) len++;
1479    if (cvalue <= utf8_table1[i]) break;  return len;
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1480  }  }
1481    #endif  /* SUPPORT_PCRE16 */
 #endif  
   
1482    
1483    
1484    #ifdef SUPPORT_PCRE16
1485  /*************************************************  /*************************************************
1486  *             Print character string             *  *           Print 16-bit character string        *
1487  *************************************************/  *************************************************/
1488    
1489  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1490  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1491    
1492  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1493  {  {
 int c = 0;  
1494  int yield = 0;  int yield = 0;
1495    
1496    if (length < 0)
1497      length = strlen16(p);
1498    
1499  while (length-- > 0)  while (length-- > 0)
1500    {    {
1501  #if !defined NOUTF8    int c = *p++ & 0xffff;
1502    if (use_utf8)  #if !defined NOUTF
1503      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1504      {      {
1505      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
1506        if (d >= 0xDC00 && d < 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
1507        {        {
1508        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1509        p += rc;        length--;
1510        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
1511        }        }
1512      }      }
1513  #endif  #endif
1514      yield += pchar(c, f);
1515      }
1516    
1517     /* Not UTF-8, or malformed UTF-8  */  return yield;
1518    }
1519    #endif  /* SUPPORT_PCRE16 */
1520    
1521    c = *p++;  
1522    if (PRINTHEX(c))  
1523      {  #ifdef SUPPORT_PCRE8
1524      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1525      yield++;  *     Read a capture name (8-bit) and check it   *
1526      }  *************************************************/
1527    else  
1528      {  static pcre_uint8 *
1529      if (f != NULL) fprintf(f, "\\x%02x", c);  read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1530      yield += 4;  {
1531      }  pcre_uint8 *npp = *pp;
1532    while (isalnum(*p)) *npp++ = *p++;
1533    *npp++ = 0;
1534    *npp = 0;
1535    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1536      {
1537      fprintf(outfile, "no parentheses with name \"");
1538      PCHARSV(*pp, 0, -1, outfile);
1539      fprintf(outfile, "\"\n");
1540    }    }
1541    
1542  return yield;  *pp = npp;
1543    return p;
1544  }  }
1545    #endif  /* SUPPORT_PCRE8 */
1546    
1547    
1548    
1549    #ifdef SUPPORT_PCRE16
1550    /*************************************************
1551    *     Read a capture name (16-bit) and check it  *
1552    *************************************************/
1553    
1554    /* Note that the text being read is 8-bit. */
1555    
1556    static pcre_uint8 *
1557    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1558    {
1559    pcre_uint16 *npp = *pp;
1560    while (isalnum(*p)) *npp++ = *p++;
1561    *npp++ = 0;
1562    *npp = 0;
1563    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1564      {
1565      fprintf(outfile, "no parentheses with name \"");
1566      PCHARSV(*pp, 0, -1, outfile);
1567      fprintf(outfile, "\"\n");
1568      }
1569    *pp = npp;
1570    return p;
1571    }
1572    #endif  /* SUPPORT_PCRE16 */
1573    
1574    
1575    
# Line 915  if (callout_extra) Line 1598  if (callout_extra)
1598      else      else
1599        {        {
1600        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1601        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1602          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1603        fprintf(f, "\n");        fprintf(f, "\n");
1604        }        }
# Line 928  printed lengths of the substrings. */ Line 1611  printed lengths of the substrings. */
1611    
1612  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1613    
1614  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1615  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1616    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1617    
1618  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1619    
1620  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1621    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1622    
1623  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 973  first_callout = 0; Line 1656  first_callout = 0;
1656    
1657  if (cb->mark != last_callout_mark)  if (cb->mark != last_callout_mark)
1658    {    {
1659    fprintf(outfile, "Latest Mark: %s\n",    if (cb->mark == NULL)
1660      (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));      fprintf(outfile, "Latest Mark: <unset>\n");
1661      else
1662        {
1663        fprintf(outfile, "Latest Mark: ");
1664        PCHARSV(cb->mark, 0, -1, outfile);
1665        putc('\n', outfile);
1666        }
1667    last_callout_mark = cb->mark;    last_callout_mark = cb->mark;
1668    }    }
1669    
# Line 998  return (cb->callout_number != callout_fa Line 1687  return (cb->callout_number != callout_fa
1687  *************************************************/  *************************************************/
1688    
1689  /* Alternative malloc function, to test functionality and save the size of a  /* Alternative malloc function, to test functionality and save the size of a
1690  compiled re. The show_malloc variable is set only during matching. */  compiled re, which is the first store request that pcre_compile() makes. The
1691    show_malloc variable is set only during matching. */
1692    
1693  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1694  {  {
1695  void *block = malloc(size);  void *block = malloc(size);
1696  gotten_store = size;  gotten_store = size;
1697    if (first_gotten_store == 0) first_gotten_store = size;
1698  if (show_malloc)  if (show_malloc)
1699    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1700  return block;  return block;
# Line 1038  free(block); Line 1729  free(block);
1729  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1730  *************************************************/  *************************************************/
1731    
1732  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1733    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1734    value, but the code is defensive.
1735    
1736    Arguments:
1737      re        compiled regex
1738      study     study data
1739      option    PCRE_INFO_xxx option
1740      ptr       where to put the data
1741    
1742    Returns:    0 when OK, < 0 on error
1743    */
1744    
1745  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
1746    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1747  {  {
1748  int rc;  int rc;
1749  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1750    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1751    #ifdef SUPPORT_PCRE16
1752      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1753    #else
1754      rc = PCRE_ERROR_BADMODE;
1755    #endif
1756    else
1757    #ifdef SUPPORT_PCRE8
1758      rc = pcre_fullinfo(re, study, option, ptr);
1759    #else
1760      rc = PCRE_ERROR_BADMODE;
1761    #endif
1762    
1763    if (rc < 0)
1764      {
1765      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1766        use_pcre16? "16" : "", option);
1767      if (rc == PCRE_ERROR_BADMODE)
1768        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1769          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1770      }
1771    
1772    return rc;
1773  }  }
1774    
1775    
1776    
1777  /*************************************************  /*************************************************
1778  *      Check for supported JIT architecture      *  *             Swap byte functions                *
1779  *************************************************/  *************************************************/
1780    
1781  /* If it won't JIT-compile a very simple regex, return FALSE. */  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1782    value, respectively.
1783    
1784  static int check_jit_arch(void)  Arguments:
1785      value        any number
1786    
1787    Returns:       the byte swapped value
1788    */
1789    
1790    static pcre_uint32
1791    swap_uint32(pcre_uint32 value)
1792  {  {
1793  const char *error;  return ((value & 0x000000ff) << 24) |
1794  int erroffset, rc;         ((value & 0x0000ff00) <<  8) |
1795  pcre *re = pcre_compile("abc", 0, &error, &erroffset, NULL);         ((value & 0x00ff0000) >>  8) |
1796  pcre_extra *extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);         (value >> 24);
1797  rc = extra != NULL && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&  }
1798    extra->executable_jit != NULL;  
1799  pcre_free_study(extra);  static pcre_uint16
1800  free(re);  swap_uint16(pcre_uint16 value)
1801  return rc;  {
1802    return (value >> 8) | (value << 8);
1803  }  }
1804    
1805    
1806    
1807  /*************************************************  /*************************************************
1808  *         Byte flipping function                 *  *        Flip bytes in a compiled pattern        *
1809  *************************************************/  *************************************************/
1810    
1811  static unsigned long int  /* This function is called if the 'F' option was present on a pattern that is
1812  byteflip(unsigned long int value, int n)  to be written to a file. We flip the bytes of all the integer fields in the
1813    regex data block and the study block. In 16-bit mode this also flips relevant
1814    bytes in the pattern itself. This is to make it possible to test PCRE's
1815    ability to reload byte-flipped patterns, e.g. those compiled on a different
1816    architecture. */
1817    
1818    static void
1819    regexflip(pcre *ere, pcre_extra *extra)
1820  {  {
1821  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  REAL_PCRE *re = (REAL_PCRE *)ere;
1822  return ((value & 0x000000ff) << 24) |  #ifdef SUPPORT_PCRE16
1823         ((value & 0x0000ff00) <<  8) |  int op;
1824         ((value & 0x00ff0000) >>  8) |  pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1825         ((value & 0xff000000) >> 24);  int length = re->name_count * re->name_entry_size;
1826  }  #ifdef SUPPORT_UTF
1827    BOOL utf = (re->options & PCRE_UTF16) != 0;
1828    BOOL utf16_char = FALSE;
1829    #endif /* SUPPORT_UTF */
1830    #endif /* SUPPORT_PCRE16 */
1831    
1832    /* Always flip the bytes in the main data block and study blocks. */
1833    
1834    re->magic_number = REVERSED_MAGIC_NUMBER;
1835    re->size = swap_uint32(re->size);
1836    re->options = swap_uint32(re->options);
1837    re->flags = swap_uint16(re->flags);
1838    re->top_bracket = swap_uint16(re->top_bracket);
1839    re->top_backref = swap_uint16(re->top_backref);
1840    re->first_char = swap_uint16(re->first_char);
1841    re->req_char = swap_uint16(re->req_char);
1842    re->name_table_offset = swap_uint16(re->name_table_offset);
1843    re->name_entry_size = swap_uint16(re->name_entry_size);
1844    re->name_count = swap_uint16(re->name_count);
1845    
1846    if (extra != NULL)
1847      {
1848      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1849      rsd->size = swap_uint32(rsd->size);
1850      rsd->flags = swap_uint32(rsd->flags);
1851      rsd->minlength = swap_uint32(rsd->minlength);
1852      }
1853    
1854    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1855    in the name table, if present, and then in the pattern itself. */
1856    
1857    #ifdef SUPPORT_PCRE16
1858    if (!use_pcre16) return;
1859    
1860    while(TRUE)
1861      {
1862      /* Swap previous characters. */
1863      while (length-- > 0)
1864        {
1865        *ptr = swap_uint16(*ptr);
1866        ptr++;
1867        }
1868    #ifdef SUPPORT_UTF
1869      if (utf16_char)
1870        {
1871        if ((ptr[-1] & 0xfc00) == 0xd800)
1872          {
1873          /* We know that there is only one extra character in UTF-16. */
1874          *ptr = swap_uint16(*ptr);
1875          ptr++;
1876          }
1877        }
1878      utf16_char = FALSE;
1879    #endif /* SUPPORT_UTF */
1880    
1881      /* Get next opcode. */
1882    
1883      length = 0;
1884      op = *ptr;
1885      *ptr++ = swap_uint16(op);
1886    
1887      switch (op)
1888        {
1889        case OP_END:
1890        return;
1891    
1892    #ifdef SUPPORT_UTF
1893        case OP_CHAR:
1894        case OP_CHARI:
1895        case OP_NOT:
1896        case OP_NOTI:
1897        case OP_STAR:
1898        case OP_MINSTAR:
1899        case OP_PLUS:
1900        case OP_MINPLUS:
1901        case OP_QUERY:
1902        case OP_MINQUERY:
1903        case OP_UPTO:
1904        case OP_MINUPTO:
1905        case OP_EXACT:
1906        case OP_POSSTAR:
1907        case OP_POSPLUS:
1908        case OP_POSQUERY:
1909        case OP_POSUPTO:
1910        case OP_STARI:
1911        case OP_MINSTARI:
1912        case OP_PLUSI:
1913        case OP_MINPLUSI:
1914        case OP_QUERYI:
1915        case OP_MINQUERYI:
1916        case OP_UPTOI:
1917        case OP_MINUPTOI:
1918        case OP_EXACTI:
1919        case OP_POSSTARI:
1920        case OP_POSPLUSI:
1921        case OP_POSQUERYI:
1922        case OP_POSUPTOI:
1923        case OP_NOTSTAR:
1924        case OP_NOTMINSTAR:
1925        case OP_NOTPLUS:
1926        case OP_NOTMINPLUS:
1927        case OP_NOTQUERY:
1928        case OP_NOTMINQUERY:
1929        case OP_NOTUPTO:
1930        case OP_NOTMINUPTO:
1931        case OP_NOTEXACT:
1932        case OP_NOTPOSSTAR:
1933        case OP_NOTPOSPLUS:
1934        case OP_NOTPOSQUERY:
1935        case OP_NOTPOSUPTO:
1936        case OP_NOTSTARI:
1937        case OP_NOTMINSTARI:
1938        case OP_NOTPLUSI:
1939        case OP_NOTMINPLUSI:
1940        case OP_NOTQUERYI:
1941        case OP_NOTMINQUERYI:
1942        case OP_NOTUPTOI:
1943        case OP_NOTMINUPTOI:
1944        case OP_NOTEXACTI:
1945        case OP_NOTPOSSTARI:
1946        case OP_NOTPOSPLUSI:
1947        case OP_NOTPOSQUERYI:
1948        case OP_NOTPOSUPTOI:
1949        if (utf) utf16_char = TRUE;
1950    #endif
1951        /* Fall through. */
1952    
1953        default:
1954        length = OP_lengths16[op] - 1;
1955        break;
1956    
1957        case OP_CLASS:
1958        case OP_NCLASS:
1959        /* Skip the character bit map. */
1960        ptr += 32/sizeof(pcre_uint16);
1961        length = 0;
1962        break;
1963    
1964        case OP_XCLASS:
1965        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1966        if (LINK_SIZE > 1)
1967          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1968            - (1 + LINK_SIZE + 1));
1969        else
1970          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1971    
1972        /* Reverse the size of the XCLASS instance. */
1973        *ptr = swap_uint16(*ptr);
1974        ptr++;
1975        if (LINK_SIZE > 1)
1976          {
1977          *ptr = swap_uint16(*ptr);
1978          ptr++;
1979          }
1980    
1981        op = *ptr;
1982        *ptr = swap_uint16(op);
1983        ptr++;
1984        if ((op & XCL_MAP) != 0)
1985          {
1986          /* Skip the character bit map. */
1987          ptr += 32/sizeof(pcre_uint16);
1988          length -= 32/sizeof(pcre_uint16);
1989          }
1990        break;
1991        }
1992      }
1993    /* Control should never reach here in 16 bit mode. */
1994    #endif /* SUPPORT_PCRE16 */
1995    }
1996    
1997    
1998    
# Line 1091  return ((value & 0x000000ff) << 24) | Line 2001  return ((value & 0x000000ff) << 24) |
2001  *************************************************/  *************************************************/
2002    
2003  static int  static int
2004  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2005    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2006    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2007  {  {
# Line 1106  for (;;) Line 2016  for (;;)
2016    {    {
2017    *limit = mid;    *limit = mid;
2018    
2019    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2020      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2021    
2022    if (count == errnumber)    if (count == errnumber)
# Line 1151  Returns:    < 0, = 0, or > 0, according Line 2061  Returns:    < 0, = 0, or > 0, according
2061  */  */
2062    
2063  static int  static int
2064  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2065  {  {
2066  while (n--)  while (n--)
2067    {    {
# Line 1178  Returns:      appropriate PCRE_NEWLINE_x Line 2088  Returns:      appropriate PCRE_NEWLINE_x
2088  */  */
2089    
2090  static int  static int
2091  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2092  {  {
2093  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2094  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2095  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2096  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2097  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2098  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2099  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2100  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2101  return 0;  return 0;
2102  }  }
# Line 1208  printf("If input is a terminal, readline Line 2118  printf("If input is a terminal, readline
2118  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2119  #endif  #endif
2120  printf("\nOptions:\n");  printf("\nOptions:\n");
2121  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2122    printf("  -16      use the 16-bit library\n");
2123    #endif
2124    printf("  -b       show compiled code\n");
2125  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2126    printf("  -C arg   show a specific compile-time option\n");
2127    printf("           and exit with its value. The arg can be:\n");
2128    printf("     linksize     internal link size [2, 3, 4]\n");
2129    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2130    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2131    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2132    printf("     ucp          Unicode Properties supported [0, 1]\n");
2133    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2134    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2135  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2136  #if !defined NODFA  #if !defined NODFA
2137  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 1226  printf("  -q       quiet: do not output Line 2148  printf("  -q       quiet: do not output
2148  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2149  printf("  -s       force each pattern to be studied at basic level\n"  printf("  -s       force each pattern to be studied at basic level\n"
2150         "  -s+      force each pattern to be studied, using JIT if available\n"         "  -s+      force each pattern to be studied, using JIT if available\n"
2151           "  -s++     ditto, verifying when JIT was actually used\n"
2152           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2153           "             where 1 <= n <= 7 selects JIT options\n"
2154           "  -s++n    ditto, verifying when JIT was actually used\n"
2155         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2156  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2157  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 1245  options, followed by a set of test data, Line 2171  options, followed by a set of test data,
2171  int main(int argc, char **argv)  int main(int argc, char **argv)
2172  {  {
2173  FILE *infile = stdin;  FILE *infile = stdin;
2174    const char *version;
2175  int options = 0;  int options = 0;
2176  int study_options = 0;  int study_options = 0;
2177  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 1265  int posix = 0; Line 2192  int posix = 0;
2192  int debug = 0;  int debug = 0;
2193  int done = 0;  int done = 0;
2194  int all_use_dfa = 0;  int all_use_dfa = 0;
2195    int verify_jit = 0;
2196  int yield = 0;  int yield = 0;
2197  int stack_size;  int stack_size;
2198    
2199  pcre_jit_stack *jit_stack = NULL;  pcre_jit_stack *jit_stack = NULL;
2200    
2201    /* These vectors store, end-to-end, a list of zero-terminated captured
2202    substring names, each list itself being terminated by an empty name. Assume
2203    that 1024 is plenty long enough for the few names we'll be testing. It is
2204    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2205    for the actual memory, to ensure alignment. */
2206    
2207    pcre_uint16 copynames[1024];
2208    pcre_uint16 getnames[1024];
2209    
2210    #ifdef SUPPORT_PCRE16
2211    pcre_uint16 *cn16ptr;
2212    pcre_uint16 *gn16ptr;
2213    #endif
2214    
2215  /* These vectors store, end-to-end, a list of captured substring names. Assume  #ifdef SUPPORT_PCRE8
2216  that 1024 is plenty long enough for the few names we'll be testing. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2217    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2218  uschar copynames[1024];  pcre_uint8 *cn8ptr;
2219  uschar getnames[1024];  pcre_uint8 *gn8ptr;
2220    #endif
 uschar *copynamesptr;  
 uschar *getnamesptr;  
   
 /* Get buffers from malloc() so that Electric Fence will check their misuse  
 when I am debugging. They grow automatically when very long lines are read. */  
2221    
2222  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2223  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2224  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2225    
2226    buffer = (pcre_uint8 *)malloc(buffer_size);
2227    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2228    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2229    
2230  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2231    
# Line 1300  it set 0x8000, but then I was advised th Line 2240  it set 0x8000, but then I was advised th
2240  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2241  #endif  #endif
2242    
2243    /* Get the version number: both pcre_version() and pcre16_version() give the
2244    same answer. We just need to ensure that we call one that is available. */
2245    
2246    #ifdef SUPPORT_PCRE8
2247    version = pcre_version();
2248    #else
2249    version = pcre16_version();
2250    #endif
2251    
2252  /* Scan options */  /* Scan options */
2253    
2254  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2255    {    {
2256    unsigned char *endptr;    pcre_uint8 *endptr;
2257      char *arg = argv[op];
2258    
2259    if (strcmp(argv[op], "-m") == 0) showstore = 1;    if (strcmp(arg, "-m") == 0) showstore = 1;
2260    else if (strcmp(argv[op], "-s") == 0) force_study = 0;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2261    else if (strcmp(argv[op], "-s+") == 0)  
2262      else if (strncmp(arg, "-s+", 3) == 0)
2263      {      {
2264        arg += 3;
2265        if (*arg == '+') { arg++; verify_jit = TRUE; }
2266      force_study = 1;      force_study = 1;
2267      force_study_options = PCRE_STUDY_JIT_COMPILE;      if (*arg == 0)
2268      }        force_study_options = jit_study_bits[6];
2269    else if (strcmp(argv[op], "-q") == 0) quiet = 1;      else if (*arg >= '1' && *arg <= '7')
2270    else if (strcmp(argv[op], "-b") == 0) debug = 1;        force_study_options = jit_study_bits[*arg - '1'];
2271    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      else goto BAD_ARG;
2272    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      }
2273    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;    else if (strcmp(arg, "-16") == 0)
2274        {
2275    #ifdef SUPPORT_PCRE16
2276        use_pcre16 = 1;
2277    #else
2278        printf("** This version of PCRE was built without 16-bit support\n");
2279        exit(1);
2280    #endif
2281        }
2282      else if (strcmp(arg, "-q") == 0) quiet = 1;
2283      else if (strcmp(arg, "-b") == 0) debug = 1;
2284      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2285      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2286      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2287  #if !defined NODFA  #if !defined NODFA
2288    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2289  #endif  #endif
2290    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2291        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2292          *endptr == 0))          *endptr == 0))
2293      {      {
2294      op++;      op++;
2295      argc--;      argc--;
2296      }      }
2297    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2298      {      {
2299      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2300      int temp;      int temp;
2301      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2302                       *endptr == 0))                       *endptr == 0))
2303        {        {
2304        timeitm = temp;        timeitm = temp;
# Line 1342  while (argc > 1 && argv[op][0] == '-') Line 2308  while (argc > 1 && argv[op][0] == '-')
2308      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2309      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2310      }      }
2311    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2312        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2313          *endptr == 0))          *endptr == 0))
2314      {      {
2315  #if defined(_WIN32) || defined(WIN32) || defined(__minix)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
# Line 1365  while (argc > 1 && argv[op][0] == '-') Line 2331  while (argc > 1 && argv[op][0] == '-')
2331  #endif  #endif
2332      }      }
2333  #if !defined NOPOSIX  #if !defined NOPOSIX
2334    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2335  #endif  #endif
2336    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2337      {      {
2338      int rc;      int rc;
2339      unsigned long int lrc;      unsigned long int lrc;
2340      printf("PCRE version %s\n", pcre_version());  
2341        if (argc > 2)
2342          {
2343          if (strcmp(argv[op + 1], "linksize") == 0)
2344            {
2345            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2346            printf("%d\n", rc);
2347            yield = rc;
2348            goto EXIT;
2349            }
2350          if (strcmp(argv[op + 1], "pcre8") == 0)
2351            {
2352    #ifdef SUPPORT_PCRE8
2353            printf("1\n");
2354            yield = 1;
2355    #else
2356            printf("0\n");
2357            yield = 0;
2358    #endif
2359            goto EXIT;
2360            }
2361          if (strcmp(argv[op + 1], "pcre16") == 0)
2362            {
2363    #ifdef SUPPORT_PCRE16
2364            printf("1\n");
2365            yield = 1;
2366    #else
2367            printf("0\n");
2368            yield = 0;
2369    #endif
2370            goto EXIT;
2371            }
2372          if (strcmp(argv[op + 1], "utf") == 0)
2373            {
2374    #ifdef SUPPORT_PCRE8
2375            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2376            printf("%d\n", rc);
2377            yield = rc;
2378    #else
2379            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2380            printf("%d\n", rc);
2381            yield = rc;
2382    #endif
2383            goto EXIT;
2384            }
2385          if (strcmp(argv[op + 1], "ucp") == 0)
2386            {
2387            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2388            printf("%d\n", rc);
2389            yield = rc;
2390            goto EXIT;
2391            }
2392          if (strcmp(argv[op + 1], "jit") == 0)
2393            {
2394            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2395            printf("%d\n", rc);
2396            yield = rc;
2397            goto EXIT;
2398            }
2399          if (strcmp(argv[op + 1], "newline") == 0)
2400            {
2401            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2402            /* Note that these values are always the ASCII values, even
2403            in EBCDIC environments. CR is 13 and NL is 10. */
2404            printf("%s\n", (rc == 13)? "CR" :
2405              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2406              (rc == -2)? "ANYCRLF" :
2407              (rc == -1)? "ANY" : "???");
2408            goto EXIT;
2409            }
2410          printf("Unknown -C option: %s\n", argv[op + 1]);
2411          goto EXIT;
2412          }
2413    
2414        printf("PCRE version %s\n", version);
2415      printf("Compiled with\n");      printf("Compiled with\n");
2416    
2417    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2418    are set, either both UTFs are supported or both are not supported. */
2419    
2420    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2421        printf("  8-bit and 16-bit support\n");
2422        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2423        if (rc)
2424          printf("  UTF-8 and UTF-16 support\n");
2425        else
2426          printf("  No UTF-8 or UTF-16 support\n");
2427    #elif defined SUPPORT_PCRE8
2428        printf("  8-bit support only\n");
2429      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2430      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2431      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2432        printf("  16-bit support only\n");
2433        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2434        printf("  %sUTF-16 support\n", rc? "" : "No ");
2435    #endif
2436    
2437        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2438      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2439      (void)pcre_config(PCRE_CONFIG_JIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2440      if (rc)      if (rc)
2441        printf("  Just-in-time compiler support%s\n", check_jit_arch()?        {
2442          "" : " (but this architecture is unsupported)");        const char *arch;
2443          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2444          printf("  Just-in-time compiler support: %s\n", arch);
2445          }
2446      else      else
2447        printf("  No just-in-time compiler support\n");        printf("  No just-in-time compiler support\n");
2448      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2449      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
2450      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
2451      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2452        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2453        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2454        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2455      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2456      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2457                                       "all Unicode newlines");                                       "all Unicode newlines");
2458      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2459      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2460      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2461      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2462      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2463      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
2464      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2465      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
2466      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2467      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2468        if (showstore)
2469          {
2470          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2471          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2472          }
2473        printf("\n");
2474      goto EXIT;      goto EXIT;
2475      }      }
2476    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
2477             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
2478      {      {
2479      usage();      usage();
2480      goto EXIT;      goto EXIT;
2481      }      }
2482    else    else
2483      {      {
2484      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
2485        printf("** Unknown or malformed option %s\n", arg);
2486      usage();      usage();
2487      yield = 1;      yield = 1;
2488      goto EXIT;      goto EXIT;
# Line 1460  if (argc > 2) Line 2529  if (argc > 2)
2529    
2530  /* Set alternative malloc function */  /* Set alternative malloc function */
2531    
2532    #ifdef SUPPORT_PCRE8
2533  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2534  pcre_free = new_free;  pcre_free = new_free;
2535  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2536  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2537    #endif
2538    
2539    #ifdef SUPPORT_PCRE16
2540    pcre16_malloc = new_malloc;
2541    pcre16_free = new_free;
2542    pcre16_stack_malloc = stack_malloc;
2543    pcre16_stack_free = stack_free;
2544    #endif
2545    
2546  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2547    
2548  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2549    
2550  /* Main loop */  /* Main loop */
2551    
# Line 1482  while (!done) Line 2560  while (!done)
2560  #endif  #endif
2561    
2562    const char *error;    const char *error;
2563    unsigned char *markptr;    pcre_uint8 *markptr;
2564    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
2565    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
2566    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
2567      unsigned long int get_options;
2568    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2569    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2570    int do_allcaps = 0;    int do_allcaps = 0;
# Line 1501  while (!done) Line 2580  while (!done)
2580    int do_flip = 0;    int do_flip = 0;
2581    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2582    
2583    use_utf8 = 0;    use_utf = 0;
2584    debug_lengths = 1;    debug_lengths = 1;
2585    
2586    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1516  while (!done) Line 2595  while (!done)
2595    
2596    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2597      {      {
2598      unsigned long int magic, get_options;      pcre_uint32 magic;
2599      uschar sbuf[8];      pcre_uint8 sbuf[8];
2600      FILE *f;      FILE *f;
2601    
2602      p++;      p++;
2603        if (*p == '!')
2604          {
2605          do_debug = TRUE;
2606          do_showinfo = TRUE;
2607          p++;
2608          }
2609    
2610      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2611      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2612      *pp = 0;      *pp = 0;
# Line 1532  while (!done) Line 2618  while (!done)
2618        continue;        continue;
2619        }        }
2620    
2621        first_gotten_store = 0;
2622      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2623    
2624      true_size =      true_size =
# Line 1539  while (!done) Line 2626  while (!done)
2626      true_study_size =      true_study_size =
2627        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2628    
2629      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2630      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2631    
2632      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2633    
2634      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2635      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2636        {        {
2637        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2638          {          {
2639          do_flip = 1;          do_flip = 1;
2640          }          }
# Line 1559  while (!done) Line 2646  while (!done)
2646          }          }
2647        }        }
2648    
2649        /* We hide the byte-invert info for little and big endian tests. */
2650      fprintf(outfile, "Compiled pattern%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2651        do_flip? " (byte-inverted)" : "", p);        do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
   
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
2652    
2653      /* Now see if there is any following study data. */      /* Now see if there is any following study data. */
2654    
# Line 1583  while (!done) Line 2666  while (!done)
2666          {          {
2667          FAIL_READ:          FAIL_READ:
2668          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2669          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
2670              {
2671              PCRE_FREE_STUDY(extra);
2672              }
2673          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2674          fclose(f);          fclose(f);
2675          continue;          continue;
# Line 1593  while (!done) Line 2679  while (!done)
2679        }        }
2680      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2681    
2682        /* Flip the necessary bytes. */
2683        if (do_flip)
2684          {
2685          int rc;
2686          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2687          if (rc == PCRE_ERROR_BADMODE)
2688            {
2689            /* Simulate the result of the function call below. */
2690            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2691              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2692            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2693              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2694            continue;
2695            }
2696          }
2697    
2698        /* Need to know if UTF-8 for printing data strings. */
2699    
2700        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2701        use_utf = (get_options & PCRE_UTF8) != 0;
2702    
2703      fclose(f);      fclose(f);
2704      goto SHOW_INFO;      goto SHOW_INFO;
2705      }      }
2706    
2707    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2708    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2709    
2710    delimiter = *p++;    delimiter = *p++;
2711    
# Line 1649  while (!done) Line 2756  while (!done)
2756    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2757    
2758    options = 0;    options = 0;
2759      study_options = 0;
2760    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
2761    
2762    while (*pp != 0)    while (*pp != 0)
# Line 1685  while (!done) Line 2793  while (!done)
2793  #endif  #endif
2794    
2795        case 'S':        case 'S':
2796        if (do_study == 0)        if (do_study == 0)
2797          {          {
2798          do_study = 1;          do_study = 1;
2799          if (*pp == '+')          if (*pp == '+')
2800            {            {
2801            study_options |= PCRE_STUDY_JIT_COMPILE;            if (*(++pp) == '+')
2802            pp++;              {
2803            }              verify_jit = TRUE;
2804          }              pp++;
2805                }
2806              if (*pp >= '1' && *pp <= '7')
2807                study_options |= jit_study_bits[*pp++ - '1'];
2808              else
2809                study_options |= jit_study_bits[6];
2810              }
2811            }
2812        else        else
2813          {          {
2814          do_study = 0;          do_study = 0;
# Line 1706  while (!done) Line 2821  while (!done)
2821        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2822        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2823        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2824        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2825        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2826    
2827        case 'T':        case 'T':
# Line 1740  while (!done) Line 2855  while (!done)
2855          goto SKIP_DATA;          goto SKIP_DATA;
2856          }          }
2857        locale_set = 1;        locale_set = 1;
2858        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2859        pp = ppp;        pp = ppp;
2860        break;        break;
2861    
# Line 1753  while (!done) Line 2868  while (!done)
2868    
2869        case '<':        case '<':
2870          {          {
2871          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2872            {            {
2873            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2874            pp += 3;            pp += 3;
# Line 1781  while (!done) Line 2896  while (!done)
2896    
2897    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2898    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2899    local character tables. */    local character tables. Neither does it have 16-bit support. */
2900    
2901  #if !defined NOPOSIX  #if !defined NOPOSIX
2902    if (posix || do_posix)    if (posix || do_posix)
# Line 1797  while (!done) Line 2912  while (!done)
2912      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2913      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2914    
2915        first_gotten_store = 0;
2916      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2917    
2918      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1816  while (!done) Line 2932  while (!done)
2932  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2933    
2934      {      {
2935      unsigned long int get_options;      /* In 16-bit mode, convert the input. */
2936    
2937    #ifdef SUPPORT_PCRE16
2938        if (use_pcre16)
2939          {
2940          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2941            {
2942            case -1:
2943            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2944              "converted to UTF-16\n");
2945            goto SKIP_DATA;
2946    
2947            case -2:
2948            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2949              "cannot be converted to UTF-16\n");
2950            goto SKIP_DATA;
2951    
2952            case -3: /* "Impossible error" when to16 is called with arg1 FALSE */
2953            fprintf(outfile, "**Failed: character value greater than 0xffff "
2954              "cannot be converted to 16-bit in non-UTF mode\n");
2955            goto SKIP_DATA;
2956    
2957            default:
2958            break;
2959            }
2960          p = (pcre_uint8 *)buffer16;
2961          }
2962    #endif
2963    
2964        /* Compile many times when timing */
2965    
2966      if (timeit > 0)      if (timeit > 0)
2967        {        {
# Line 1825  while (!done) Line 2970  while (!done)
2970        clock_t start_time = clock();        clock_t start_time = clock();
2971        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2972          {          {
2973          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2974          if (re != NULL) free(re);          if (re != NULL) free(re);
2975          }          }
2976        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1834  while (!done) Line 2979  while (!done)
2979            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2980        }        }
2981    
2982      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2983        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2984    
2985      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2986      if non-interactive. */      if non-interactive. */
# Line 1865  while (!done) Line 3011  while (!done)
3011      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
3012      lines. */      lines. */
3013    
3014      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3015      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;        goto SKIP_DATA;
3016        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
     /* Print information if required. There are now two info-returning  
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
   
     if (log_store)  
       fprintf(outfile, "Memory allocation (code space): %d\n",  
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3017    
3018      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3019      and remember the store that was got. */      and remember the store that was got. */
3020    
3021      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
3022      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3023    
3024        /* Output code size information if requested */
3025    
3026        if (log_store)
3027          fprintf(outfile, "Memory allocation (code space): %d\n",
3028            (int)(first_gotten_store -
3029                  sizeof(REAL_PCRE) -
3030                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3031    
3032      /* If -s or /S was present, study the regex to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
3033      help with the matching, unless the pattern has the SS option, which      help with the matching, unless the pattern has the SS option, which
# Line 1897  while (!done) Line 3042  while (!done)
3042          clock_t time_taken;          clock_t time_taken;
3043          clock_t start_time = clock();          clock_t start_time = clock();
3044          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3045            extra = pcre_study(re, study_options | force_study_options, &error);            {
3046              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3047              }
3048          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3049          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
3050              {
3051              PCRE_FREE_STUDY(extra);
3052              }
3053          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3054            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3055              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3056          }          }
3057        extra = pcre_study(re, study_options | force_study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3058        if (error != NULL)        if (error != NULL)
3059          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3060        else if (extra != NULL)        else if (extra != NULL)
3061            {
3062          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3063            if (log_store)
3064              {
3065              size_t jitsize;
3066              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3067                  jitsize != 0)
3068                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3069              }
3070            }
3071        }        }
3072    
3073      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1924  while (!done) Line 3083  while (!done)
3083        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
3084        }        }
3085    
3086      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
3087    
3088      SHOW_INFO:      SHOW_INFO:
3089    
3090      if (do_debug)      if (do_debug)
3091        {        {
3092        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3093        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3094        }        }
3095    
3096      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1976  while (!done) Line 3098  while (!done)
3098      if (do_showinfo)      if (do_showinfo)
3099        {        {
3100        unsigned long int all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3101        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3102          hascrorlf;          hascrorlf;
3103        int nameentrysize, namecount;        int nameentrysize, namecount;
3104        const uschar *nametable;        const pcre_uint8 *nametable;
3105    
3106        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3107        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3108        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3109        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3110        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3111        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3112        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3113        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3114        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3115        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3116        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3117              != 0)
3118  #if !defined NOINFOCHECK          goto SKIP_DATA;
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3119    
3120        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3121          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 2029  while (!done) Line 3130  while (!done)
3130          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3131          while (namecount-- > 0)          while (namecount-- > 0)
3132            {            {
3133            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3134              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3135              GET2(nametable, 0));  #else
3136              int imm2_size = IMM2_SIZE;
3137    #endif
3138              int length = (int)STRLEN(nametable + imm2_size);
3139              fprintf(outfile, "  ");
3140              PCHARSV(nametable, imm2_size, length, outfile);
3141              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3142    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3143              fprintf(outfile, "%3d\n", use_pcre16?
3144                 (int)(((PCRE_SPTR16)nametable)[0])
3145                :((int)nametable[0] << 8) | (int)nametable[1]);
3146              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3147    #else
3148              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3149    #ifdef SUPPORT_PCRE8
3150            nametable += nameentrysize;            nametable += nameentrysize;
3151    #else
3152              nametable += nameentrysize * 2;
3153    #endif
3154    #endif
3155            }            }
3156          }          }
3157    
3158        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3159        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3160    
3161        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3162        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3163    
3164        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3165          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 2056  while (!done) Line 3175  while (!done)
3175            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3176            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3177            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3178            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3179            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3180            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3181            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3182            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3183    
# Line 2100  while (!done) Line 3219  while (!done)
3219          }          }
3220        else        else
3221          {          {
3222          int ch = first_char & 255;          const char *caseless =
3223          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3224            "" : " (caseless)";            "" : " (caseless)";
3225          if (PRINTHEX(ch))  
3226            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3227              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3228          else          else
3229            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3230              fprintf(outfile, "First char = ");
3231              pchar(first_char, outfile);
3232              fprintf(outfile, "%s\n", caseless);
3233              }
3234          }          }
3235    
3236        if (need_char < 0)        if (need_char < 0)
# Line 2115  while (!done) Line 3239  while (!done)
3239          }          }
3240        else        else
3241          {          {
3242          int ch = need_char & 255;          const char *caseless =
3243          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3244            "" : " (caseless)";            "" : " (caseless)";
3245          if (PRINTHEX(ch))  
3246            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3247              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3248          else          else
3249            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3250              fprintf(outfile, "Need char = ");
3251              pchar(need_char, outfile);
3252              fprintf(outfile, "%s\n", caseless);
3253              }
3254          }          }
3255    
3256        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
# Line 2138  while (!done) Line 3267  while (!done)
3267            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3268          else          else
3269            {            {
3270            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3271            int minlength;            int minlength;
3272    
3273            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3274            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3275    
3276            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
3277              {              {
3278              int i;              if (start_bits == NULL)
3279              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3280              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3281                {                {
3282                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3283                  int c = 24;
3284                  fprintf(outfile, "Starting byte set: ");
3285                  for (i = 0; i < 256; i++)
3286                  {                  {
3287                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
3288                    {                    {
3289                    fprintf(outfile, "%c ", i);                    if (c > 75)
3290                    c += 2;                      {
3291                    }                      fprintf(outfile, "\n  ");
3292                  else                      c = 2;
3293                    {                      }
3294                    fprintf(outfile, "\\x%02x ", i);                    if (PRINTOK(i) && i != ' ')
3295                    c += 5;                      {
3296                        fprintf(outfile, "%c ", i);
3297                        c += 2;
3298                        }
3299                      else
3300                        {
3301                        fprintf(outfile, "\\x%02x ", i);
3302                        c += 5;
3303                        }
3304                    }                    }
3305                  }                  }
3306                  fprintf(outfile, "\n");
3307                }                }
             fprintf(outfile, "\n");  
3308              }              }
3309            }            }
3310    
3311          /* Show this only if the JIT was set by /S, not by -s. */          /* Show this only if the JIT was set by /S, not by -s. */
3312    
3313          if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)          if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3314            {            {
3315            int jit;            int jit;
3316            new_info(re, extra, PCRE_INFO_JIT, &jit);            if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3317            if (jit)              {
3318              fprintf(outfile, "JIT study was successful\n");              if (jit)
3319            else                fprintf(outfile, "JIT study was successful\n");
3320  #ifdef SUPPORT_JIT              else
3321              fprintf(outfile, "JIT study was not successful\n");  #ifdef SUPPORT_JIT
3322                  fprintf(outfile, "JIT study was not successful\n");
3323  #else  #else
3324              fprintf(outfile, "JIT support is not available in this version of PCRE\n");                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3325  #endif  #endif
3326            }              }
3327              }
3328          }          }
3329        }        }
3330    
# Line 2208  while (!done) Line 3341  while (!done)
3341          }          }
3342        else        else
3343          {          {
3344          uschar sbuf[8];          pcre_uint8 sbuf[8];
3345          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3346          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3347          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3348          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3349            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3350          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3351          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3352          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3353          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3354            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3355    
3356          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3357              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 2245  while (!done) Line 3379  while (!done)
3379          }          }
3380    
3381        new_free(re);        new_free(re);
3382        if (extra != NULL) pcre_free_study(extra);        if (extra != NULL)
3383            {
3384            PCRE_FREE_STUDY(extra);
3385            }
3386        if (locale_set)        if (locale_set)
3387          {          {
3388          new_free((void *)tables);          new_free((void *)tables);
# Line 2260  while (!done) Line 3397  while (!done)
3397    
3398    for (;;)    for (;;)
3399      {      {
3400      uschar *q;      pcre_uint8 *q;
3401      uschar *bptr;      pcre_uint8 *bptr;
3402      int *use_offsets = offsets;      int *use_offsets = offsets;
3403      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3404      int callout_data = 0;      int callout_data = 0;
# Line 2277  while (!done) Line 3414  while (!done)
3414      int g_notempty = 0;      int g_notempty = 0;
3415      int use_dfa = 0;      int use_dfa = 0;
3416    
     options = 0;  
   
3417      *copynames = 0;      *copynames = 0;
3418      *getnames = 0;      *getnames = 0;
3419    
3420      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3421      getnamesptr = getnames;      cn16ptr = copynames;
3422        gn16ptr = getnames;
3423    #endif
3424    #ifdef SUPPORT_PCRE8
3425        cn8ptr = copynames8;
3426        gn8ptr = getnames8;
3427    #endif
3428    
3429      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3430      first_callout = 1;      first_callout = 1;
3431      last_callout_mark = NULL;      last_callout_mark = NULL;
3432      callout_extra = 0;      callout_extra = 0;
# Line 2293  while (!done) Line 3434  while (!done)
3434      callout_fail_count = 999999;      callout_fail_count = 999999;
3435      callout_fail_id = -1;      callout_fail_id = -1;
3436      show_malloc = 0;      show_malloc = 0;
3437        options = 0;
3438    
3439      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3440        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 2328  while (!done) Line 3470  while (!done)
3470        int i = 0;        int i = 0;
3471        int n = 0;        int n = 0;
3472    
3473        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3474          In non-UTF mode, allow the value of the byte to fall through to later,
3475          where values greater than 127 are turned into UTF-8 when running in
3476          16-bit mode. */
3477    
3478          if (c != '\\')
3479            {
3480            if (use_utf)
3481              {
3482              *q++ = c;
3483              continue;
3484              }
3485            }
3486    
3487          /* Handle backslash escapes */
3488    
3489          else switch ((c = *p++))
3490          {          {
3491          case 'a': c =    7; break;          case 'a': c =    7; break;
3492          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 2344  while (!done) Line 3502  while (!done)
3502          c -= '0';          c -= '0';
3503          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3504            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3505          break;          break;
3506    
3507          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3508          if (*p == '{')          if (*p == '{')
3509            {            {
3510            unsigned char *pt = p;            pcre_uint8 *pt = p;
3511            c = 0;            c = 0;
3512            while (isxdigit(*(++pt)))  
3513              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3514              when isxdigit() is a macro that refers to its argument more than
3515              once. This is banned by the C Standard, but apparently happens in at
3516              least one MacOS environment. */
3517    
3518              for (pt++; isxdigit(*pt); pt++)
3519                {
3520                if (++i == 9)
3521                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3522                                   "using only the first eight.\n");
3523                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3524                }
3525            if (*pt == '}')            if (*pt == '}')
3526              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             if (use_utf8)  
               {  
               utn = ord2utf8(c, buff8);  
               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
               c = buff8[ii];   /* Last byte */  
               }  
             else  
              {  
              if (c > 255)  
                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "  
                  "UTF-8 mode is not enabled.\n"  
                  "** Truncation will probably give the wrong result.\n", c);  
              }  
3527              p = pt + 1;              p = pt + 1;
3528              break;              break;
3529              }              }
3530            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3531            }            }
 #endif  
3532    
3533          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3534            allows UTF-8 characters to be constructed byte by byte, and also allows
3535            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3536            Otherwise, pass it down to later code so that it can be turned into
3537            UTF-8 when running in 16-bit mode. */
3538    
3539          c = 0;          c = 0;
3540          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3541            {            {
3542            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3543            p++;            p++;
3544            }            }
3545            if (use_utf)
3546              {
3547              *q++ = c;
3548              continue;
3549              }
3550          break;          break;
3551    
3552          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 2432  while (!done) Line 3579  while (!done)
3579            }            }
3580          else if (isalnum(*p))          else if (isalnum(*p))
3581            {            {
3582            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3583            }            }
3584          else if (*p == '+')          else if (*p == '+')
3585            {            {
# Line 2448  while (!done) Line 3588  while (!done)
3588            }            }
3589          else if (*p == '-')          else if (*p == '-')
3590            {            {
3591            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3592            p++;            p++;
3593            }            }
3594          else if (*p == '!')          else if (*p == '!')
# Line 2502  while (!done) Line 3642  while (!done)
3642            }            }
3643          else if (isalnum(*p))          else if (isalnum(*p))
3644            {            {
3645            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)getnamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);  
           getnamesptr = npp;  
3646            }            }
3647          continue;          continue;
3648    
3649          case 'J':          case 'J':
3650          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3651          if (extra != NULL          if (extra != NULL
3652              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3653              && extra->executable_jit != NULL)              && extra->executable_jit != NULL)
3654            {            {
3655            if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);            if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3656            jit_stack = pcre_jit_stack_alloc(1, n * 1024);            jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3657            pcre_assign_jit_stack(extra, jit_callback, jit_stack);            PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3658            }            }
3659          continue;          continue;
3660    
3661          case 'L':          case 'L':
# Line 2617  while (!done) Line 3750  while (!done)
3750            }            }
3751          continue;          continue;
3752          }          }
3753        *q++ = c;  
3754          /* We now have a character value in c that may be greater than 255. In
3755          16-bit mode, we always convert characters to UTF-8 so that values greater
3756          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3757          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3758          mode must have come from \x{...} or octal constructs because values from
3759          \x.. get this far only in non-UTF mode. */
3760    
3761    #if !defined NOUTF || defined SUPPORT_PCRE16
3762          if (use_pcre16 || use_utf)
3763            {
3764            pcre_uint8 buff8[8];
3765            int ii, utn;
3766            utn = ord2utf8(c, buff8);
3767            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3768            }
3769          else
3770    #endif
3771            {
3772            if (c > 255)
3773              {
3774              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3775                "and UTF-8 mode is not enabled.\n", c);
3776              fprintf(outfile, "** Truncation will probably give the wrong "
3777                "result.\n");
3778              }
3779            *q++ = c;
3780            }
3781        }        }
3782    
3783        /* Reached end of subject string */
3784    
3785      *q = 0;      *q = 0;
3786      len = (int)(q - dbuffer);      len = (int)(q - dbuffer);
3787    
# Line 2680  while (!done) Line 3843  while (!done)
3843            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3844              {              {
3845              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3846              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3847                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3848              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3849              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
3850                {                {
3851                fprintf(outfile, "%2d+ ", (int)i);                fprintf(outfile, "%2d+ ", (int)i);
3852                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3853                  outfile);                  outfile);
3854                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3855                }                }
# Line 2694  while (!done) Line 3857  while (!done)
3857            }            }
3858          }          }
3859        free(pmatch);        free(pmatch);
3860          goto NEXT_DATA;
3861        }        }
3862    
3863    #endif  /* !defined NOPOSIX */
3864    
3865      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3866    
3867      else  #ifdef SUPPORT_PCRE16
3868  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3869          {
3870          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3871          switch(len)
3872            {
3873            case -1:
3874            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3875              "converted to UTF-16\n");
3876            goto NEXT_DATA;
3877    
3878            case -2:
3879            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3880              "cannot be converted to UTF-16\n");
3881            goto NEXT_DATA;
3882    
3883            case -3:
3884            fprintf(outfile, "**Failed: character value greater than 0xffff "
3885              "cannot be converted to 16-bit in non-UTF mode\n");
3886            goto NEXT_DATA;
3887    
3888            default:
3889            break;
3890            }
3891          bptr = (pcre_uint8 *)buffer16;
3892          }
3893    #endif
3894    
3895        /* Ensure that there is a JIT callback if we want to verify that JIT was
3896        actually used. If jit_stack == NULL, no stack has yet been assigned. */
3897    
3898        if (verify_jit && jit_stack == NULL && extra != NULL)
3899           { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3900    
3901      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3902        {        {
3903        markptr = NULL;        markptr = NULL;
3904          jit_was_used = FALSE;
3905    
3906        if (timeitm > 0)        if (timeitm > 0)
3907          {          {
# Line 2716  while (!done) Line 3914  while (!done)
3914            {            {
3915            int workspace[1000];            int workspace[1000];
3916            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3917              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,              {
3918                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3919                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets, workspace,
3920                  (sizeof(workspace)/sizeof(int)));
3921                }
3922            }            }
3923          else          else
3924  #endif  #endif
3925    
3926          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3927            count = pcre_exec(re, extra, (char *)bptr, len,            {
3928              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3929                (options | g_notempty), use_offsets, use_size_offsets);
3930              }
3931          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3932          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3933            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2748  while (!done) Line 3949  while (!done)
3949            extra->flags = 0;            extra->flags = 0;
3950            }            }
3951          else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;          else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3952    
3953          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3954            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
3955            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
# Line 2771  while (!done) Line 3972  while (!done)
3972            }            }
3973          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3974          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3975          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3976            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3977          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3978          }          }
# Line 2783  while (!done) Line 3984  while (!done)
3984        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3985          {          {
3986          int workspace[1000];          int workspace[1000];
3987          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3988            options | g_notempty, use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, workspace,
3989            sizeof(workspace)/sizeof(int));            (sizeof(workspace)/sizeof(int)));
3990          if (count == 0)          if (count == 0)
3991            {            {
3992            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2796  while (!done) Line 3997  while (!done)
3997    
3998        else        else
3999          {          {
4000          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4001            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
4002          if (count == 0)          if (count == 0)
4003            {            {
4004            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
4005            count = use_size_offsets/3;            count = use_size_offsets/3;
4006            }            }
4007          }          }
4008    
4009        /* Matched */        /* Matched */
4010    
4011        if (count >= 0)        if (count >= 0)
4012          {          {
4013          int i, maxcount;          int i, maxcount;
4014            void *cnptr, *gnptr;
4015    
4016  #if !defined NODFA  #if !defined NODFA
4017          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2836  while (!done) Line 4038  while (!done)
4038    
4039          if (do_allcaps)          if (do_allcaps)
4040            {            {
4041            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4042                goto SKIP_DATA;
4043            count++;   /* Allow for full match */            count++;   /* Allow for full match */
4044            if (count * 2 > use_size_offsets) count = use_size_offsets/2;            if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4045            }            }
# Line 2858  while (!done) Line 4061  while (!done)
4061            else            else
4062              {              {
4063              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
4064              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr, use_offsets[i],
4065                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
4066                if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4067              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4068              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
4069                {                {
4070                fprintf(outfile, "%2d+ ", i/2);                fprintf(outfile, "%2d+ ", i/2);
4071                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4072                  outfile);                  outfile);
4073                fprintf(outfile, "\n");                fprintf(outfile, "\n");
4074                }                }
4075              }              }
4076            }            }
4077    
4078          if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);          if (markptr != NULL)
4079              {
4080              fprintf(outfile, "MK: ");
4081              PCHARSV(markptr, 0, -1, outfile);
4082              fprintf(outfile, "\n");
4083              }
4084    
4085          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4086            {            {
4087            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
4088              {              {
4089                int rc;
4090              char copybuffer[256];              char copybuffer[256];
4091              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4092                i, copybuffer, sizeof(copybuffer));                copybuffer, sizeof(copybuffer));
4093              if (rc < 0)              if (rc < 0)
4094                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4095              else              else
4096                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);                {
4097                  fprintf(outfile, "%2dC ", i);
4098                  PCHARSV(copybuffer, 0, rc, outfile);
4099                  fprintf(outfile, " (%d)\n", rc);
4100                  }
4101              }              }
4102            }            }
4103    
4104          for (copynamesptr = copynames;          cnptr = copynames;
4105               *copynamesptr != 0;          for (;;)
              copynamesptr += (int)strlen((char*)copynamesptr) + 1)  
4106            {            {
4107              int rc;
4108            char copybuffer[256];            char copybuffer[256];
4109            int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,  
4110              count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));            if (use_pcre16)
4111                {
4112                if (*(pcre_uint16 *)cnptr == 0) break;
4113                }
4114              else
4115                {
4116                if (*(pcre_uint8 *)cnptr == 0) break;
4117                }
4118    
4119              PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4120                cnptr, copybuffer, sizeof(copybuffer));
4121    
4122            if (rc < 0)            if (rc < 0)
4123              fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);              {
4124                fprintf(outfile, "copy substring ");
4125                PCHARSV(cnptr, 0, -1, outfile);
4126                fprintf(outfile, " failed %d\n", rc);
4127                }
4128            else            else
4129              fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);              {
4130                fprintf(outfile, "  C ");
4131                PCHARSV(copybuffer, 0, rc, outfile);
4132                fprintf(outfile, " (%d) ", rc);
4133                PCHARSV(cnptr, 0, -1, outfile);
4134                putc('\n', outfile);
4135                }
4136    
4137              cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4138            }            }
4139    
4140          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4141            {            {
4142            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
4143              {              {
4144                int rc;
4145              const char *substring;              const char *substring;
4146              int rc = pcre_get_substring((char *)bptr, use_offsets, count,              PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
               i, &substring);  
4147              if (rc < 0)              if (rc < 0)
4148                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
4149              else              else
4150                {                {
4151                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG ", i);
4152                pcre_free_substring(substring);                PCHARSV(substring, 0, rc, outfile);
4153                  fprintf(outfile, " (%d)\n", rc);
4154                  PCRE_FREE_SUBSTRING(substring);
4155                }                }
4156              }              }
4157            }            }
4158    
4159          for (getnamesptr = getnames;          gnptr = getnames;
4160               *getnamesptr != 0;          for (;;)
              getnamesptr += (int)strlen((char*)getnamesptr) + 1)  
4161            {            {
4162              int rc;
4163            const char *substring;            const char *substring;
4164            int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,  
4165              count, (char *)getnamesptr, &substring);            if (use_pcre16)
4166                {
4167                if (*(pcre_uint16 *)gnptr == 0) break;
4168                }
4169              else
4170                {
4171                if (*(pcre_uint8 *)gnptr == 0) break;
4172                }
4173    
4174              PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4175                gnptr, &substring);
4176            if (rc < 0)            if (rc < 0)
4177              fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);              {
4178                fprintf(outfile, "get substring ");
4179                PCHARSV(gnptr, 0, -1, outfile);
4180                fprintf(outfile, " failed %d\n", rc);
4181                }
4182            else            else
4183              {              {
4184              fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);              fprintf(outfile, "  G ");
4185              pcre_free_substring(substring);              PCHARSV(substring, 0, rc, outfile);
4186                fprintf(outfile, " (%d) ", rc);
4187                PCHARSV(gnptr, 0, -1, outfile);
4188                PCRE_FREE_SUBSTRING(substring);
4189                putc('\n', outfile);
4190              }              }
4191    
4192              gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4193            }            }
4194    
4195          if (getlist)          if (getlist)
4196            {            {
4197              int rc;
4198            const char **stringlist;            const char **stringlist;
4199            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,            PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
             &stringlist);  
4200            if (rc < 0)            if (rc < 0)
4201              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
4202            else            else
4203              {              {
4204              for (i = 0; i < count; i++)              for (i = 0; i < count; i++)
4205                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                {
4206                  fprintf(outfile, "%2dL ", i);
4207                  PCHARSV(stringlist[i], 0, -1, outfile);
4208                  putc('\n', outfile);
4209                  }
4210              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
4211                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
4212              pcre_free_substring_list(stringlist);              PCRE_FREE_SUBSTRING_LIST(stringlist);
4213              }              }
4214            }            }
4215          }          }
# Line 2956  while (!done) Line 4219  while (!done)
4219        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
4220          {          {
4221          if (markptr == NULL) fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
4222            else fprintf(outfile, "Partial match, mark=%s", markptr);          else
4223              {
4224              fprintf(outfile, "Partial match, mark=");
4225              PCHARSV(markptr, 0, -1, outfile);
4226              }
4227          if (use_size_offsets > 1)          if (use_size_offsets > 1)
4228            {            {
4229            fprintf(outfile, ": ");            fprintf(outfile, ": ");
4230            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4231              outfile);              outfile);
4232            }            }
4233            if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4234          fprintf(outfile, "\n");          fprintf(outfile, "\n");
4235          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
4236          }          }
# Line 2977  while (!done) Line 4245  while (!done)
4245        terminated by CRLF, an advance of one character just passes the \r,        terminated by CRLF, an advance of one character just passes the \r,
4246        whereas we should prefer the longer newline sequence, as does the code in        whereas we should prefer the longer newline sequence, as does the code in
4247        pcre_exec(). Fudge the offset value to achieve this. We check for a        pcre_exec(). Fudge the offset value to achieve this. We check for a
4248        newline setting in the pattern; if none was set, use pcre_config() to        newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4249        find the default.        find the default.
4250    
4251        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
# Line 2988  while (!done) Line 4256  while (!done)
4256          if (g_notempty != 0)          if (g_notempty != 0)
4257            {            {
4258            int onechar = 1;            int onechar = 1;
4259            unsigned int obits = ((real_pcre *)re)->options;            unsigned int obits = ((REAL_PCRE *)re)->options;
4260            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
4261            if ((obits & PCRE_NEWLINE_BITS) == 0)            if ((obits & PCRE_NEWLINE_BITS) == 0)
4262              {              {
4263              int d;              int d;
4264              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4265              /* Note that these values are always the ASCII ones, even in              /* Note that these values are always the ASCII ones, even in
4266              EBCDIC environments. CR = 13, NL = 10. */              EBCDIC environments. CR = 13, NL = 10. */
4267              obits = (d == 13)? PCRE_NEWLINE_CR :              obits = (d == 13)? PCRE_NEWLINE_CR :
# Line 3007  while (!done) Line 4275  while (!done)
4275                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4276                &&                &&
4277                start_offset < len - 1 &&                start_offset < len - 1 &&
4278                bptr[start_offset] == '\r' &&  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4279                bptr[start_offset+1] == '\n')                (use_pcre16?
4280                       ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4281                    && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4282                  :
4283                       bptr[start_offset] == '\r'
4284                    && bptr[start_offset + 1] == '\n')
4285    #elif defined SUPPORT_PCRE16
4286                     ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4287                  && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4288    #else
4289                     bptr[start_offset] == '\r'
4290                  && bptr[start_offset + 1] == '\n'
4291    #endif
4292                  )
4293              onechar++;              onechar++;
4294            else if (use_utf8)            else if (use_utf)
4295              {              {
4296              while (start_offset + onechar < len)              while (start_offset + onechar < len)
4297                {                {
# Line 3027  while (!done) Line 4308  while (!done)
4308              case PCRE_ERROR_NOMATCH:              case PCRE_ERROR_NOMATCH:
4309              if (gmatched == 0)              if (gmatched == 0)
4310                {                {
4311                if (markptr == NULL) fprintf(outfile, "No match\n");                if (markptr == NULL)
4312                  else fprintf(outfile, "No match, mark = %s\n", markptr);                  {
4313                    fprintf(outfile, "No match");
4314                    }
4315                  else
4316                    {
4317                    fprintf(outfile, "No match, mark = ");
4318                    PCHARSV(markptr, 0, -1, outfile);
4319                    }
4320                  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4321                  putc('\n', outfile);
4322                }                }
4323              break;              break;
4324    
4325              case PCRE_ERROR_BADUTF8:              case PCRE_ERROR_BADUTF8:
4326              case PCRE_ERROR_SHORTUTF8:              case PCRE_ERROR_SHORTUTF8:
4327              fprintf(outfile, "Error %d (%s UTF-8 string)", count,              fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4328                (count == PCRE_ERROR_BADUTF8)? "bad" : "short");                (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4329                  use_pcre16? "16" : "8");
4330              if (use_size_offsets >= 2)              if (use_size_offsets >= 2)
4331                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4332                  use_offsets[1]);                  use_offsets[1]);
4333              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4334              break;              break;
4335    
4336                case PCRE_ERROR_BADUTF8_OFFSET:
4337                fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4338                  use_pcre16? "16" : "8");
4339                break;
4340    
4341              default:              default:
4342              if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))              if (count < 0 &&
4343                    (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4344                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4345              else              else
4346                fprintf(outfile, "Error %d (Unexpected value)\n", count);                fprintf(outfile, "Error %d (Unexpected value)\n", count);
# Line 3081  while (!done) Line 4378  while (!done)
4378    
4379        else        else
4380          {          {
4381          bptr += use_offsets[1];          bptr += use_offsets[1] * CHAR_SIZE;
4382          len -= use_offsets[1];          len -= use_offsets[1];
4383          }          }
4384        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
# Line 3096  while (!done) Line 4393  while (!done)
4393  #endif  #endif
4394    
4395    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
4396    if (extra != NULL) pcre_free_study(extra);    if (extra != NULL)
4397        {
4398        PCRE_FREE_STUDY(extra);
4399        }
4400    if (locale_set)    if (locale_set)
4401      {      {
4402      new_free((void *)tables);      new_free((void *)tables);
4403      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
4404      locale_set = 0;      locale_set = 0;
4405      }      }
4406    if (jit_stack != NULL)    if (jit_stack != NULL)
4407      {      {
4408      pcre_jit_stack_free(jit_stack);      PCRE_JIT_STACK_FREE(jit_stack);
4409      jit_stack = NULL;      jit_stack = NULL;
4410      }      }
4411    }    }
4412    
4413  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
# Line 3122  free(dbuffer); Line 4422  free(dbuffer);
4422  free(pbuffer);  free(pbuffer);
4423  free(offsets);  free(offsets);
4424    
4425    #ifdef SUPPORT_PCRE16
4426    if (buffer16 != NULL) free(buffer16);
4427    #endif
4428    
4429  return yield;  return yield;
4430  }  }
4431    

Legend:
Removed from v.675  
changed lines
  Added in v.926

  ViewVC Help
Powered by ViewVC 1.1.5