/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 689 by ph10, Fri Sep 9 10:34:57 2011 UTC revision 841 by zherczeg, Sat Dec 31 07:04:43 2011 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
51  #include "config.h"  #include "config.h"
# Line 105  here before pcre_internal.h so that the Line 116  here before pcre_internal.h so that the
116  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
117    
118  #include "pcre.h"  #include "pcre.h"
119    
120    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121    /* Configure internal macros to 16 bit mode. */
122    #define COMPILE_PCRE16
123    #endif
124    
125  #include "pcre_internal.h"  #include "pcre_internal.h"
126    
127    /* The pcre_printint() function, which prints the internal form of a compiled
128    regex, is held in a separate file so that (a) it can be compiled in either
129    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130    when that is compiled in debug mode. */
131    
132    #ifdef SUPPORT_PCRE8
133    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134    #endif
135    #ifdef SUPPORT_PCRE16
136    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137    #endif
138    
139  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
140  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
141  external symbols to prevent clashes. */  external symbols to prevent clashes. */
142    
143  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
144  #define _pcre_ucp_typerange    ucp_typerange  #undef PRIV
145  #define _pcre_utf8_table1      utf8_table1  #define PRIV(name) name
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utf8_char_sizes  utf8_char_sizes  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
146    
147  #include "pcre_tables.c"  #include "pcre_tables.c"
148    
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
   
149  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
150  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
151  contained in the printint.src file. We uses it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
152  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
153  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
154    
155  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #ifdef EBCDIC
156    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
157    #else
158    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
159    #endif
160    
161    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
162    
163    /* Posix support is disabled in 16 bit only mode. */
164    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165    #define NOPOSIX
166    #endif
167    
168  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
169  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 150  Makefile. */ Line 173  Makefile. */
173  #include "pcreposix.h"  #include "pcreposix.h"
174  #endif  #endif
175    
176  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
177  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
179  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
180  UTF8 support if PCRE is built without it. */  
181    #ifndef SUPPORT_UTF
182  #ifndef SUPPORT_UTF8  #ifndef NOUTF
183  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
184  #endif  #endif
185  #endif  #endif
186    
187    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189    only from one place and is handled differently). I couldn't dream up any way of
190    using a single macro to do this in a generic way, because of the many different
191    argument requirements. We know that at least one of SUPPORT_PCRE8 and
192    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193    use these in the definitions of generic macros.
194    
195    **** Special note about the PCHARSxxx macros: the address of the string to be
196    printed is always given as two arguments: a base address followed by an offset.
197    The base address is cast to the correct data size for 8 or 16 bit data; the
198    offset is in units of this size. If the string were given as base+offset in one
199    argument, the casting might be incorrectly applied. */
200    
201    #ifdef SUPPORT_PCRE8
202    
203    #define PCHARS8(lv, p, offset, len, f) \
204      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205    
206    #define PCHARSV8(p, offset, len, f) \
207      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208    
209    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210      p = read_capture_name8(p, cn8, re)
211    
212    #define SET_PCRE_CALLOUT8(callout) \
213      pcre_callout = callout
214    
215    #define STRLEN8(p) ((int)strlen((char *)p))
216    
217    
218    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
219      re = pcre_compile((char *)pat, options, error, erroffset, tables)
220    
221    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
222        namesptr, cbuffer, size) \
223      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
224        (char *)namesptr, cbuffer, size)
225    
226    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
227      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
228    
229    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
230        offsets, size_offsets, workspace, size_workspace) \
231      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
232        offsets, size_offsets, workspace, size_workspace)
233    
234    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
235        offsets, size_offsets) \
236      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
237        offsets, size_offsets)
238    
239    #define PCRE_FREE_STUDY8(extra) \
240      pcre_free_study(extra)
241    
242    #define PCRE_FREE_SUBSTRING8(substring) \
243      pcre_free_substring(substring)
244    
245    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
246      pcre_free_substring_list(listptr)
247    
248    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
249        getnamesptr, subsptr) \
250      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
251        (char *)getnamesptr, subsptr)
252    
253    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
254      n = pcre_get_stringnumber(re, (char *)ptr)
255    
256    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
257      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
258    
259    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
260      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
261    
262    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
263      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
264    
265    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
266      pcre_printint(re, outfile, debug_lengths)
267    
268    #define PCRE_STUDY8(extra, re, options, error) \
269      extra = pcre_study(re, options, error)
270    
271    #endif /* SUPPORT_PCRE8 */
272    
273    /* -----------------------------------------------------------*/
274    
275    #ifdef SUPPORT_PCRE16
276    
277    #define PCHARS16(lv, p, offset, len, f) \
278      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
279    
280    #define PCHARSV16(p, offset, len, f) \
281      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
282    
283    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
284      p = read_capture_name16(p, cn16, re)
285    
286    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
287    
288    #define SET_PCRE_CALLOUT16(callout) \
289      pcre16_callout = callout
290    
291    
292    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
293      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
294    
295    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
296        namesptr, cbuffer, size) \
297      rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
298        (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, size/2)
299    
300    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
301      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
302        (PCRE_SCHAR16 *)cbuffer, size/2)
303    
304    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
305        offsets, size_offsets, workspace, size_workspace) \
306      count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
307        options, offsets, size_offsets, workspace, size_workspace)
308    
309    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
310        offsets, size_offsets) \
311      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
312        options, offsets, size_offsets)
313    
314    #define PCRE_FREE_STUDY16(extra) \
315      pcre16_free_study(extra)
316    
317    #define PCRE_FREE_SUBSTRING16(substring) \
318      pcre16_free_substring((PCRE_SPTR16)substring)
319    
320    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
321      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
322    
323    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
324        getnamesptr, subsptr) \
325      rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
326        (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
327    
328    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
329      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
330    
331    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
332      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
333        (PCRE_SPTR16 *)(void*)subsptr)
334    
335    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
336      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
337        (PCRE_SPTR16 **)(void*)listptr)
338    
339    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
340      rc = pcre16_pattern_to_host_byte_order(re, extra, tables)
341    
342    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
343      pcre16_printint(re, outfile, debug_lengths)
344    
345    #define PCRE_STUDY16(extra, re, options, error) \
346      extra = pcre16_study(re, options, error)
347    
348    #endif /* SUPPORT_PCRE16 */
349    
350    
351    /* ----- Both modes are supported; a runtime test is needed, except for
352    pcre_config(), and the JIT stack functions, when it doesn't matter which
353    version is called. ----- */
354    
355    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
356    
357    #define CHAR_SIZE (use_pcre16? 2:1)
358    
359    #define PCHARS(lv, p, offset, len, f) \
360      if (use_pcre16) \
361        PCHARS16(lv, p, offset, len, f); \
362      else \
363        PCHARS8(lv, p, offset, len, f)
364    
365    #define PCHARSV(p, offset, len, f) \
366      if (use_pcre16) \
367        PCHARSV16(p, offset, len, f); \
368      else \
369        PCHARSV8(p, offset, len, f)
370    
371    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
372      if (use_pcre16) \
373        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
374      else \
375        READ_CAPTURE_NAME8(p, cn8, cn16, re)
376    
377    #define SET_PCRE_CALLOUT(callout) \
378      if (use_pcre16) \
379        SET_PCRE_CALLOUT16(callout); \
380      else \
381        SET_PCRE_CALLOUT8(callout)
382    
383    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
384    
385    #define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
386    
387    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
388      if (use_pcre16) \
389        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
390      else \
391        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
392    
393    #define PCRE_CONFIG pcre_config
394    
395    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
396        namesptr, cbuffer, size) \
397      if (use_pcre16) \
398        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
399          namesptr, cbuffer, size); \
400      else \
401        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
402          namesptr, cbuffer, size)
403    
404    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
405      if (use_pcre16) \
406        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
407      else \
408        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
409    
410    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
411        offsets, size_offsets, workspace, size_workspace) \
412      if (use_pcre16) \
413        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
414          offsets, size_offsets, workspace, size_workspace); \
415      else \
416        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
417          offsets, size_offsets, workspace, size_workspace)
418    
419    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
420        offsets, size_offsets) \
421      if (use_pcre16) \
422        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
423          offsets, size_offsets); \
424      else \
425        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
426          offsets, size_offsets)
427    
428    #define PCRE_FREE_STUDY(extra) \
429      if (use_pcre16) \
430        PCRE_FREE_STUDY16(extra); \
431      else \
432        PCRE_FREE_STUDY8(extra)
433    
434    #define PCRE_FREE_SUBSTRING(substring) \
435      if (use_pcre16) \
436        PCRE_FREE_SUBSTRING16(substring); \
437      else \
438        PCRE_FREE_SUBSTRING8(substring)
439    
440    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
441      if (use_pcre16) \
442        PCRE_FREE_SUBSTRING_LIST16(listptr); \
443      else \
444        PCRE_FREE_SUBSTRING_LIST8(listptr)
445    
446    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
447        getnamesptr, subsptr) \
448      if (use_pcre16) \
449        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
450          getnamesptr, subsptr); \
451      else \
452        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
453          getnamesptr, subsptr)
454    
455    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
456      if (use_pcre16) \
457        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
458      else \
459        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
460    
461    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
462      if (use_pcre16) \
463        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
464      else \
465        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
466    
467    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
468      if (use_pcre16) \
469        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
470      else \
471        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
472    
473    #define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
474    #define PCRE_JIT_STACK_FREE pcre_jit_stack_free
475    
476    #define PCRE_MAKETABLES \
477      (use_pcre16? pcre16_maketables() : pcre_maketables())
478    
479    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
480      if (use_pcre16) \
481        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
482      else \
483        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
484    
485    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
486      if (use_pcre16) \
487        PCRE_PRINTINT16(re, outfile, debug_lengths); \
488      else \
489        PCRE_PRINTINT8(re, outfile, debug_lengths)
490    
491    #define PCRE_STUDY(extra, re, options, error) \
492      if (use_pcre16) \
493        PCRE_STUDY16(extra, re, options, error); \
494      else \
495        PCRE_STUDY8(extra, re, options, error)
496    
497    /* ----- Only 8-bit mode is supported ----- */
498    
499    #elif defined SUPPORT_PCRE8
500    #define CHAR_SIZE                 1
501    #define PCHARS                    PCHARS8
502    #define PCHARSV                   PCHARSV8
503    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
504    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
505    #define STRLEN                    STRLEN8
506    #define PCRE_ASSIGN_JIT_STACK     pcre_assign_jit_stack
507    #define PCRE_COMPILE              PCRE_COMPILE8
508    #define PCRE_CONFIG               pcre_config
509    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
510    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
511    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
512    #define PCRE_EXEC                 PCRE_EXEC8
513    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
514    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
515    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
516    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
517    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
518    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
519    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
520    #define PCRE_JIT_STACK_ALLOC      pcre_jit_stack_alloc
521    #define PCRE_JIT_STACK_FREE       pcre_jit_stack_free
522    #define PCRE_MAKETABLES           pcre_maketables()
523    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
524    #define PCRE_PRINTINT             PCRE_PRINTINT8
525    #define PCRE_STUDY                PCRE_STUDY8
526    
527    /* ----- Only 16-bit mode is supported ----- */
528    
529    #else
530    #define CHAR_SIZE                 2
531    #define PCHARS                    PCHARS16
532    #define PCHARSV                   PCHARSV16
533    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
534    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
535    #define STRLEN                    STRLEN16
536    #define PCRE_ASSIGN_JIT_STACK     pcre16_assign_jit_stack
537    #define PCRE_COMPILE              PCRE_COMPILE16
538    #define PCRE_CONFIG               pcre16_config
539    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
540    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
541    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
542    #define PCRE_EXEC                 PCRE_EXEC16
543    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
544    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
545    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
546    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
547    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
548    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
549    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
550    #define PCRE_JIT_STACK_ALLOC      pcre16_jit_stack_alloc
551    #define PCRE_JIT_STACK_FREE       pcre16_jit_stack_free
552    #define PCRE_MAKETABLES           pcre16_maketables()
553    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
554    #define PCRE_PRINTINT             PCRE_PRINTINT16
555    #define PCRE_STUDY                PCRE_STUDY16
556    #endif
557    
558    /* ----- End of mode-specific function call macros ----- */
559    
560    
561  /* Other parameters */  /* Other parameters */
562    
# Line 189  static int debug_lengths; Line 584  static int debug_lengths;
584  static int first_callout;  static int first_callout;
585  static int locale_set = 0;  static int locale_set = 0;
586  static int show_malloc;  static int show_malloc;
587  static int use_utf8;  static int use_utf;
588  static size_t gotten_store;  static size_t gotten_store;
589    static size_t first_gotten_store = 0;
590  static const unsigned char *last_callout_mark = NULL;  static const unsigned char *last_callout_mark = NULL;
591    
592  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
593    
594  static int buffer_size = 50000;  static int buffer_size = 50000;
595  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
596  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
597  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
598    
599    /* Another buffer is needed for translation to 16-bit character strings. It
600    will be obtained and extended as required. */
601    
602    #ifdef SUPPORT_PCRE16
603    static int buffer16_size = 0;
604    static pcre_uint16 *buffer16 = NULL;
605    
606    #ifdef SUPPORT_PCRE8
607    
608    /* We need the table of operator lengths that is used for 16-bit compiling, in
609    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
610    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
611    appropriately for the 16-bit world. Just as a safety check, make sure that
612    COMPILE_PCRE16 is *not* set. */
613    
614    #ifdef COMPILE_PCRE16
615    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
616    #endif
617    
618    #if LINK_SIZE == 2
619    #undef LINK_SIZE
620    #define LINK_SIZE 1
621    #elif LINK_SIZE == 3 || LINK_SIZE == 4
622    #undef LINK_SIZE
623    #define LINK_SIZE 2
624    #else
625    #error LINK_SIZE must be either 2, 3, or 4
626    #endif
627    
628    #undef IMM2_SIZE
629    #define IMM2_SIZE 1
630    
631    #endif /* SUPPORT_PCRE8 */
632    
633    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
634    #endif  /* SUPPORT_PCRE16 */
635    
636    /* If we have 8-bit support, default use_pcre16 to false; if there is also
637    16-bit support, it can be changed by an option. If there is no 8-bit support,
638    there must be 16-bit support, so default it to 1. */
639    
640    #ifdef SUPPORT_PCRE8
641    static int use_pcre16 = 0;
642    #else
643    static int use_pcre16 = 1;
644    #endif
645    
646  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
647    
# Line 213  static const char *errtexts[] = { Line 656  static const char *errtexts[] = {
656    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
657    "match limit exceeded",    "match limit exceeded",
658    "callout error code",    "callout error code",
659    NULL,  /* BADUTF8 is handled specially */    NULL,  /* BADUTF8/16 is handled specially */
660    "bad UTF-8 offset",    NULL,  /* BADUTF8/16 offset is handled specially */
661    NULL,  /* PARTIAL is handled specially */    NULL,  /* PARTIAL is handled specially */
662    "not used - internal error",    "not used - internal error",
663    "internal error - pattern overwritten?",    "internal error - pattern overwritten?",
# Line 228  static const char *errtexts[] = { Line 671  static const char *errtexts[] = {
671    "not used - internal error",    "not used - internal error",
672    "invalid combination of newline options",    "invalid combination of newline options",
673    "bad offset value",    "bad offset value",
674    NULL,  /* SHORTUTF8 is handled specially */    NULL,  /* SHORTUTF8/16 is handled specially */
675    "nested recursion at the same subject position",    "nested recursion at the same subject position",
676    "JIT stack limit reached"    "JIT stack limit reached",
677      "pattern compiled in wrong mode: 8-bit/16-bit error"
678  };  };
679    
680    
# Line 246  the L (locale) option also adjusts the t Line 690  the L (locale) option also adjusts the t
690  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
691  only ASCII characters. */  only ASCII characters. */
692    
693  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
694    
695  /* This table is a lower casing table. */  /* This table is a lower casing table. */
696    
# Line 419  graph, print, punct, and cntrl. Other cl Line 863  graph, print, punct, and cntrl. Other cl
863  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
864  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
865    
866  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
867  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
868  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
869  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 592  return (pcre_jit_stack *)arg; Line 1036  return (pcre_jit_stack *)arg;
1036  }  }
1037    
1038    
1039    #if !defined NOUTF || defined SUPPORT_PCRE16
1040    /*************************************************
1041    *            Convert UTF-8 string to value       *
1042    *************************************************/
1043    
1044    /* This function takes one or more bytes that represent a UTF-8 character,
1045    and returns the value of the character.
1046    
1047    Arguments:
1048      utf8bytes   a pointer to the byte vector
1049      vptr        a pointer to an int to receive the value (set only on success)
1050    
1051    Returns:      >  0 => the number of bytes consumed
1052                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1053    */
1054    
1055    static int
1056    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1057    {
1058    int c = *utf8bytes++;
1059    int d = c;
1060    int i, j, s;
1061    
1062    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1063      {
1064      if ((d & 0x80) == 0) break;
1065      d <<= 1;
1066      }
1067    
1068    if (i == -1) { *vptr = c; return 1; }  /* ASCII character */
1069    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 first byte */
1070    
1071    /* i now has a value in the range 1-5 */
1072    
1073    s = 6*i;
1074    d = (c & utf8_table3[i]) << s;
1075    
1076    for (j = 0; j < i; j++)
1077      {
1078      c = *utf8bytes++;
1079      if ((c & 0xc0) != 0x80) return -(j+1);
1080      s -= 6;
1081      d |= (c & 0x3f) << s;
1082      }
1083    
1084    /* Check that the encoding was the correct unique one for this value */
1085    
1086    for (j = 0; j < utf8_table1_size; j++)
1087      if (d <= utf8_table1[j]) break;
1088    if (j != i) return -(i+1);
1089    
1090    /* Valid value */
1091    
1092    *vptr = d;
1093    return i+1;
1094    }
1095    #endif /* !NOUTF || SUPPORT_PCRE16 */
1096    
1097    
1098    
1099    #if !defined NOUTF || defined SUPPORT_PCRE16
1100    /*************************************************
1101    *       Convert character value to UTF-8         *
1102    *************************************************/
1103    
1104    /* This function takes an integer value in the range 0 - 0x7fffffff
1105    and encodes it as a UTF-8 character in 1 to 6 bytes.
1106    
1107    Arguments:
1108      cvalue     the character value
1109      utf8bytes  pointer to buffer for result - at least 6 bytes long
1110    
1111    Returns:     number of bytes placed in the buffer
1112    */
1113    
1114    static int
1115    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1116    {
1117    register int i, j;
1118    for (i = 0; i < utf8_table1_size; i++)
1119      if (cvalue <= utf8_table1[i]) break;
1120    utf8bytes += i;
1121    for (j = i; j > 0; j--)
1122     {
1123     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1124     cvalue >>= 6;
1125     }
1126    *utf8bytes = utf8_table2[i] | cvalue;
1127    return i + 1;
1128    }
1129    #endif /* !NOUTF || SUPPORT_PCRE16 */
1130    
1131    
1132    
1133    #ifdef SUPPORT_PCRE16
1134    /*************************************************
1135    *         Convert a string to 16-bit             *
1136    *************************************************/
1137    
1138    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1139    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1140    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1141    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1142    result is always left in buffer16.
1143    
1144    Note that this function does not object to surrogate values. This is
1145    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1146    for the purpose of testing that they are correctly faulted.
1147    
1148    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1149    in UTF-8 so that values greater than 255 can be handled.
1150    
1151    Arguments:
1152      data       TRUE if converting a data line; FALSE for a regex
1153      p          points to a byte string
1154      utf        true if UTF-8 (to be converted to UTF-16)
1155      len        number of bytes in the string (excluding trailing zero)
1156    
1157    Returns:     number of 16-bit data items used (excluding trailing zero)
1158                 OR -1 if a UTF-8 string is malformed
1159                 OR -2 if a value > 0x10ffff is encountered
1160                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1161    */
1162    
1163    static int
1164    to16(int data, pcre_uint8 *p, int utf, int len)
1165    {
1166    pcre_uint16 *pp;
1167    
1168    if (buffer16_size < 2*len + 2)
1169      {
1170      if (buffer16 != NULL) free(buffer16);
1171      buffer16_size = 2*len + 2;
1172      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1173      if (buffer16 == NULL)
1174        {
1175        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1176        exit(1);
1177        }
1178      }
1179    
1180    pp = buffer16;
1181    
1182    if (!utf && !data)
1183      {
1184      while (len-- > 0) *pp++ = *p++;
1185      }
1186    
1187    else
1188      {
1189      int c = 0;
1190      while (len > 0)
1191        {
1192        int chlen = utf82ord(p, &c);
1193        if (chlen <= 0) return -1;
1194        if (c > 0x10ffff) return -2;
1195        p += chlen;
1196        len -= chlen;
1197        if (c < 0x10000) *pp++ = c; else
1198          {
1199          if (!utf) return -3;
1200          c -= 0x10000;
1201          *pp++ = 0xD800 | (c >> 10);
1202          *pp++ = 0xDC00 | (c & 0x3ff);
1203          }
1204        }
1205      }
1206    
1207    *pp = 0;
1208    return pp - buffer16;
1209    }
1210    #endif
1211    
1212    
1213  /*************************************************  /*************************************************
1214  *        Read or extend an input line            *  *        Read or extend an input line            *
1215  *************************************************/  *************************************************/
# Line 615  Returns:       pointer to the start of n Line 1233  Returns:       pointer to the start of n
1233                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1234  */  */
1235    
1236  static uschar *  static pcre_uint8 *
1237  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1238  {  {
1239  uschar *here = start;  pcre_uint8 *here = start;
1240    
1241  for (;;)  for (;;)
1242    {    {
# Line 665  for (;;) Line 1283  for (;;)
1283    else    else
1284      {      {
1285      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1286      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1287      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1288      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1289    
1290      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1291        {        {
# Line 698  return NULL;  /* Control never gets here Line 1316  return NULL;  /* Control never gets here
1316    
1317    
1318    
   
   
   
   
1319  /*************************************************  /*************************************************
1320  *          Read number from string               *  *          Read number from string               *
1321  *************************************************/  *************************************************/
# Line 718  Returns:        the unsigned long Line 1332  Returns:        the unsigned long
1332  */  */
1333    
1334  static int  static int
1335  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1336  {  {
1337  int result = 0;  int result = 0;
1338  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 729  return(result); Line 1343  return(result);
1343    
1344    
1345    
   
1346  /*************************************************  /*************************************************
1347  *            Convert UTF-8 string to value       *  *             Print one character                *
1348  *************************************************/  *************************************************/
1349    
1350  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
1351    
1352  static int  static int pchar(int c, FILE *f)
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1353  {  {
1354  int c = *utf8bytes++;  if (PRINTOK(c))
1355  int d = c;    {
1356  int i, j, s;    if (f != NULL) fprintf(f, "%c", c);
1357      return 1;
1358      }
1359    
1360  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1361    {    {
1362    if ((d & 0x80) == 0) break;    if (use_utf)
1363    d <<= 1;      {
1364        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1365        return 6;
1366        }
1367      else
1368        {
1369        if (f != NULL) fprintf(f, "\\x%02x", c);
1370        return 4;
1371        }
1372    }    }
1373    
1374  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1375  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return (c <= 0x000000ff)? 6 :
1376           (c <= 0x00000fff)? 7 :
1377           (c <= 0x0000ffff)? 8 :
1378           (c <= 0x000fffff)? 9 : 10;
1379    }
1380    
 /* i now has a value in the range 1-5 */  
1381    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1382    
1383  for (j = 0; j < i; j++)  #ifdef SUPPORT_PCRE8
1384    {  /*************************************************
1385    c = *utf8bytes++;  *         Print 8-bit character string           *
1386    if ((c & 0xc0) != 0x80) return -(j+1);  *************************************************/
   s -= 6;  
   d |= (c & 0x3f) << s;  
   }  
1387    
1388  /* Check that encoding was the correct unique one */  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1389    If handed a NULL file, just counts chars without printing. */
1390    
1391  for (j = 0; j < utf8_table1_size; j++)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1392    if (d <= utf8_table1[j]) break;  {
1393  if (j != i) return -(i+1);  int c = 0;
1394    int yield = 0;
1395    
1396  /* Valid value */  if (length < 0)
1397      length = strlen((char *)p);
1398    
1399  *vptr = d;  while (length-- > 0)
1400  return i+1;    {
1401  }  #if !defined NOUTF
1402      if (use_utf)
1403        {
1404        int rc = utf82ord(p, &c);
1405        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1406          {
1407          length -= rc - 1;
1408          p += rc;
1409          yield += pchar(c, f);
1410          continue;
1411          }
1412        }
1413    #endif
1414      c = *p++;
1415      yield += pchar(c, f);
1416      }
1417    
1418    return yield;
1419    }
1420  #endif  #endif
1421    
1422    
1423    
1424    #ifdef SUPPORT_PCRE16
1425  /*************************************************  /*************************************************
1426  *       Convert character value to UTF-8         *  *    Find length of 0-terminated 16-bit string   *
1427  *************************************************/  *************************************************/
1428    
1429  /* This function takes an integer value in the range 0 - 0x7fffffff  static int strlen16(PCRE_SPTR16 p)
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 ord2utf8(int cvalue, uschar *utf8bytes)  
1430  {  {
1431  register int i, j;  int len = 0;
1432  for (i = 0; i < utf8_table1_size; i++)  while (*p++ != 0) len++;
1433    if (cvalue <= utf8_table1[i]) break;  return len;
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1434  }  }
1435    #endif  /* SUPPORT_PCRE16 */
 #endif  
   
1436    
1437    
1438    #ifdef SUPPORT_PCRE16
1439  /*************************************************  /*************************************************
1440  *             Print character string             *  *           Print 16-bit character string        *
1441  *************************************************/  *************************************************/
1442    
1443  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1444  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1445    
1446  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1447  {  {
 int c = 0;  
1448  int yield = 0;  int yield = 0;
1449    
1450    if (length < 0)
1451      length = strlen16(p);
1452    
1453  while (length-- > 0)  while (length-- > 0)
1454    {    {
1455  #if !defined NOUTF8    int c = *p++ & 0xffff;
1456    if (use_utf8)  #if !defined NOUTF
1457      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1458      {      {
1459      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
1460        if (d >= 0xDC00 && d < 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
1461        {        {
1462        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1463        p += rc;        length--;
1464        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
1465        }        }
1466      }      }
1467  #endif  #endif
1468      yield += pchar(c, f);
1469      }
1470    
1471     /* Not UTF-8, or malformed UTF-8  */  return yield;
1472    }
1473    #endif  /* SUPPORT_PCRE16 */
1474    
1475    c = *p++;  
1476    if (PRINTHEX(c))  
1477      {  #ifdef SUPPORT_PCRE8
1478      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1479      yield++;  *     Read a capture name (8-bit) and check it   *
1480      }  *************************************************/
1481    else  
1482      {  static pcre_uint8 *
1483      if (f != NULL) fprintf(f, "\\x%02x", c);  read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1484      yield += 4;  {
1485      }  pcre_uint8 *npp = *pp;
1486    while (isalnum(*p)) *npp++ = *p++;
1487    *npp++ = 0;
1488    *npp = 0;
1489    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1490      {
1491      fprintf(outfile, "no parentheses with name \"");
1492      PCHARSV(*pp, 0, -1, outfile);
1493      fprintf(outfile, "\"\n");
1494    }    }
1495    
1496  return yield;  *pp = npp;
1497    return p;
1498  }  }
1499    #endif  /* SUPPORT_PCRE8 */
1500    
1501    
1502    
1503    #ifdef SUPPORT_PCRE16
1504    /*************************************************
1505    *     Read a capture name (16-bit) and check it  *
1506    *************************************************/
1507    
1508    /* Note that the text being read is 8-bit. */
1509    
1510    static pcre_uint8 *
1511    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1512    {
1513    pcre_uint16 *npp = *pp;
1514    while (isalnum(*p)) *npp++ = *p++;
1515    *npp++ = 0;
1516    *npp = 0;
1517    if (pcre16_get_stringnumber(re, (PCRE_SPTR16)(*pp)) < 0)
1518      {
1519      fprintf(outfile, "no parentheses with name \"");
1520      PCHARSV(*pp, 0, -1, outfile);
1521      fprintf(outfile, "\"\n");
1522      }
1523    *pp = npp;
1524    return p;
1525    }
1526    #endif  /* SUPPORT_PCRE16 */
1527    
1528    
1529    
# Line 916  if (callout_extra) Line 1552  if (callout_extra)
1552      else      else
1553        {        {
1554        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1555        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1556          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1557        fprintf(f, "\n");        fprintf(f, "\n");
1558        }        }
# Line 929  printed lengths of the substrings. */ Line 1565  printed lengths of the substrings. */
1565    
1566  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1567    
1568  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1569  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1570    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1571    
1572  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1573    
1574  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1575    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1576    
1577  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 974  first_callout = 0; Line 1610  first_callout = 0;
1610    
1611  if (cb->mark != last_callout_mark)  if (cb->mark != last_callout_mark)
1612    {    {
1613    fprintf(outfile, "Latest Mark: %s\n",    if (cb->mark == NULL)
1614      (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));      fprintf(outfile, "Latest Mark: <unset>\n");
1615      else
1616        {
1617        fprintf(outfile, "Latest Mark: ");
1618        PCHARSV(cb->mark, 0, -1, outfile);
1619        putc('\n', outfile);
1620        }
1621    last_callout_mark = cb->mark;    last_callout_mark = cb->mark;
1622    }    }
1623    
# Line 999  return (cb->callout_number != callout_fa Line 1641  return (cb->callout_number != callout_fa
1641  *************************************************/  *************************************************/
1642    
1643  /* Alternative malloc function, to test functionality and save the size of a  /* Alternative malloc function, to test functionality and save the size of a
1644  compiled re. The show_malloc variable is set only during matching. */  compiled re, which is the first store request that pcre_compile() makes. The
1645    show_malloc variable is set only during matching. */
1646    
1647  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1648  {  {
1649  void *block = malloc(size);  void *block = malloc(size);
1650  gotten_store = size;  gotten_store = size;
1651    if (first_gotten_store == 0) first_gotten_store = size;
1652  if (show_malloc)  if (show_malloc)
1653    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1654  return block;  return block;
# Line 1039  free(block); Line 1683  free(block);
1683  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1684  *************************************************/  *************************************************/
1685    
1686  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1687    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1688    value, but the code is defensive.
1689    
1690    Arguments:
1691      re        compiled regex
1692      study     study data
1693      option    PCRE_INFO_xxx option
1694      ptr       where to put the data
1695    
1696    Returns:    0 when OK, < 0 on error
1697    */
1698    
1699  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
1700    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1701  {  {
1702  int rc;  int rc;
1703  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1704    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1705    #ifdef SUPPORT_PCRE16
1706      rc = pcre16_fullinfo(re, study, option, ptr);
1707    #else
1708      rc = PCRE_ERROR_BADMODE;
1709    #endif
1710    else
1711    #ifdef SUPPORT_PCRE8
1712      rc = pcre_fullinfo(re, study, option, ptr);
1713    #else
1714      rc = PCRE_ERROR_BADMODE;
1715    #endif
1716    
1717    if (rc < 0)
1718      {
1719      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1720        use_pcre16? "16" : "", option);
1721      if (rc == PCRE_ERROR_BADMODE)
1722        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1723          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1724      }
1725    
1726    return rc;
1727  }  }
1728    
1729    
1730    
1731  /*************************************************  /*************************************************
1732  *         Byte flipping function                 *  *             Swap byte functions                *
1733  *************************************************/  *************************************************/
1734    
1735    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1736    value, respectively.
1737    
1738    Arguments:
1739      value        any number
1740    
1741    Returns:       the byte swapped value
1742    */
1743    
1744    static pcre_uint32
1745    swap_uint32(pcre_uint32 value)
1746    {
1747    return ((value & 0x000000ff) << 24) |
1748           ((value & 0x0000ff00) <<  8) |
1749           ((value & 0x00ff0000) >>  8) |
1750           (value >> 24);
1751    }
1752    
1753    static pcre_uint16
1754    swap_uint16(pcre_uint16 value)
1755    {
1756    return (value >> 8) | (value << 8);
1757    }
1758    
1759    
1760    
1761    /*************************************************
1762    *        Flip bytes in a compiled pattern        *
1763    *************************************************/
1764    
1765    /* This function is called if the 'F' option was present on a pattern that is
1766    to be written to a file. We flip the bytes of all the integer fields in the
1767    regex data block and the study block. In 16-bit mode this also flips relevant
1768    bytes in the pattern itself. This is to make it possible to test PCRE's
1769    ability to reload byte-flipped patterns, e.g. those compiled on a different
1770    architecture. */
1771    
1772    static void
1773    regexflip(pcre *ere, pcre_extra *extra)
1774    {
1775    real_pcre *re = (real_pcre *)ere;
1776    #ifdef SUPPORT_PCRE16
1777    int op;
1778    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1779    int length = re->name_count * re->name_entry_size;
1780    #ifdef SUPPORT_UTF
1781    BOOL utf = (re->options & PCRE_UTF16) != 0;
1782    BOOL utf16_char = FALSE;
1783    #endif /* SUPPORT_UTF */
1784    #endif /* SUPPORT_PCRE16 */
1785    
1786    /* Always flip the bytes in the main data block and study blocks. */
1787    
1788    re->magic_number = REVERSED_MAGIC_NUMBER;
1789    re->size = swap_uint32(re->size);
1790    re->options = swap_uint32(re->options);
1791    re->flags = swap_uint16(re->flags);
1792    re->top_bracket = swap_uint16(re->top_bracket);
1793    re->top_backref = swap_uint16(re->top_backref);
1794    re->first_char = swap_uint16(re->first_char);
1795    re->req_char = swap_uint16(re->req_char);
1796    re->name_table_offset = swap_uint16(re->name_table_offset);
1797    re->name_entry_size = swap_uint16(re->name_entry_size);
1798    re->name_count = swap_uint16(re->name_count);
1799    
1800    if (extra != NULL)
1801      {
1802      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1803      rsd->size = swap_uint32(rsd->size);
1804      rsd->flags = swap_uint32(rsd->flags);
1805      rsd->minlength = swap_uint32(rsd->minlength);
1806      }
1807    
1808    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1809    in the name table, if present, and then in the pattern itself. */
1810    
1811    #ifdef SUPPORT_PCRE16
1812    if (!use_pcre16) return;
1813    
1814    while(TRUE)
1815      {
1816      /* Swap previous characters. */
1817      while (length-- > 0)
1818        {
1819        *ptr = swap_uint16(*ptr);
1820        ptr++;
1821        }
1822    #ifdef SUPPORT_UTF
1823      if (utf16_char)
1824        {
1825        if ((ptr[-1] & 0xfc00) == 0xd800)
1826          {
1827          /* We know that there is only one extra character in UTF-16. */
1828          *ptr = swap_uint16(*ptr);
1829          ptr++;
1830          }
1831        }
1832      utf16_char = FALSE;
1833    #endif /* SUPPORT_UTF */
1834    
1835      /* Get next opcode. */
1836    
1837      length = 0;
1838      op = *ptr;
1839      *ptr++ = swap_uint16(op);
1840    
1841      switch (op)
1842        {
1843        case OP_END:
1844        return;
1845    
1846    #ifdef SUPPORT_UTF
1847        case OP_CHAR:
1848        case OP_CHARI:
1849        case OP_NOT:
1850        case OP_NOTI:
1851        case OP_STAR:
1852        case OP_MINSTAR:
1853        case OP_PLUS:
1854        case OP_MINPLUS:
1855        case OP_QUERY:
1856        case OP_MINQUERY:
1857        case OP_UPTO:
1858        case OP_MINUPTO:
1859        case OP_EXACT:
1860        case OP_POSSTAR:
1861        case OP_POSPLUS:
1862        case OP_POSQUERY:
1863        case OP_POSUPTO:
1864        case OP_STARI:
1865        case OP_MINSTARI:
1866        case OP_PLUSI:
1867        case OP_MINPLUSI:
1868        case OP_QUERYI:
1869        case OP_MINQUERYI:
1870        case OP_UPTOI:
1871        case OP_MINUPTOI:
1872        case OP_EXACTI:
1873        case OP_POSSTARI:
1874        case OP_POSPLUSI:
1875        case OP_POSQUERYI:
1876        case OP_POSUPTOI:
1877        case OP_NOTSTAR:
1878        case OP_NOTMINSTAR:
1879        case OP_NOTPLUS:
1880        case OP_NOTMINPLUS:
1881        case OP_NOTQUERY:
1882        case OP_NOTMINQUERY:
1883        case OP_NOTUPTO:
1884        case OP_NOTMINUPTO:
1885        case OP_NOTEXACT:
1886        case OP_NOTPOSSTAR:
1887        case OP_NOTPOSPLUS:
1888        case OP_NOTPOSQUERY:
1889        case OP_NOTPOSUPTO:
1890        case OP_NOTSTARI:
1891        case OP_NOTMINSTARI:
1892        case OP_NOTPLUSI:
1893        case OP_NOTMINPLUSI:
1894        case OP_NOTQUERYI:
1895        case OP_NOTMINQUERYI:
1896        case OP_NOTUPTOI:
1897        case OP_NOTMINUPTOI:
1898        case OP_NOTEXACTI:
1899        case OP_NOTPOSSTARI:
1900        case OP_NOTPOSPLUSI:
1901        case OP_NOTPOSQUERYI:
1902        case OP_NOTPOSUPTOI:
1903        if (utf) utf16_char = TRUE;
1904    #endif
1905        /* Fall through. */
1906    
1907        default:
1908        length = OP_lengths16[op] - 1;
1909        break;
1910    
1911        case OP_CLASS:
1912        case OP_NCLASS:
1913        /* Skip the character bit map. */
1914        ptr += 32/sizeof(pcre_uint16);
1915        length = 0;
1916        break;
1917    
1918        case OP_XCLASS:
1919        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1920        if (LINK_SIZE > 1)
1921          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1922            - (1 + LINK_SIZE + 1));
1923        else
1924          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1925    
1926        /* Reverse the size of the XCLASS instance. */
1927        *ptr = swap_uint16(*ptr);
1928        ptr++;
1929        if (LINK_SIZE > 1)
1930          {
1931          *ptr = swap_uint16(*ptr);
1932          ptr++;
1933          }
1934    
1935  static unsigned long int      op = *ptr;
1936  byteflip(unsigned long int value, int n)      *ptr = swap_uint16(op);
1937  {      ptr++;
1938  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);      if ((op & XCL_MAP) != 0)
1939  return ((value & 0x000000ff) << 24) |        {
1940         ((value & 0x0000ff00) <<  8) |        /* Skip the character bit map. */
1941         ((value & 0x00ff0000) >>  8) |        ptr += 32/sizeof(pcre_uint16);
1942         ((value & 0xff000000) >> 24);        length -= 32/sizeof(pcre_uint16);
1943          }
1944        break;
1945        }
1946      }
1947    /* Control should never reach here in 16 bit mode. */
1948    #endif /* SUPPORT_PCRE16 */
1949  }  }
1950    
1951    
1952    
   
1953  /*************************************************  /*************************************************
1954  *        Check match or recursion limit          *  *        Check match or recursion limit          *
1955  *************************************************/  *************************************************/
1956    
1957  static int  static int
1958  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1959    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1960    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1961  {  {
# Line 1087  for (;;) Line 1970  for (;;)
1970    {    {
1971    *limit = mid;    *limit = mid;
1972    
1973    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1974      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
1975    
1976    if (count == errnumber)    if (count == errnumber)
# Line 1132  Returns:    < 0, = 0, or > 0, according Line 2015  Returns:    < 0, = 0, or > 0, according
2015  */  */
2016    
2017  static int  static int
2018  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2019  {  {
2020  while (n--)  while (n--)
2021    {    {
# Line 1159  Returns:      appropriate PCRE_NEWLINE_x Line 2042  Returns:      appropriate PCRE_NEWLINE_x
2042  */  */
2043    
2044  static int  static int
2045  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2046  {  {
2047  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2048  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2049  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2050  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2051  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2052  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2053  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2054  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2055  return 0;  return 0;
2056  }  }
# Line 1189  printf("If input is a terminal, readline Line 2072  printf("If input is a terminal, readline
2072  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2073  #endif  #endif
2074  printf("\nOptions:\n");  printf("\nOptions:\n");
2075    #ifdef SUPPORT_PCRE16
2076    printf("  -16      use 16-bit interface\n");
2077    #endif
2078  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
2079  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2080    printf("  -C arg   show a specific compile-time option\n");
2081    printf("           and exit with its value. The arg can be:\n");
2082    printf("     linksize     internal link size [2, 3, 4]\n");
2083    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2084    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2085    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2086    printf("     ucp          Unicode Properties supported [0, 1]\n");
2087    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2088    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2089  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2090  #if !defined NODFA  #if !defined NODFA
2091  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 1226  options, followed by a set of test data, Line 2121  options, followed by a set of test data,
2121  int main(int argc, char **argv)  int main(int argc, char **argv)
2122  {  {
2123  FILE *infile = stdin;  FILE *infile = stdin;
2124    const char *version;
2125  int options = 0;  int options = 0;
2126  int study_options = 0;  int study_options = 0;
2127  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 1251  int stack_size; Line 2147  int stack_size;
2147    
2148  pcre_jit_stack *jit_stack = NULL;  pcre_jit_stack *jit_stack = NULL;
2149    
2150    /* These vectors store, end-to-end, a list of zero-terminated captured
2151  /* These vectors store, end-to-end, a list of captured substring names. Assume  substring names, each list itself being terminated by an empty name. Assume
2152  that 1024 is plenty long enough for the few names we'll be testing. */  that 1024 is plenty long enough for the few names we'll be testing. It is
2153    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2154  uschar copynames[1024];  for the actual memory, to ensure alignment. By defining these variables always
2155  uschar getnames[1024];  (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2156    #ifdefs in the code. */
2157  uschar *copynamesptr;  
2158  uschar *getnamesptr;  pcre_uint16 copynames[1024];
2159    pcre_uint16 getnames[1024];
2160  /* Get buffers from malloc() so that Electric Fence will check their misuse  
2161  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint16 *cn16ptr;
2162    pcre_uint16 *gn16ptr;
2163  buffer = (unsigned char *)malloc(buffer_size);  
2164  dbuffer = (unsigned char *)malloc(buffer_size);  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2165  pbuffer = (unsigned char *)malloc(buffer_size);  pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2166    pcre_uint8 *cn8ptr;
2167    pcre_uint8 *gn8ptr;
2168    
2169    /* Get buffers from malloc() so that valgrind will check their misuse when
2170    debugging. They grow automatically when very long lines are read. The 16-bit
2171    buffer (buffer16) is obtained only if needed. */
2172    
2173    buffer = (pcre_uint8 *)malloc(buffer_size);
2174    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2175    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2176    
2177  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2178    
# Line 1281  it set 0x8000, but then I was advised th Line 2187  it set 0x8000, but then I was advised th
2187  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2188  #endif  #endif
2189    
2190    /* Get the version number: both pcre_version() and pcre16_version() give the
2191    same answer. We just need to ensure that we call one that is available. */
2192    
2193    #ifdef SUPPORT_PCRE8
2194    version = pcre_version();
2195    #else
2196    version = pcre16_version();
2197    #endif
2198    
2199  /* Scan options */  /* Scan options */
2200    
2201  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2202    {    {
2203    unsigned char *endptr;    pcre_uint8 *endptr;
2204    
2205    if (strcmp(argv[op], "-m") == 0) showstore = 1;    if (strcmp(argv[op], "-m") == 0) showstore = 1;
2206    else if (strcmp(argv[op], "-s") == 0) force_study = 0;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2207    else if (strcmp(argv[op], "-s+") == 0)    else if (strcmp(argv[op], "-s+") == 0)
2208      {      {
2209      force_study = 1;      force_study = 1;
2210      force_study_options = PCRE_STUDY_JIT_COMPILE;      force_study_options = PCRE_STUDY_JIT_COMPILE;
2211      }      }
2212      else if (strcmp(argv[op], "-16") == 0)
2213        {
2214    #ifdef SUPPORT_PCRE16
2215        use_pcre16 = 1;
2216    #else
2217        printf("** This version of PCRE was built without 16-bit support\n");
2218        exit(1);
2219    #endif
2220        }
2221    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2222    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
2223    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
# Line 1303  while (argc > 1 && argv[op][0] == '-') Line 2227  while (argc > 1 && argv[op][0] == '-')
2227    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2228  #endif  #endif
2229    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2230        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2231          *endptr == 0))          *endptr == 0))
2232      {      {
2233      op++;      op++;
# Line 1313  while (argc > 1 && argv[op][0] == '-') Line 2237  while (argc > 1 && argv[op][0] == '-')
2237      {      {
2238      int both = argv[op][2] == 0;      int both = argv[op][2] == 0;
2239      int temp;      int temp;
2240      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2241                       *endptr == 0))                       *endptr == 0))
2242        {        {
2243        timeitm = temp;        timeitm = temp;
# Line 1324  while (argc > 1 && argv[op][0] == '-') Line 2248  while (argc > 1 && argv[op][0] == '-')
2248      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2249      }      }
2250    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2251        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2252          *endptr == 0))          *endptr == 0))
2253      {      {
2254  #if defined(_WIN32) || defined(WIN32) || defined(__minix)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
# Line 1352  while (argc > 1 && argv[op][0] == '-') Line 2276  while (argc > 1 && argv[op][0] == '-')
2276      {      {
2277      int rc;      int rc;
2278      unsigned long int lrc;      unsigned long int lrc;
2279      printf("PCRE version %s\n", pcre_version());  
2280        if (argc > 2)
2281          {
2282          if (strcmp(argv[op + 1], "linksize") == 0)
2283            {
2284            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2285            printf("%d\n", rc);
2286            yield = rc;
2287            goto EXIT;
2288            }
2289          if (strcmp(argv[op + 1], "pcre8") == 0)
2290            {
2291    #ifdef SUPPORT_PCRE8
2292            printf("1\n");
2293            yield = 1;
2294    #else
2295            printf("0\n");
2296            yield = 0;
2297    #endif
2298            goto EXIT;
2299            }
2300          if (strcmp(argv[op + 1], "pcre16") == 0)
2301            {
2302    #ifdef SUPPORT_PCRE16
2303            printf("1\n");
2304            yield = 1;
2305    #else
2306            printf("0\n");
2307            yield = 0;
2308    #endif
2309            goto EXIT;
2310            }
2311          if (strcmp(argv[op + 1], "utf") == 0)
2312            {
2313    #ifdef SUPPORT_PCRE8
2314            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2315            printf("%d\n", rc);
2316            yield = rc;
2317    #else
2318            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2319            printf("%d\n", rc);
2320            yield = rc;
2321    #endif
2322            goto EXIT;
2323            }
2324          if (strcmp(argv[op + 1], "ucp") == 0)
2325            {
2326            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2327            printf("%d\n", rc);
2328            yield = rc;
2329            goto EXIT;
2330            }
2331          if (strcmp(argv[op + 1], "jit") == 0)
2332            {
2333            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2334            printf("%d\n", rc);
2335            yield = rc;
2336            goto EXIT;
2337            }
2338          if (strcmp(argv[op + 1], "newline") == 0)
2339            {
2340            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2341            /* Note that these values are always the ASCII values, even
2342            in EBCDIC environments. CR is 13 and NL is 10. */
2343            printf("%s\n", (rc == 13)? "CR" :
2344              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2345              (rc == -2)? "ANYCRLF" :
2346              (rc == -1)? "ANY" : "???");
2347            goto EXIT;
2348            }
2349          printf("Unknown -C option: %s\n", argv[op + 1]);
2350          goto EXIT;
2351          }
2352    
2353        printf("PCRE version %s\n", version);
2354      printf("Compiled with\n");      printf("Compiled with\n");
2355    
2356    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2357    are set, either both UTFs are supported or both are not supported. */
2358    
2359    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2360        printf("  8-bit and 16-bit support\n");
2361        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2362        if (rc)
2363          printf("  UTF-8 and UTF-16 support\n");
2364        else
2365          printf("  No UTF-8 or UTF-16 support\n");
2366    #elif defined SUPPORT_PCRE8
2367        printf("  8-bit support only\n");
2368      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2369      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2370      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2371        printf("  16-bit support only\n");
2372        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2373        printf("  %sUTF-16 support\n", rc? "" : "No ");
2374    #endif
2375    
2376        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2377      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2378      (void)pcre_config(PCRE_CONFIG_JIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2379      if (rc)      if (rc)
2380        printf("  Just-in-time compiler support\n");        printf("  Just-in-time compiler support\n");
2381      else      else
2382        printf("  No just-in-time compiler support\n");        printf("  No just-in-time compiler support\n");
2383      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2384      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
2385      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
2386      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2387        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2388        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2389        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2390      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2391      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2392                                       "all Unicode newlines");                                       "all Unicode newlines");
2393      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2394      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2395      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2396      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2397      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2398      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
2399      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2400      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
2401      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2402      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
2403      goto EXIT;      goto EXIT;
2404      }      }
# Line 1440  if (argc > 2) Line 2457  if (argc > 2)
2457    
2458  /* Set alternative malloc function */  /* Set alternative malloc function */
2459    
2460    #ifdef SUPPORT_PCRE8
2461  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2462  pcre_free = new_free;  pcre_free = new_free;
2463  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2464  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2465    #endif
2466    
2467    #ifdef SUPPORT_PCRE16
2468    pcre16_malloc = new_malloc;
2469    pcre16_free = new_free;
2470    pcre16_stack_malloc = stack_malloc;
2471    pcre16_stack_free = stack_free;
2472    #endif
2473    
2474  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2475    
2476  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2477    
2478  /* Main loop */  /* Main loop */
2479    
# Line 1462  while (!done) Line 2488  while (!done)
2488  #endif  #endif
2489    
2490    const char *error;    const char *error;
2491    unsigned char *markptr;    pcre_uint8 *markptr;
2492    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
2493    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
2494    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
2495      pcre_uint32 get_options;
2496    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2497    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2498    int do_allcaps = 0;    int do_allcaps = 0;
# Line 1481  while (!done) Line 2508  while (!done)
2508    int do_flip = 0;    int do_flip = 0;
2509    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2510    
2511    use_utf8 = 0;    use_utf = 0;
2512    debug_lengths = 1;    debug_lengths = 1;
2513    
2514    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1496  while (!done) Line 2523  while (!done)
2523    
2524    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2525      {      {
2526      unsigned long int magic, get_options;      pcre_uint32 magic;
2527      uschar sbuf[8];      pcre_uint8 sbuf[8];
2528      FILE *f;      FILE *f;
2529    
2530      p++;      p++;
2531        if (*p == '!')
2532          {
2533          do_debug = TRUE;
2534          do_showinfo = TRUE;
2535          p++;
2536          }
2537    
2538      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2539      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2540      *pp = 0;      *pp = 0;
# Line 1512  while (!done) Line 2546  while (!done)
2546        continue;        continue;
2547        }        }
2548    
2549        first_gotten_store = 0;
2550      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2551    
2552      true_size =      true_size =
# Line 1520  while (!done) Line 2555  while (!done)
2555        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2556    
2557      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
2558      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2559    
2560      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2561    
2562      magic = ((real_pcre *)re)->magic_number;      magic = ((real_pcre *)re)->magic_number;
2563      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2564        {        {
2565        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2566          {          {
2567          do_flip = 1;          do_flip = 1;
2568          }          }
# Line 1539  while (!done) Line 2574  while (!done)
2574          }          }
2575        }        }
2576    
2577        /* We hide the byte-invert info for little and big endian tests. */
2578      fprintf(outfile, "Compiled pattern%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2579        do_flip? " (byte-inverted)" : "", p);        do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
   
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
2580    
2581      /* Now see if there is any following study data. */      /* Now see if there is any following study data. */
2582    
# Line 1563  while (!done) Line 2594  while (!done)
2594          {          {
2595          FAIL_READ:          FAIL_READ:
2596          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2597          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
2598              {
2599              PCRE_FREE_STUDY(extra);
2600              }
2601          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2602          fclose(f);          fclose(f);
2603          continue;          continue;
# Line 1573  while (!done) Line 2607  while (!done)
2607        }        }
2608      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2609    
2610        /* Flip the necessary bytes. */
2611        if (do_flip)
2612          {
2613          int rc;
2614          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2615          if (rc == PCRE_ERROR_BADMODE)
2616            {
2617            /* Simulate the result of the function call below. */
2618            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2619              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2620            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2621              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2622            continue;
2623            }
2624          }
2625    
2626        /* Need to know whether UTF mode is set (the PCRE_UTF8 option bit) for printing data strings. */
2627    
2628        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2629        use_utf = (get_options & PCRE_UTF8) != 0;
2630    
2631      fclose(f);      fclose(f);
2632      goto SHOW_INFO;      goto SHOW_INFO;
2633      }      }
2634    
2635    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2636    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2637    
2638    delimiter = *p++;    delimiter = *p++;
2639    
# Line 1629  while (!done) Line 2684  while (!done)
2684    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2685    
2686    options = 0;    options = 0;
2687      study_options = 0;
2688    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
2689    
2690    while (*pp != 0)    while (*pp != 0)
# Line 1665  while (!done) Line 2721  while (!done)
2721  #endif  #endif
2722    
2723        case 'S':        case 'S':
2724        if (do_study == 0)        if (do_study == 0)
2725          {          {
2726          do_study = 1;          do_study = 1;
2727          if (*pp == '+')          if (*pp == '+')
2728            {            {
2729            study_options |= PCRE_STUDY_JIT_COMPILE;            study_options |= PCRE_STUDY_JIT_COMPILE;
2730            pp++;            pp++;
2731            }            }
2732          }          }
2733        else        else
2734          {          {
2735          do_study = 0;          do_study = 0;
# Line 1686  while (!done) Line 2742  while (!done)
2742        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2743        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2744        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2745        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2746        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2747    
2748        case 'T':        case 'T':
# Line 1720  while (!done) Line 2776  while (!done)
2776          goto SKIP_DATA;          goto SKIP_DATA;
2777          }          }
2778        locale_set = 1;        locale_set = 1;
2779        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2780        pp = ppp;        pp = ppp;
2781        break;        break;
2782    
# Line 1733  while (!done) Line 2789  while (!done)
2789    
2790        case '<':        case '<':
2791          {          {
2792          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2793            {            {
2794            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2795            pp += 3;            pp += 3;
# Line 1761  while (!done) Line 2817  while (!done)
2817    
2818    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2819    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2820    local character tables. */    local character tables. Neither does it have 16-bit support. */
2821    
2822  #if !defined NOPOSIX  #if !defined NOPOSIX
2823    if (posix || do_posix)    if (posix || do_posix)
# Line 1777  while (!done) Line 2833  while (!done)
2833      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2834      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2835    
2836        first_gotten_store = 0;
2837      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2838    
2839      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1796  while (!done) Line 2853  while (!done)
2853  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2854    
2855      {      {
2856      unsigned long int get_options;      /* In 16-bit mode, convert the input. */
2857    
2858    #ifdef SUPPORT_PCRE16
2859        if (use_pcre16)
2860          {
2861          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2862            {
2863            case -1:
2864            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2865              "converted to UTF-16\n");
2866            goto SKIP_DATA;
2867    
2868            case -2:
2869            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2870              "cannot be converted to UTF-16\n");
2871            goto SKIP_DATA;
2872    
2873            case -3: /* "Impossible error" when to16 is called with arg1 FALSE */
2874            fprintf(outfile, "**Failed: character value greater than 0xffff "
2875              "cannot be converted to 16-bit in non-UTF mode\n");
2876            goto SKIP_DATA;
2877    
2878            default:
2879            break;
2880            }
2881          p = (pcre_uint8 *)buffer16;
2882          }
2883    #endif
2884    
2885        /* Compile many times when timing */
2886    
2887      if (timeit > 0)      if (timeit > 0)
2888        {        {
# Line 1805  while (!done) Line 2891  while (!done)
2891        clock_t start_time = clock();        clock_t start_time = clock();
2892        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2893          {          {
2894          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2895          if (re != NULL) free(re);          if (re != NULL) free(re);
2896          }          }
2897        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1814  while (!done) Line 2900  while (!done)
2900            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2901        }        }
2902    
2903      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2904        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2905    
2906      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2907      if non-interactive. */      if non-interactive. */
# Line 1845  while (!done) Line 2932  while (!done)
2932      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
2933      lines. */      lines. */
2934    
2935      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2936      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;        goto SKIP_DATA;
2937        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2938    
2939        /* Extract the size for possible writing before possibly flipping it,
2940        and remember the store that was got. */
2941    
2942        true_size = ((real_pcre *)re)->size;
2943        regex_gotten_store = first_gotten_store;
2944    
2945      /* Print information if required. There are now two info-returning      /* Output code size information if requested */
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
2946    
2947      if (log_store)      if (log_store)
2948        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
2949          (int)(gotten_store -          (int)(first_gotten_store -
2950                sizeof(real_pcre) -                sizeof(real_pcre) -
2951                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2952    
     /* Extract the size for possible writing before possibly flipping it,  
     and remember the store that was got. */  
   
     true_size = ((real_pcre *)re)->size;  
     regex_gotten_store = gotten_store;  
   
2953      /* If -s or /S was present, study the regex to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
2954      help with the matching, unless the pattern has the SS option, which      help with the matching, unless the pattern has the SS option, which
2955      suppresses the effect of /S (used for a few test patterns where studying is      suppresses the effect of /S (used for a few test patterns where studying is
# Line 1877  while (!done) Line 2963  while (!done)
2963          clock_t time_taken;          clock_t time_taken;
2964          clock_t start_time = clock();          clock_t start_time = clock();
2965          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
2966            extra = pcre_study(re, study_options | force_study_options, &error);            {
2967              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2968              }
2969          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2970          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
2971              {
2972              PCRE_FREE_STUDY(extra);
2973              }
2974          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
2975            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
2976              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2977          }          }
2978        extra = pcre_study(re, study_options | force_study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2979        if (error != NULL)        if (error != NULL)
2980          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
2981        else if (extra != NULL)        else if (extra != NULL)
2982            {
2983          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2984            if (log_store)
2985              {
2986              size_t jitsize;
2987              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
2988                  jitsize != 0)
2989                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2990              }
2991            }
2992        }        }
2993    
2994      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1904  while (!done) Line 3004  while (!done)
3004        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
3005        }        }
3006    
3007      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
3008    
3009      SHOW_INFO:      SHOW_INFO:
3010    
3011      if (do_debug)      if (do_debug)
3012        {        {
3013        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3014        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3015        }        }
3016    
3017      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1956  while (!done) Line 3019  while (!done)
3019      if (do_showinfo)      if (do_showinfo)
3020        {        {
3021        unsigned long int all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3022        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3023          hascrorlf;          hascrorlf;
3024        int nameentrysize, namecount;        int nameentrysize, namecount;
3025        const uschar *nametable;        const pcre_uint8 *nametable;
3026    
3027        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3028        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3029        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3030        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3031        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3032        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3033        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3034        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3035        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3036        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3037        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3038              != 0)
3039  #if !defined NOINFOCHECK          goto SKIP_DATA;
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3040    
3041        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3042          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 2009  while (!done) Line 3051  while (!done)
3051          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3052          while (namecount-- > 0)          while (namecount-- > 0)
3053            {            {
3054            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3055              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3056              GET2(nametable, 0));  #else
3057              int imm2_size = IMM2_SIZE;
3058    #endif
3059              int length = (int)STRLEN(nametable + imm2_size);
3060              fprintf(outfile, "  ");
3061              PCHARSV(nametable, imm2_size, length, outfile);
3062              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3063    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3064              fprintf(outfile, "%3d\n", use_pcre16?
3065                 (int)(((PCRE_SPTR16)nametable)[0])
3066                :((int)nametable[0] << 8) | (int)nametable[1]);
3067              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3068    #else
3069              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3070    #ifdef SUPPORT_PCRE8
3071            nametable += nameentrysize;            nametable += nameentrysize;
3072    #else
3073              nametable += nameentrysize * 2;
3074    #endif
3075    #endif
3076            }            }
3077          }          }
3078    
# Line 2020  while (!done) Line 3080  while (!done)
3080        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3081    
3082        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
3083        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3084    
3085        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3086          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 2036  while (!done) Line 3096  while (!done)
3096            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3097            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3098            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3099            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3100            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3101            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3102            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3103            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3104    
# Line 2080  while (!done) Line 3140  while (!done)
3140          }          }
3141        else        else
3142          {          {
3143          int ch = first_char & 255;          const char *caseless =
3144          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3145            "" : " (caseless)";            "" : " (caseless)";
3146          if (PRINTHEX(ch))  
3147            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3148              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3149          else          else
3150            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3151              fprintf(outfile, "First char = ");
3152              pchar(first_char, outfile);
3153              fprintf(outfile, "%s\n", caseless);
3154              }
3155          }          }
3156    
3157        if (need_char < 0)        if (need_char < 0)
# Line 2095  while (!done) Line 3160  while (!done)
3160          }          }
3161        else        else
3162          {          {
3163          int ch = need_char & 255;          const char *caseless =
3164          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3165            "" : " (caseless)";            "" : " (caseless)";
3166          if (PRINTHEX(ch))  
3167            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3168              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3169          else          else
3170            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3171              fprintf(outfile, "Need char = ");
3172              pchar(need_char, outfile);
3173              fprintf(outfile, "%s\n", caseless);
3174              }
3175          }          }
3176    
3177        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
# Line 2118  while (!done) Line 3188  while (!done)
3188            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3189          else          else
3190            {            {
3191            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3192            int minlength;            int minlength;
3193    
3194            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3195            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3196    
3197            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
3198              {              {
3199              int i;              if (start_bits == NULL)
3200              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3201              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3202                {                {
3203                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3204                  int c = 24;
3205                  fprintf(outfile, "Starting byte set: ");
3206                  for (i = 0; i < 256; i++)
3207                  {                  {
3208                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
                   {  
                   fprintf(outfile, "%c ", i);  
                   c += 2;  
                   }  
                 else  
3209                    {                    {
3210                    fprintf(outfile, "\\x%02x ", i);                    if (c > 75)
3211                    c += 5;                      {
3212                        fprintf(outfile, "\n  ");
3213                        c = 2;
3214                        }
3215                      if (PRINTOK(i) && i != ' ')
3216                        {
3217                        fprintf(outfile, "%c ", i);
3218                        c += 2;
3219                        }
3220                      else
3221                        {
3222                        fprintf(outfile, "\\x%02x ", i);
3223                        c += 5;
3224                        }
3225                    }                    }
3226                  }                  }
3227                  fprintf(outfile, "\n");
3228                }                }
             fprintf(outfile, "\n");  
3229              }              }
3230            }            }
3231    
3232          /* Show this only if the JIT was set by /S, not by -s. */          /* Show this only if the JIT was set by /S, not by -s. */
3233    
3234          if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)          if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3235            {            {
3236            int jit;            int jit;
3237            new_info(re, extra, PCRE_INFO_JIT, &jit);            if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3238            if (jit)              {
3239              fprintf(outfile, "JIT study was successful\n");              if (jit)
3240            else                fprintf(outfile, "JIT study was successful\n");
3241  #ifdef SUPPORT_JIT              else
3242              fprintf(outfile, "JIT study was not successful\n");  #ifdef SUPPORT_JIT
3243                  fprintf(outfile, "JIT study was not successful\n");
3244  #else  #else
3245              fprintf(outfile, "JIT support is not available in this version of PCRE\n");                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3246  #endif  #endif
3247            }              }
3248              }
3249          }          }
3250        }        }
3251    
# Line 2188  while (!done) Line 3262  while (!done)
3262          }          }
3263        else        else
3264          {          {
3265          uschar sbuf[8];          pcre_uint8 sbuf[8];
3266          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3267          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3268          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3269          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3270            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3271          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3272          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3273          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3274          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3275            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3276    
3277          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3278              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 2225  while (!done) Line 3300  while (!done)
3300          }          }
3301    
3302        new_free(re);        new_free(re);
3303        if (extra != NULL) pcre_free_study(extra);        if (extra != NULL)
3304            {
3305            PCRE_FREE_STUDY(extra);
3306            }
3307        if (locale_set)        if (locale_set)
3308          {          {
3309          new_free((void *)tables);          new_free((void *)tables);
# Line 2240  while (!done) Line 3318  while (!done)
3318    
3319    for (;;)    for (;;)
3320      {      {
3321      uschar *q;      pcre_uint8 *q;
3322      uschar *bptr;      pcre_uint8 *bptr;
3323      int *use_offsets = offsets;      int *use_offsets = offsets;
3324      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3325      int callout_data = 0;      int callout_data = 0;
# Line 2257  while (!done) Line 3335  while (!done)
3335      int g_notempty = 0;      int g_notempty = 0;
3336      int use_dfa = 0;      int use_dfa = 0;
3337    
     options = 0;  
   
3338      *copynames = 0;      *copynames = 0;
3339      *getnames = 0;      *getnames = 0;
3340    
3341      copynamesptr = copynames;      cn16ptr = copynames;
3342      getnamesptr = getnames;      gn16ptr = getnames;
3343        cn8ptr = copynames8;
3344        gn8ptr = getnames8;
3345    
3346      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3347      first_callout = 1;      first_callout = 1;
3348      last_callout_mark = NULL;      last_callout_mark = NULL;
3349      callout_extra = 0;      callout_extra = 0;
# Line 2273  while (!done) Line 3351  while (!done)
3351      callout_fail_count = 999999;      callout_fail_count = 999999;
3352      callout_fail_id = -1;      callout_fail_id = -1;
3353      show_malloc = 0;      show_malloc = 0;
3354        options = 0;
3355    
3356      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3357        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 2307  while (!done) Line 3386  while (!done)
3386        {        {
3387        int i = 0;        int i = 0;
3388        int n = 0;        int n = 0;
3389    
3390        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3391          In non-UTF mode, allow the value of the byte to fall through to later,
3392          where values greater than 127 are turned into UTF-8 when running in
3393          16-bit mode. */
3394    
3395          if (c != '\\')
3396            {
3397            if (use_utf)
3398              {
3399              *q++ = c;
3400              continue;
3401              }
3402            }
3403    
3404          /* Handle backslash escapes */
3405    
3406          else switch ((c = *p++))
3407          {          {
3408          case 'a': c =    7; break;          case 'a': c =    7; break;
3409          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 2324  while (!done) Line 3419  while (!done)
3419          c -= '0';          c -= '0';
3420          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3421            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3422          break;          break;
3423    
3424          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3425          if (*p == '{')          if (*p == '{')
3426            {            {
3427            unsigned char *pt = p;            pcre_uint8 *pt = p;
3428            c = 0;            c = 0;
3429            while (isxdigit(*(++pt)))  
3430              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3431              when isxdigit() is a macro that refers to its argument more than
3432              once. This is banned by the C Standard, but apparently happens in at
3433              least one MacOS environment. */
3434    
3435              for (pt++; isxdigit(*pt); pt++)
3436                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3437            if (*pt == '}')            if (*pt == '}')
3438              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             if (use_utf8)  
               {  
               utn = ord2utf8(c, buff8);  
               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
               c = buff8[ii];   /* Last byte */  
               }  
             else  
              {  
              if (c > 255)  
                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "  
                  "UTF-8 mode is not enabled.\n"  
                  "** Truncation will probably give the wrong result.\n", c);  
              }  
3439              p = pt + 1;              p = pt + 1;
3440              break;              break;
3441              }              }
3442            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3443            }            }
 #endif  
3444    
3445          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3446            allows UTF-8 characters to be constructed byte by byte, and also allows
3447            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3448            Otherwise, pass it down to later code so that it can be turned into
3449            UTF-8 when running in 16-bit mode. */
3450    
3451          c = 0;          c = 0;
3452          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3453            {            {
3454            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3455            p++;            p++;
3456            }            }
3457            if (use_utf)
3458              {
3459              *q++ = c;
3460              continue;
3461              }
3462          break;          break;
3463    
3464          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 2412  while (!done) Line 3491  while (!done)
3491            }            }
3492          else if (isalnum(*p))          else if (isalnum(*p))
3493            {            {
3494            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3495            }            }
3496          else if (*p == '+')          else if (*p == '+')
3497            {            {
# Line 2428  while (!done) Line 3500  while (!done)
3500            }            }
3501          else if (*p == '-')          else if (*p == '-')
3502            {            {
3503            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3504            p++;            p++;
3505            }            }
3506          else if (*p == '!')          else if (*p == '!')
# Line 2482  while (!done) Line 3554  while (!done)
3554            }            }
3555          else if (isalnum(*p))          else if (isalnum(*p))
3556            {            {
3557            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)getnamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);  
           getnamesptr = npp;  
3558            }            }
3559          continue;          continue;
3560    
3561          case 'J':          case 'J':
3562          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3563          if (extra != NULL          if (extra != NULL
3564              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3565              && extra->executable_jit != NULL)              && extra->executable_jit != NULL)
3566            {            {
3567            if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);            if (jit_stack != NULL) PCRE_JIT_STACK_FREE(jit_stack);
3568            jit_stack = pcre_jit_stack_alloc(1, n * 1024);            jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3569            pcre_assign_jit_stack(extra, jit_callback, jit_stack);            PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3570            }            }
3571          continue;          continue;
3572    
3573          case 'L':          case 'L':
# Line 2597  while (!done) Line 3662  while (!done)
3662            }            }
3663          continue;          continue;
3664          }          }
3665        *q++ = c;  
3666          /* We now have a character value in c that may be greater than 255. In
3667          16-bit mode, we always convert characters to UTF-8 so that values greater
3668          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3669          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3670          mode must have come from \x{...} or octal constructs because values from
3671          \x.. get this far only in non-UTF mode. */
3672    
3673          if (use_pcre16 || use_utf)
3674            {
3675            pcre_uint8 buff8[8];
3676            int ii, utn;
3677            utn = ord2utf8(c, buff8);
3678            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3679            }
3680          else
3681            {
3682            if (c > 255)
3683              {
3684              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3685                "and UTF-8 mode is not enabled.\n", c);
3686              fprintf(outfile, "** Truncation will probably give the wrong "
3687                "result.\n");
3688              }
3689            *q++ = c;
3690            }
3691        }        }
3692    
3693        /* Reached end of subject string */
3694    
3695      *q = 0;      *q = 0;
3696      len = (int)(q - dbuffer);      len = (int)(q - dbuffer);
3697    
# Line 2660  while (!done) Line 3753  while (!done)
3753            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3754              {              {
3755              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3756              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3757                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3758              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3759              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
3760                {                {
3761                fprintf(outfile, "%2d+ ", (int)i);                fprintf(outfile, "%2d+ ", (int)i);
3762                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3763                  outfile);                  outfile);
3764                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3765                }                }
# Line 2674  while (!done) Line 3767  while (!done)
3767            }            }
3768          }          }
3769        free(pmatch);        free(pmatch);
3770          goto NEXT_DATA;
3771        }        }
3772    
3773    #endif  /* !defined NOPOSIX */
3774    
3775      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3776    
3777      else  #ifdef SUPPORT_PCRE16
3778  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3779          {
3780          len = to16(TRUE, bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3781          switch(len)
3782            {
3783            case -1:
3784            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3785              "converted to UTF-16\n");
3786            goto NEXT_DATA;
3787    
3788            case -2:
3789            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3790              "cannot be converted to UTF-16\n");
3791            goto NEXT_DATA;
3792    
3793            case -3:
3794            fprintf(outfile, "**Failed: character value greater than 0xffff "
3795              "cannot be converted to 16-bit in non-UTF mode\n");
3796            goto NEXT_DATA;
3797    
3798            default:
3799            break;
3800            }
3801          bptr = (pcre_uint8 *)buffer16;
3802          }
3803    #endif
3804    
3805      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3806        {        {
# Line 2696  while (!done) Line 3817  while (!done)
3817            {            {
3818            int workspace[1000];            int workspace[1000];
3819            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3820              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,              {
3821                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3822                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets, workspace,
3823                  (sizeof(workspace)/sizeof(int)));
3824                }
3825            }            }
3826          else          else
3827  #endif  #endif
3828    
3829          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3830            count = pcre_exec(re, extra, (char *)bptr, len,            {
3831              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3832                (options | g_notempty), use_offsets, use_size_offsets);
3833              }
3834          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3835          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3836            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2728  while (!done) Line 3852  while (!done)
3852            extra->flags = 0;            extra->flags = 0;
3853            }            }
3854          else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;          else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3855    
3856          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3857            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
3858            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
# Line 2751  while (!done) Line 3875  while (!done)
3875            }            }
3876          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3877          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3878          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3879            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3880          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3881          }          }
# Line 2763  while (!done) Line 3887  while (!done)
3887        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3888          {          {
3889          int workspace[1000];          int workspace[1000];
3890          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3891            options | g_notempty, use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, workspace,
3892            sizeof(workspace)/sizeof(int));            (sizeof(workspace)/sizeof(int)));
3893          if (count == 0)          if (count == 0)
3894            {            {
3895            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2776  while (!done) Line 3900  while (!done)
3900    
3901        else        else
3902          {          {
3903          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3904            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3905          if (count == 0)          if (count == 0)
3906            {            {
3907            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 2790  while (!done) Line 3914  while (!done)
3914        if (count >= 0)        if (count >= 0)
3915          {          {
3916          int i, maxcount;          int i, maxcount;
3917            void *cnptr, *gnptr;
3918    
3919  #if !defined NODFA  #if !defined NODFA
3920          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2816  while (!done) Line 3941  while (!done)
3941    
3942          if (do_allcaps)          if (do_allcaps)
3943            {            {
3944            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3945                goto SKIP_DATA;
3946            count++;   /* Allow for full match */            count++;   /* Allow for full match */
3947            if (count * 2 > use_size_offsets) count = use_size_offsets/2;            if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3948            }            }
# Line 2838  while (!done) Line 3964  while (!done)
3964            else            else
3965              {              {
3966              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3967              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr, use_offsets[i],
3968                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
3969              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3970              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
3971                {                {
3972                fprintf(outfile, "%2d+ ", i/2);                fprintf(outfile, "%2d+ ", i/2);
3973                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
3974                  outfile);                  outfile);
3975                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3976                }                }
3977              }              }
3978            }            }
3979    
3980          if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);          if (markptr != NULL)
3981              {
3982              fprintf(outfile, "MK: ");
3983              PCHARSV(markptr, 0, -1, outfile);
3984              fprintf(outfile, "\n");
3985              }
3986    
3987          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3988            {            {
3989            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
3990              {              {
3991                int rc;
3992              char copybuffer[256];              char copybuffer[256];
3993              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
3994                i, copybuffer, sizeof(copybuffer));                copybuffer, sizeof(copybuffer));
3995              if (rc < 0)              if (rc < 0)
3996                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3997              else              else
3998                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);                {
3999                  fprintf(outfile, "%2dC ", i);
4000                  PCHARSV(copybuffer, 0, rc, outfile);
4001                  fprintf(outfile, " (%d)\n", rc);
4002                  }
4003              }              }
4004            }            }
4005    
4006          for (copynamesptr = copynames;          cnptr = copynames;
4007               *copynamesptr != 0;          for (;;)
              copynamesptr += (int)strlen((char*)copynamesptr) + 1)  
4008            {            {
4009              int rc;
4010            char copybuffer[256];            char copybuffer[256];
4011            int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,  
4012              count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));            if (use_pcre16)
4013                {
4014                if (*(pcre_uint16 *)cnptr == 0) break;
4015                }
4016              else
4017                {
4018                if (*(pcre_uint8 *)cnptr == 0) break;
4019                }
4020    
4021              PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4022                cnptr, copybuffer, sizeof(copybuffer));
4023    
4024            if (rc < 0)            if (rc < 0)
4025              fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);              {
4026                fprintf(outfile, "copy substring ");
4027                PCHARSV(cnptr, 0, -1, outfile);
4028                fprintf(outfile, " failed %d\n", rc);
4029                }
4030            else            else
4031              fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);              {
4032                fprintf(outfile, "  C ");
4033                PCHARSV(copybuffer, 0, rc, outfile);
4034                fprintf(outfile, " (%d) ", rc);
4035                PCHARSV(cnptr, 0, -1, outfile);
4036                putc('\n', outfile);
4037                }
4038    
4039              cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4040            }            }
4041    
4042          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4043            {            {
4044            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
4045              {              {
4046                int rc;
4047              const char *substring;              const char *substring;
4048              int rc = pcre_get_substring((char *)bptr, use_offsets, count,              PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
               i, &substring);  
4049              if (rc < 0)              if (rc < 0)
4050                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
4051              else              else
4052                {                {
4053                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG ", i);
4054                pcre_free_substring(substring);                PCHARSV(substring, 0, rc, outfile);
4055                  fprintf(outfile, " (%d)\n", rc);
4056                  PCRE_FREE_SUBSTRING(substring);
4057                }                }
4058              }              }
4059            }            }
4060    
4061          for (getnamesptr = getnames;          gnptr = getnames;
4062               *getnamesptr != 0;          for (;;)
              getnamesptr += (int)strlen((char*)getnamesptr) + 1)  
4063            {            {
4064              int rc;
4065            const char *substring;            const char *substring;
4066            int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,  
4067              count, (char *)getnamesptr, &substring);            if (use_pcre16)
4068                {
4069                if (*(pcre_uint16 *)gnptr == 0) break;
4070                }
4071              else
4072                {
4073                if (*(pcre_uint8 *)gnptr == 0) break;
4074                }
4075    
4076              PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4077                gnptr, &substring);
4078            if (rc < 0)            if (rc < 0)
4079              fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);              {
4080                fprintf(outfile, "get substring ");
4081                PCHARSV(gnptr, 0, -1, outfile);
4082                fprintf(outfile, " failed %d\n", rc);
4083                }
4084            else            else
4085              {              {
4086              fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);              fprintf(outfile, "  G ");
4087              pcre_free_substring(substring);              PCHARSV(substring, 0, rc, outfile);
4088                fprintf(outfile, " (%d) ", rc);
4089                PCHARSV(gnptr, 0, -1, outfile);
4090                PCRE_FREE_SUBSTRING(substring);
4091                putc('\n', outfile);
4092              }              }
4093    
4094              gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4095            }            }
4096    
4097          if (getlist)          if (getlist)
4098            {            {
4099              int rc;
4100            const char **stringlist;            const char **stringlist;
4101            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,            PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
             &stringlist);  
4102            if (rc < 0)            if (rc < 0)
4103              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
4104            else            else
4105              {              {
4106              for (i = 0; i < count; i++)              for (i = 0; i < count; i++)
4107                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                {
4108                  fprintf(outfile, "%2dL ", i);
4109                  PCHARSV(stringlist[i], 0, -1, outfile);
4110                  putc('\n', outfile);
4111                  }
4112              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
4113                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
4114              pcre_free_substring_list(stringlist);              PCRE_FREE_SUBSTRING_LIST(stringlist);
4115              }              }
4116            }            }
4117          }          }
# Line 2936  while (!done) Line 4121  while (!done)
4121        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
4122          {          {
4123          if (markptr == NULL) fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
4124            else fprintf(outfile, "Partial match, mark=%s", markptr);          else
4125              {
4126              fprintf(outfile, "Partial match, mark=");
4127              PCHARSV(markptr, 0, -1, outfile);
4128              }
4129          if (use_size_offsets > 1)          if (use_size_offsets > 1)
4130            {            {
4131            fprintf(outfile, ": ");            fprintf(outfile, ": ");
4132            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4133              outfile);              outfile);
4134            }            }
4135          fprintf(outfile, "\n");          fprintf(outfile, "\n");
# Line 2957  while (!done) Line 4146  while (!done)
4146        terminated by CRLF, an advance of one character just passes the \r,        terminated by CRLF, an advance of one character just passes the \r,
4147        whereas we should prefer the longer newline sequence, as does the code in        whereas we should prefer the longer newline sequence, as does the code in
4148        pcre_exec(). Fudge the offset value to achieve this. We check for a        pcre_exec(). Fudge the offset value to achieve this. We check for a
4149        newline setting in the pattern; if none was set, use pcre_config() to        newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4150        find the default.        find the default.
4151    
4152        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
# Line 2973  while (!done) Line 4162  while (!done)
4162            if ((obits & PCRE_NEWLINE_BITS) == 0)            if ((obits & PCRE_NEWLINE_BITS) == 0)
4163              {              {
4164              int d;              int d;
4165              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4166              /* Note that these values are always the ASCII ones, even in              /* Note that these values are always the ASCII ones, even in
4167              EBCDIC environments. CR = 13, NL = 10. */              EBCDIC environments. CR = 13, NL = 10. */
4168              obits = (d == 13)? PCRE_NEWLINE_CR :              obits = (d == 13)? PCRE_NEWLINE_CR :
# Line 2987  while (!done) Line 4176  while (!done)
4176                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4177                &&                &&
4178                start_offset < len - 1 &&                start_offset < len - 1 &&
4179                bptr[start_offset] == '\r' &&  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4180                bptr[start_offset+1] == '\n')                (use_pcre16?
4181                       ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4182                    && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4183                  :
4184                       bptr[start_offset] == '\r'
4185                    && bptr[start_offset + 1] == '\n')
4186    #elif defined SUPPORT_PCRE16
4187                     ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4188                  && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4189    #else
4190                     bptr[start_offset] == '\r'
4191                  && bptr[start_offset + 1] == '\n'
4192    #endif
4193                  )
4194              onechar++;              onechar++;
4195            else if (use_utf8)            else if (use_utf)
4196              {              {
4197              while (start_offset + onechar < len)              while (start_offset + onechar < len)
4198                {                {
# Line 3007  while (!done) Line 4209  while (!done)
4209              case PCRE_ERROR_NOMATCH:              case PCRE_ERROR_NOMATCH:
4210              if (gmatched == 0)              if (gmatched == 0)
4211                {                {
4212                if (markptr == NULL) fprintf(outfile, "No match\n");                if (markptr == NULL)
4213                  else fprintf(outfile, "No match, mark = %s\n", markptr);                  {
4214                    fprintf(outfile, "No match\n");
4215                    }
4216                  else
4217                    {
4218                    fprintf(outfile, "No match, mark = ");
4219                    PCHARSV(markptr, 0, -1, outfile);
4220                    putc('\n', outfile);
4221                    }
4222                }                }
4223              break;              break;
4224    
4225              case PCRE_ERROR_BADUTF8:              case PCRE_ERROR_BADUTF8:
4226              case PCRE_ERROR_SHORTUTF8:              case PCRE_ERROR_SHORTUTF8:
4227              fprintf(outfile, "Error %d (%s UTF-8 string)", count,              fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4228                (count == PCRE_ERROR_BADUTF8)? "bad" : "short");                (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4229                  use_pcre16? "16" : "8");
4230              if (use_size_offsets >= 2)              if (use_size_offsets >= 2)
4231                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4232                  use_offsets[1]);                  use_offsets[1]);
4233              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4234              break;              break;
4235    
4236                case PCRE_ERROR_BADUTF8_OFFSET:
4237                fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4238                  use_pcre16? "16" : "8");
4239                break;
4240    
4241              default:              default:
4242              if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))              if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4243                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
# Line 3061  while (!done) Line 4277  while (!done)
4277    
4278        else        else
4279          {          {
4280          bptr += use_offsets[1];          bptr += use_offsets[1] * CHAR_SIZE;
4281          len -= use_offsets[1];          len -= use_offsets[1];
4282          }          }
4283        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
# Line 3076  while (!done) Line 4292  while (!done)
4292  #endif  #endif
4293    
4294    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
4295    if (extra != NULL) pcre_free_study(extra);    if (extra != NULL)
4296        {
4297        PCRE_FREE_STUDY(extra);
4298        }
4299    if (locale_set)    if (locale_set)
4300      {      {
4301      new_free((void *)tables);      new_free((void *)tables);
4302      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
4303      locale_set = 0;      locale_set = 0;
4304      }      }
4305    if (jit_stack != NULL)    if (jit_stack != NULL)
4306      {      {
4307      pcre_jit_stack_free(jit_stack);      PCRE_JIT_STACK_FREE(jit_stack);
4308      jit_stack = NULL;      jit_stack = NULL;
4309      }      }
4310    }    }
4311    
4312  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
# Line 3102  free(dbuffer); Line 4321  free(dbuffer);
4321  free(pbuffer);  free(pbuffer);
4322  free(offsets);  free(offsets);
4323    
4324    #ifdef SUPPORT_PCRE16
4325    if (buffer16 != NULL) free(buffer16);
4326    #endif
4327    
4328  return yield;  return yield;
4329  }  }
4330    

Legend:
Removed from v.689  
changed lines
  Added in v.841

  ViewVC Help
Powered by ViewVC 1.1.5